[
  {
    "path": ".github/workflows/retype-action.yml",
    "content": "name: Publish Retype powered website to GitHub Pages\non:\n  workflow_dispatch:\n  push:\n    branches:\n      - master\n\njobs:\n  publish:\n    name: Publish to retype branch\n\n    runs-on: ubuntu-latest\n\n    permissions:\n      contents: write\n\n    steps:\n      - uses: actions/checkout@v4\n\n      - uses: retypeapp/action-build@latest\n\n      - uses: retypeapp/action-github-pages@latest\n        with:\n          update-branch: true\n"
  },
  {
    "path": ".gitignore",
    "content": "\n*.pt\n*.pth\n# anything in the folders: datasets, runs\ndatasets/\nruns/\n*.lnk\n*.ps1\n*.docx"
  },
  {
    "path": "DLTA_AI_app/.flake8",
    "content": "[flake8]\nexclude = .anaconda3/*\nignore = E203, E741, W503, W504\n"
  },
  {
    "path": "DLTA_AI_app/.gitignore",
    "content": "/.cache/\n/.pytest_cache/\n\n/build/\n/dist/\n/*.egg-info/\n\n*.py[cdo]\n\n.DS_Store\n.idea/\n\n# mp4 files\n*.mp4\n\n# anything in the test_videos folder\ntest_videos/*\n\nsaved_models.json"
  },
  {
    "path": "DLTA_AI_app/.gitmodules",
    "content": "[submodule \"github2pypi\"]\n\tpath = github2pypi\n\turl = https://github.com/wkentaro/github2pypi.git\n"
  },
  {
    "path": "DLTA_AI_app/__main__.py",
    "content": "import os\nimport sys\n\nsys.path.append(os.path.dirname(os.path.realpath(__file__)))\nos.chdir(os.path.dirname(os.path.realpath(__file__)))\n\nfrom PyQt6 import QtGui, QtWidgets, QtCore\n\nfrom labelme import __appname__\nfrom labelme import __version__\nfrom labelme.utils import newIcon\n\nimport qdarktheme\n\n\ndef main():\n    app = QtWidgets.QApplication(sys.argv)\n    QtWidgets.QApplication.setHighDpiScaleFactorRoundingPolicy(QtCore.Qt.HighDpiScaleFactorRoundingPolicy.RoundPreferFloor)\n    app.setApplicationName(__appname__)\n    app.setWindowIcon(newIcon(\"icon\"))\n\n    # create and show the splash screen\n    splash_pix = QtGui.QPixmap('labelme/icons/splash_screen.png')\n    splash = QtWidgets.QSplashScreen(splash_pix)\n\n    # center the splash screen on the primary screen\n    try:\n        from screeninfo import get_monitors\n\n        original_width = get_monitors()[0].width\n        original_height = get_monitors()[0].height\n\n        splash_width = splash.width()\n        splash_height = splash.height()\n\n        splash.move(int((original_width - splash_width) / 2), int((original_height - splash_height) / 2))\n    except Exception:\n        pass\n\n    splash.show()\n\n    qss = \"\"\"\n    QMenuBar::item {\n        padding: 10px;\n        margin: 0 5px\n    }\n    QMenu{\n        border-radius: 5px;\n    }\n    QMenu::item{\n        padding: 8px;\n        margin: 5px;\n        border-radius: 5px;\n    }\n    QToolTip {\n            color: #111111;\n            background-color: #EEEEEE;\n            }\n    QCheckBox{\n        margin: 0 7px;\n    }\n    QComboBox{\n        font-size: 10pt;\n        font-weight: bold;\n    }\n    \"\"\"\n    try:\n        import yaml\n        with open(\"labelme/config/default_config.yaml\", \"r\") as f:\n            config = yaml.load(f, Loader=yaml.FullLoader)\n        qdarktheme.setup_theme(theme=config[\"theme\"], default_theme=\"dark\", additional_qss=qss)\n    except Exception as e:\n        print(f\"ERROR {e}\")\n\n    # create the main window and close the splash screen once it is ready\n    from labelme.app import MainWindow\n    win = MainWindow()\n    splash.finish(win)\n    win.showMaximized()\n    win.raise_()\n    sys.exit(app.exec())\n\n\n# this main block is required to generate an executable with pyinstaller\nif __name__ == \"__main__\":\n    main()"
  },
  {
    "path": "DLTA_AI_app/__main__.spec",
    "content": "# -*- mode: python -*-\n# vim: ft=python\n\nfrom glob import glob\n\n\nblock_cipher = None\n\ndatas_list = [\n    ('models_menu/*.json', 'models_menu'),\n    ('models_menu/*.py', 'models_menu'),\n    ('ultralytics/' , 'ultralytics'),\n    ('labelme/' , 'labelme'),\n    ('mmdetection/' , 'mmdetection'),\n    ('trackers/' , 'trackers')\n]\n\nhiddenimports_list = [\n    'mmcv' ,\n    'mmcv._ext',\n    'torchvision']\n\na = Analysis(\n    ['__main__.py'],\n    pathex=[],\n    binaries=[],\n    datas=datas_list,\n    hiddenimports=hiddenimports_list,\n    hookspath=[],\n    hooksconfig={},\n    runtime_hooks=[],\n    excludes=[],\n    win_no_prefer_redirects=False,\n    win_private_assemblies=False,\n    cipher=block_cipher,\n    noarchive=False,\n)\npyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)\n\nexe = EXE(\n    pyz,\n    a.scripts,\n    [],\n    exclude_binaries=True,\n    name='DLTA-AI',\n    debug=False,\n    bootloader_ignore_signals=False,\n    strip=False,\n    upx=True,\n    console=True,\n    disable_windowed_traceback=False,\n    argv_emulation=False,\n    target_arch=None,\n    codesign_identity=None,\n    entitlements_file=None,\n    icon=\"C:/Graduation Project/Auto Annotation Tool/DLTA-AI/DLTA-AI-app/labelme/icons/icon.png\"\n)\ncoll = COLLECT(\n    exe,\n    a.binaries,\n    a.zipfiles,\n    a.datas,\n    strip=False,\n    upx=True,\n    upx_exclude=[],\n    name='DLTA-AI',\n)\n"
  },
  {
    "path": "DLTA_AI_app/inferencing.py",
    "content": "\nimport copy\nfrom supervision.detection.core import Detections\nfrom time import time\nimport torch\nfrom mmdet.apis import inference_detector, init_detector, async_inference_detector\nimport cv2\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport warnings\n# from ultralytics.yolo.utils.ops import Profile, non_max_suppression, scale_boxes, process_mask, process_mask_native\nfrom labelme.utils.helpers import mathOps\n\nwarnings.filterwarnings(\"ignore\")\n\n\nclass models_inference():\n    def __init__(self):\n        self.annotating_models = {}\n\n\n    def full_points(bbox):\n        return np.array([[bbox[0], bbox[1]], [bbox[0], bbox[3]], [bbox[2], bbox[3]], [bbox[2], bbox[1]]])\n\n    @torch.no_grad()\n    def decode_file(self, img, model, classdict, threshold=0.3, img_array_flag=False):\n\n        if model.__class__.__name__ == \"YOLO\":  \n            if isinstance(img, str):\n                img = cv2.imread(img)\n\n            # get image size\n            img_resized = cv2.resize (img , (640, 640))\n            # default yolo arguments from yolov8 tracking repo\n                # imgsz=(640, 640),  # inference size (height, width)\n                # conf_thres=0.25,  # confidence threshold\n                # iou_thres=0.45,  # NMS IOU threshold\n                # max_det=1000,  # maximum detections per image\n            results = model(img_resized , conf = 0.25 , iou=  0.45 , verbose = False)\n            results = results[0]\n            # if len results is 0 then return empty dict\n            if results.masks is None:\n                return {\"results\": {}}\n\n            masks = results.masks.cpu().numpy().masks\n            masks = masks > 0.0\n            org_size = img.shape[:2]\n            out_size = masks.shape[1:]\n\n            # print(f'org_size : {org_size} , out_size : {out_size}')\n\n            # convert boxes to original image size same as the masks (coords = coords * org_size / out_size)\n            boxes = results.boxes.xyxy.cpu().numpy()\n            boxes = boxes * np.array([org_size[1] / out_size[1], org_size[0] /\n                                     out_size[0], org_size[1] / out_size[1], org_size[0] / out_size[0]])\n\n            detections = Detections(\n                xyxy=boxes,\n                confidence=results.boxes.conf.cpu().numpy(),\n                class_id=results.boxes.cls.cpu().numpy().astype(int)\n            )\n\n            polygons = []\n            result_dict = {}\n\n            resize_factors = [org_size[0] / out_size[0] , org_size[1] / out_size[1]]\n            if len(masks) == 0:\n                return {\"results\":{}}\n            for mask in masks:\n                polygon = mathOps.mask_to_polygons(\n                    mask, resize_factors=resize_factors)\n                polygons.append(polygon)\n\n            # detection is a tuple of  (box, confidence, class_id, tracker_id)\n            ind = 0\n            res_list = []\n            for detection in detections:\n                if round(detection[1], 2) < float(threshold):\n                    continue\n                result = {}\n                result[\"class\"] = classdict.get(int(detection[2]))\n                result[\"confidence\"] = str(round(detection[1], 2))\n                result[\"bbox\"] = detection[0].astype(int)\n                result[\"seg\"] = polygons[ind]\n                ind += 1\n                if result[\"class\"] == None:\n                    continue\n                if len(result[\"seg\"]) < 3:\n                    
continue\n\n                res_list.append(result)\n            result_dict[\"results\"] = res_list\n            return result_dict\n\n        if img_array_flag:\n            results = inference_detector(model, img)\n        else:\n            results = inference_detector(model, plt.imread(img))\n        # results = async_inference_detector(model, plt.imread(img_path))\n        torch.cuda.empty_cache()\n\n        results0 = []\n        results1 = []\n        for i in classdict.keys():\n            mask = results[0][i][:, 4] >= float(threshold)\n            results0.append(results[0][i][mask])\n            results1.append(list(np.array(results[1][i])[mask]))\n\n        # for i in classdict.keys():\n        #     results0.append(results[0][i])\n        #     results1.append(results[1][i])\n\n        # self.annotating_models[model.__class__.__name__] = [results0 , results1]\n        # print(self.annotating_models.keys())\n\n        # # if the length of the annotating_models is greater than 1 we need to merge the masks\n        # if len(self.annotating_models.keys()) > 1:\n        #     print(\"merging masks\")\n        #     results0,results1 =  self.merge_masks()\n\n        #     assert len(results0) == len(results1)\n        #     for i in range(len(results0)):\n        #         assert len(results0[i]) == len(results1[i])\n        return results0, results1\n\n    def polegonise(self, results0, results1, classdict, threshold=0.3, show_bbox_flag=False):\n        result_dict = {}\n        res_list = []\n\n        self.classes_numbering = [keyno for keyno in classdict.keys()]\n        # print(self.classes_numbering)\n        for classno in range(len(results0)):\n            for instance in range(len(results0[classno])):\n                if float(results0[classno][instance][-1]) < float(threshold):\n                    continue\n                result = {}\n                result[\"class\"] = classdict.get(\n                    self.classes_numbering[classno])\n                # Confidence\n                result[\"confidence\"] = str(\n                    round(results0[classno][instance][-1], 2))\n                if classno == 0:\n                    result[\"seg\"] = mathOps.mask_to_polygons(\n                        results1[classno][instance].astype(np.uint8), 10)\n                else:\n                    result[\"seg\"] = mathOps.mask_to_polygons(\n                        results1[classno][instance].astype(np.uint8), 25)\n\n                # result[\"bbox\"] = self.get_bbox(result[\"seg\"])\n                if show_bbox_flag:\n                    # result[\"bbox\"] = full_points(result[\"bbox\"]).tolist()\n                    # points = full_points(result[\"bbox\"])\n                    # result[\"x1\"] = points[0][0]\n                    # result[\"y1\"] = points[0][1]\n                    # result[\"x2\"] = points[1][0]\n                    # result[\"y2\"] = points[1][1]\n                    # result[\"x3\"] = points[2][0]\n                    # result[\"y3\"] = points[2][1]\n                    # result[\"x4\"] = points[3][0]\n                    # result[\"y4\"] = points[3][1]\n                    pass\n\n                if result[\"class\"] == None:\n                    continue\n                if len(result[\"seg\"]) < 3:\n                    continue\n                res_list.append(result)\n\n        result_dict[\"results\"] = res_list\n        return result_dict\n\n    def merge_masks(self):\n        tic = time()\n        result0 = []\n        result1 = []\n\n        # 
Counting for debugging purposes\n        # count the number of instances in each model\n        counts = count_instances(self.annotating_models)\n        # print the counts of each model\n        for model in counts.keys():\n            print(\"model {} has {} instances\".format(model, counts[model]))\n\n        # the following lines can be used if we use models with different number of classes\n        # classnos = []\n        # for model in self.annotating_models.keys():\n        #     classnos.append(len(self.annotating_models[model][1]))\n        # print(classnos)\n\n        # instead the following line of code will be used if we use models with the same number of classes\n        classnos = len(self.annotating_models[list(\n            self.annotating_models.keys())[0]][1])\n\n        merged_counts = 0\n        # initialize the result list with the same number of classes as the model with the most classes\n        for i in range(classnos):\n            result1.append([])\n            result0.append([])\n\n        # deep copy the annotating_models dict to pop all the masks we have merged (try delete it for future optimisation)\n        annotating_models_copy = copy.deepcopy(self.annotating_models)\n        # merge masks of the same class\n        for idx1, model in enumerate(self.annotating_models.keys()):\n            for classno in range(len(self.annotating_models[model][1])):\n                # check if an instance exists in the model in this class\n                if len(self.annotating_models[model][1][classno]) > 0:\n                    for instance in range(len(self.annotating_models[model][1][classno])):\n                        for idx2, model2 in enumerate(self.annotating_models.keys()):\n                            if model != model2 and idx2 > idx1:\n                                # print(type(annotating_models_copy[model][0][classno]),type(annotating_models_copy[model2][0][classno]))\n                                # check if the class exists in the other model\n                                if classno in range(len(self.annotating_models[model2][1])):\n                                    # check if an instance exists in the other model\n                                    if len(self.annotating_models[model2][1][classno]) > 0:\n                                        for instance2 in range(len(self.annotating_models[model2][1][classno])):\n                                            dirty = False\n                                            # print('checking class ' + str(classno)  ' of models ' + model + str(idx1) +  ' and ' + model2 + str(idx2))\n                                            # get the intersection percentage of the two masks\n                                            intersection = np.logical_and(\n                                                self.annotating_models[model][1][classno][instance], self.annotating_models[model2][1][classno][instance2])\n                                            intersection = np.sum(intersection)\n                                            union = np.logical_or(\n                                                self.annotating_models[model][1][classno][instance], self.annotating_models[model2][1][classno][instance2])\n                                            union = np.sum(union)\n                                            iou = intersection / union\n                                            # print('iou of class ' + str(classno) + ' instance ' + str(instance) + ' and instance ' + str(instance2) + ' is ' + 
str(iou))\n                                            if iou > 0.5:\n                                                if (annotating_models_copy[model][1][classno][instance] is None) or (annotating_models_copy[model2][1][classno][instance2] is None):\n                                                    dirty = True\n                                                if dirty == False:\n                                                    # merge their bboxes and store the result in result0\n                                                    bbox1 = self.annotating_models[model][0][classno][instance]\n                                                    bbox2 = self.annotating_models[model2][0][classno][instance2]\n                                                    bbox = [min(bbox1[0], bbox2[0]), min(bbox1[1], bbox2[1]), max(\n                                                        bbox1[2], bbox2[2]), max(bbox1[3], bbox2[3]), max(bbox1[4], bbox2[4])]\n                                                    result0[classno].append(\n                                                        bbox)\n                                                    # store the merged mask in result1\n                                                    result1[classno].append(np.logical_or(\n                                                        self.annotating_models[model][1][classno][instance], self.annotating_models[model2][1][classno][instance2]))\n                                                    # print('merging masks of class ' + str(classno) + ' instance ' + str(instance) + ' and instance ' + str(instance2) + ' of models ' + model + ' and ' + model2)\n                                                    merged_counts += 1\n                                                # remove the mask from both models\n                                                annotating_models_copy[model][1][classno][instance] = None\n                                                annotating_models_copy[model2][1][classno][instance2] = None\n                                                annotating_models_copy[model][0][classno][instance] = None\n                                                annotating_models_copy[model2][0][classno][instance2] = None\n                                                # continue to the next instance of the first model\n                                                break\n\n        counts_here = {}\n        # add the remaining masks to the result\n        for model in annotating_models_copy.keys():\n            counts_here[model] = 0\n            for classno in range(len(annotating_models_copy[model][1])):\n                for instance in range(len(annotating_models_copy[model][1][classno])):\n                    if annotating_models_copy[model][1][classno][instance] is not None:\n                        counts_here[model] += 1\n                        # print('adding mask of class ' + str(classno) + ' instance ' + str(instance) + ' of model ' + model)\n                        result1[classno].append(\n                            annotating_models_copy[model][1][classno][instance])\n                        result0[classno].append(\n                            annotating_models_copy[model][0][classno][instance])\n        # clear the annotating_models and add the result to it\n        self.annotating_models = {}\n        # self.annotating_models[\"merged\"] = [result0 , result1]\n        for model in counts_here.keys():\n            print(\"model {} has {} instances\".format(\n                
model, counts_here[model]))\n        print(\"merged {} instances\".format(merged_counts))\n        tac = time()\n        print(\"merging took {} ms\".format((tac - tic) * 1000))\n        return result0, result1\n\n\n# result will have ---> bbox , confidence , class_id , tracker_id , segment\n# result of the detection phase only should be (bbox , confidence , class_id , segment)\ndef count_instances(annotating_models):\n    # separate the counts for each model\n    counts = {}\n    for model in annotating_models.keys():\n        counts[model] = 0\n        for classno in range(len(annotating_models[model][1])):\n            counts[model] += len(annotating_models[model][1][classno])\n    return counts\n"
  },
  {
    "path": "DLTA_AI_app/labelme/__init__.py",
    "content": "# flake8: noqa\n\nimport logging\nimport sys\n\nfrom qtpy import QT_VERSION\n\n\n__appname__ = \"DLTA-AI\"\n\n# Semantic Versioning 2.0.0: https://semver.org/\n# 1. MAJOR version when you make incompatible API changes;\n# 2. MINOR version when you add functionality in a backwards-compatible manner;\n# 3. PATCH version when you make backwards-compatible bug fixes.\n__version__ = \"1.1\"\n\nQT4 = QT_VERSION[0] == \"4\"\nQT5 = QT_VERSION[0] == \"5\"\ndel QT_VERSION\n\nPY2 = sys.version[0] == \"2\"\nPY3 = sys.version[0] == \"3\"\ndel sys\n\nfrom labelme.label_file import LabelFile\nfrom labelme import testing\nfrom labelme import utils\n"
  },
  {
    "path": "DLTA_AI_app/labelme/app.py",
    "content": "# -*- coding: utf-8 -*-\nimport functools\nimport json\nimport math\nimport re\nimport copy\nimport imgviz\nimport torch\nimport cv2\nimport warnings\nimport os\nimport os.path as osp\nimport numpy as np\nfrom pathlib import Path\n\nfrom PyQt6 import QtCore\nfrom PyQt6.QtCore import Qt, QThread\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\nfrom PyQt6.QtCore import QObject, pyqtSignal, pyqtSlot\n\nfrom . import __appname__\nfrom . import PY2\nfrom . import QT5\nfrom . import utils\n\nfrom .utils.sam import Sam_Predictor\nfrom .utils.helpers import visualizations, mathOps\nfrom .utils.custom_exports import custom_exports_list\n\nfrom .config import get_config\n\nfrom .label_file import LabelFile\nfrom .label_file import LabelFileError\n\nfrom .logger import logger\n\nfrom .shape import Shape\n\nfrom .widgets import BrightnessContrastDialog, Canvas, LabelDialog, LabelListWidget, LabelListWidgetItem, ToolBar, UniqueLabelQListWidget, ZoomWidget\nfrom .widgets import MsgBox, interpolation_UI, exportData_UI, deleteSelectedShape_UI, scaleObject_UI, getIDfromUser_UI, notification\nfrom .widgets import runtime_data_UI, preferences_UI, shortcut_selector_UI, links, feedback_UI, check_updates_UI\nfrom .widgets.editLabel_videoMode import editLabel_idChanged_UI, editLabel_handle_data\nfrom .widgets.segmentation_options_UI import SegmentationOptionsUI\nfrom .widgets.merge_feature_UI import MergeFeatureUI\n\nfrom .intelligence import Intelligence\nfrom .intelligence import coco_classes, color_palette\n\nfrom supervision.detection.core import Detections\nfrom trackers.multi_tracker_zoo import create_tracker\nfrom ultralytics.yolo.utils.torch_utils import select_device\n\nwarnings.filterwarnings(\"ignore\")\n\n# the root of the repo\nFILE = Path(__file__).resolve()\nROOT = FILE.parents[0]\nROOT = ROOT.parents[0]\n\nROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative\ndevice = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')\nreid_weights = Path('osnet_x1_0_msmt17.pt')\nLABEL_COLORMAP = imgviz.label_colormap(value=200)\n\n\nclass MainWindow(QtWidgets.QMainWindow):\n\n    FIT_WINDOW, FIT_WIDTH, MANUAL_ZOOM = 0, 1, 2\n\n    tracking_progress_bar_signal = pyqtSignal(int)\n\n    def __init__(\n        self,\n        config=None,\n        filename=None,\n        output=None,\n        output_file=None,\n        output_dir=None,\n    ):\n\n        self.buttons_text_style_sheet = \"QPushButton {font-size: 10pt; margin: 2px 5px; padding: 2px 7px;font-weight: bold; background-color: #0d69f5; color: #FFFFFF;} QPushButton:hover {background-color: #4990ED;} QPushButton:disabled {background-color: #7A7A7A;}\"\n\n        if output is not None:\n            logger.warning(\n                \"argument output is deprecated, use output_file instead\"\n            )\n            if output_file is None:\n                output_file = output\n\n        # see labelme/config/default_config.yaml for valid configuration\n        if config is None:\n            config = get_config()\n        self._config = config\n        self.decodingCanceled = False\n        # set default shape colors\n        Shape.line_color = QtGui.QColor(*self._config[\"shape\"][\"line_color\"])\n        Shape.fill_color = QtGui.QColor(*self._config[\"shape\"][\"fill_color\"])\n        Shape.select_line_color = QtGui.QColor(\n            *self._config[\"shape\"][\"select_line_color\"]\n        )\n        Shape.select_fill_color = QtGui.QColor(\n            *self._config[\"shape\"][\"select_fill_color\"]\n        )\n   
     Shape.vertex_fill_color = QtGui.QColor(\n            *self._config[\"shape\"][\"vertex_fill_color\"]\n        )\n        Shape.hvertex_fill_color = QtGui.QColor(\n            *self._config[\"shape\"][\"hvertex_fill_color\"]\n        )\n\n        # update models json\n        mathOps.update_saved_models_json(os.getcwd())\n\n        # add the segmentation UI controls interfance\n        self.segmentation_options_UI = SegmentationOptionsUI(self)\n        # add the merge ui interface\n        self.merge_feature_UI = MergeFeatureUI(self)\n\n        super(MainWindow, self).__init__()\n        try:\n            self.intelligenceHelper = Intelligence(self)\n        except:\n            print(\"it seems you have a problem with initializing model\\ncheck you have at least one model\")\n            self.helper_first_time_flag = True\n        else:\n            self.helper_first_time_flag = False\n        self.setWindowTitle(__appname__)\n\n        # Whether we need to save or not.\n        self.dirty = False\n\n        self._noSelectionSlot = False\n\n        # Main widgets and related state.\n        self.labelDialog = LabelDialog(\n            parent=self,\n            labels=self._config[\"labels\"],\n            sort_labels=self._config[\"sort_labels\"],\n            show_text_field=self._config[\"show_label_text_field\"],\n            completion=self._config[\"label_completion\"],\n            fit_to_content=self._config[\"fit_to_content\"],\n            flags=self._config[\"label_flags\"],\n        )\n\n        self.labelList = LabelListWidget()\n        self.lastOpenDir = None\n\n        self.flag_dock = self.flag_widget = None\n        self.flag_dock = QtWidgets.QDockWidget(self.tr(\"Flags\"), self)\n        self.flag_dock.setObjectName(\"Flags\")\n        self.flag_widget = QtWidgets.QListWidget()\n        if config[\"flags\"]:\n            self.loadFlags({k: False for k in config[\"flags\"]})\n        # self.flag_dock.setWidget(self.flag_widget)\n        self.flag_widget.itemChanged.connect(self.setDirty)\n\n\n        self.labelList.itemSelectionChanged.connect(self.labelSelectionChanged)\n        self.labelList.itemDoubleClicked.connect(self.editLabel)\n        self.labelList.itemChanged.connect(self.labelItemChanged)\n        self.labelList.itemDropped.connect(self.labelOrderChanged)\n        self.shape_dock = QtWidgets.QDockWidget(\n            self.tr(\"Polygon Labels\"), self\n        )\n        self.shape_dock.setObjectName(\"Labels\")\n        self.shape_dock.setWidget(self.labelList)\n\n        self.uniqLabelList = UniqueLabelQListWidget()\n        self.uniqLabelList.setToolTip(\n            self.tr(\n                \"Select label to start annotating for it. 
\"\n                \"Press 'Esc' to deselect.\"\n            )\n        )\n        if self._config[\"labels\"]:\n            for label in self._config[\"labels\"]:\n                item = self.uniqLabelList.createItemFromLabel(label)\n                self.uniqLabelList.addItem(item)\n                rgb = self._get_rgb_by_label(label)\n                self.uniqLabelList.setItemLabel(item, label, rgb)\n        self.label_dock = QtWidgets.QDockWidget(self.tr(u\"Label List\"), self)\n        self.label_dock.setObjectName(u\"Label List\")\n        self.label_dock.setWidget(self.uniqLabelList)\n\n        self.fileSearch = QtWidgets.QLineEdit()\n        self.fileSearch.setPlaceholderText(self.tr(\"Search Filename\"))\n        self.fileSearch.textChanged.connect(self.fileSearchChanged)\n        self.fileListWidget = QtWidgets.QListWidget()\n        self.fileListWidget.itemSelectionChanged.connect(\n            self.fileSelectionChanged\n        )\n        fileListLayout = QtWidgets.QVBoxLayout()\n        fileListLayout.setContentsMargins(0, 0, 0, 0)\n        fileListLayout.setSpacing(0)\n        fileListLayout.addWidget(self.fileSearch)\n        fileListLayout.addWidget(self.fileListWidget)\n        self.file_dock = QtWidgets.QDockWidget(self.tr(u\"File List\"), self)\n        self.file_dock.setObjectName(u\"Files\")\n        fileListWidget = QtWidgets.QWidget()\n        fileListWidget.setLayout(fileListLayout)\n        self.file_dock.setWidget(fileListWidget)\n\n        self.vis_dock = QtWidgets.QDockWidget(\n            self.tr(u\"Visualization Options\"), self)\n        self.vis_dock.setObjectName(u\"Visualization Options\")\n        self.vis_widget = QtWidgets.QWidget()\n        self.vis_dock.setWidget(self.vis_widget)\n        self.zoomWidget = ZoomWidget()\n        self.setAcceptDrops(True)\n\n\n\n        self.canvas = self.labelList.canvas = Canvas(\n            epsilon=self._config[\"epsilon\"],\n            double_click=self._config[\"canvas\"][\"double_click\"],\n            num_backups=self._config[\"canvas\"][\"num_backups\"],\n        )\n        self.canvas.zoomRequest.connect(self.zoomRequest)\n\n        scrollArea = QtWidgets.QScrollArea()\n        scrollArea.setWidget(self.canvas)\n        scrollArea.setWidgetResizable(True)\n        self.scrollBars = {\n            Qt.Orientation.Vertical: scrollArea.verticalScrollBar(),\n            Qt.Orientation.Horizontal: scrollArea.horizontalScrollBar(),\n            Qt.Orientation.Horizontal.value: scrollArea.horizontalScrollBar(),\n            Qt.Orientation.Vertical.value: scrollArea.verticalScrollBar(),\n        }\n        self.canvas.scrollRequest.connect(self.scrollRequest)\n\n        self.canvas.newShape.connect(self.newShape)\n        self.canvas.shapeMoved.connect(self.setDirty)\n        self.canvas.selectionChanged.connect(self.shapeSelectionChanged)\n        self.canvas.drawingPolygon.connect(self.toggleDrawingSensitive)\n        self.canvas.edgeSelected.connect(self.canvasShapeEdgeSelected)\n        self.canvas.APPrefresh.connect(self.refresh_image_MODE)\n        \n        \n        # adding toolbars of SAM and and related slots\n        self.addSamControls()\n\n        # Canvas SAM slots\n        self.canvas.pointAdded.connect(self.run_sam_model)\n        self.canvas.samFinish.connect(self.sam_finish_annotation_button_clicked)\n        \n        # SAM predictor\n        self.sam_predictor = None\n        self.current_sam_shape = None\n        self.SAM_SHAPES_IN_IMAGE = []\n        self.sam_last_mode = \"rectangle\"\n\n        
self.setCentralWidget(scrollArea)\n\n        # for Export\n        self.target_directory = \"\"\n        self.save_path = \"\"\n        self.global_listObj = []\n\n        # for merge\n        self.multi_model_flag = False\n\n        # adding toolbars of video mode and and related slots\n        self.addVideoControls()\n        \n        # for video annotation and tracking\n        self.frame_time = 0\n        self.FRAMES_TO_SKIP = 30\n        self.TRACK_ASSIGNED_OBJECTS_ONLY = False\n        self.TrackingMode = False\n        self.current_annotation_mode = \"\"\n        self.CURRENT_ANNOATAION_FLAGS = {\"traj\": False,\n                                         \"bbox\": True,\n                                         \"id\": True,\n                                         \"class\": True,\n                                         \"mask\": True,\n                                         \"polygons\": True,\n                                         \"conf\": True}\n        self.CURRENT_ANNOATAION_TRAJECTORIES = {'length': 30,\n                                                'alpha': 0.70}\n        self.CURRENT_SHAPES_IN_IMG = []\n        self.featuresOptions = {'deleteDefault': \"this frame only\",\n                                'interpolationDefMethod': \"linear\",\n                                'interpolationDefType': \"all\",\n                                'interpolationOverwrite': False,\n                                'EditDefault': \"Edit only this frame\"}\n        self.key_frames = {}\n        self.id_frames_rec = {}\n        self.copiedShapes = []\n        self.INDEX_OF_CURRENT_FRAME = 1\n        self.interrupted = False\n        self.minID = -2\n        self.maxID = 0\n \n        for dock in [\"label_dock\", \"shape_dock\", \"file_dock\", \"vis_dock\"]:\n            if self._config[dock][\"closable\"]:\n                getattr(self, dock).setFeatures(QtWidgets.QDockWidget.DockWidgetFeature.DockWidgetClosable)\n            if self._config[dock][\"floatable\"]:\n                getattr(self, dock).setFeatures(QtWidgets.QDockWidget.DockWidgetFeature.DockWidgetFloatable)\n            if self._config[dock][\"movable\"]:\n                getattr(self, dock).setFeatures(QtWidgets.QDockWidget.DockWidgetFeature.DockWidgetMovable)\n            if self._config[dock][\"show\"] is False:\n                getattr(self, dock).setVisible(False)\n\n        self.addDockWidget(Qt.DockWidgetArea.RightDockWidgetArea, self.label_dock)\n        self.addDockWidget(Qt.DockWidgetArea.RightDockWidgetArea, self.shape_dock)\n        self.addDockWidget(Qt.DockWidgetArea.RightDockWidgetArea, self.file_dock)\n        self.addDockWidget(Qt.DockWidgetArea.RightDockWidgetArea, self.vis_dock)\n\n        # Actions\n        action = functools.partial(utils.newAction, self)\n        shortcuts = self._config[\"shortcuts\"]\n        quit = action(\n            self.tr(\"&Quit\"),\n            self.close,\n            shortcuts[\"quit\"],\n            \"quit\",\n            self.tr(\"Quit application\"),\n        )\n        open_ = action(\n            self.tr(\"&Open Image\"),\n            self.openFile,\n            shortcuts[\"open\"],\n            \"open\",\n            self.tr(f\"Open image or label file ({str(shortcuts['open'])})\"),\n        )\n        opendir = action(\n            self.tr(\"&Open Dir\"),\n            self.openDirDialog,\n            shortcuts[\"open_dir\"],\n            \"opendir\",\n            self.tr(f\"Open Dir ({str(shortcuts['open_dir'])})\"),\n        )\n        save = action(\n    
        self.tr(\"&Save\"),\n            self.saveFile,\n            shortcuts[\"save\"],\n            \"save\",\n            self.tr(f\"Save labels to file ({str(shortcuts['save'])})\"),\n            enabled=False,\n        )\n        export = action(\n            self.tr(\"&Export\"),\n            self.exportData,\n            shortcuts[\"export\"],\n            \"export\",\n            self.tr(\n                f\"Export annotations to COCO format ({str(shortcuts['export'])})\"),\n            enabled=False,\n        )\n        modelExplorer = action(\n            self.tr(\"&Model Explorer\"),\n            self.model_explorer,\n            None,\n            \"checklist\",\n            self.tr(u\"Model Explorer\"),\n        )\n        saveAs = action(\n            self.tr(\"&Save As\"),\n            self.saveFileAs,\n            shortcuts[\"save_as\"],\n            \"save-as\",\n            self.tr(\"Save labels to a different file\"),\n            enabled=False,\n        )\n        deleteFile = action(\n            self.tr(\"&Delete File\"),\n            self.deleteFile,\n            shortcuts[\"delete_file\"],\n            \"delete\",\n            self.tr(\"Delete current label file\"),\n            enabled=False,\n        )\n        changeOutputDir = action(\n            self.tr(\"&Change Output Dir\"),\n            slot=self.changeOutputDirDialog,\n            shortcut=shortcuts[\"save_to\"],\n            icon=\"open\",\n            tip=self.tr(u\"Change where annotations are loaded/saved\"),\n        )\n        saveAuto = action(\n            text=self.tr(\"Save &Automatically\"),\n            slot=lambda x: self.actions.saveAuto.setChecked(x),\n            icon=\"save\",\n            tip=self.tr(\"Save automatically\"),\n            checkable=True,\n            enabled=True,\n        )\n        saveAuto.setChecked(self._config[\"auto_save\"])\n        saveWithImageData = action(\n            text=\"Save With Image Data\",\n            slot=self.enableSaveImageWithData,\n            tip=\"Save image data in label file\",\n            checkable=True,\n            checked=self._config[\"store_data\"],\n        )\n        close = action(\n            \"&Close\",\n            self.closeFile,\n            shortcuts[\"close\"],\n            \"close\",\n            \"Close current file\",\n        )\n        toggle_keep_prev_mode = action(\n            self.tr(\"Keep Previous Annotation\"),\n            self.toggleKeepPrevMode,\n            shortcuts[\"toggle_keep_prev_mode\"],\n            None,\n            self.tr('Toggle \"keep pevious annotation\" mode'),\n            checkable=True,\n        )\n        toggle_keep_prev_mode.setChecked(self._config[\"keep_prev\"])\n        createMode = action(\n            self.tr(\"Create Polygons\"),\n            self.setCreateMode,\n            shortcuts[\"create_polygon\"],\n            \"objects\",\n            self.tr(\"Start drawing polygons\"),\n            enabled=False,\n        )\n        editMode = action(\n            self.tr(\"Edit Polygons\"),\n            self.setEditMode,\n            shortcuts[\"edit_polygon\"],\n            \"edit\",\n            self.tr(\"Move and edit the selected polygons\"),\n            enabled=False,\n        )\n        delete = action(\n            self.tr(\"Delete Polygons\"),\n            self.deleteSelectedShape,\n            shortcuts[\"delete_polygon\"],\n            \"close\",\n            self.tr(\"Delete the selected polygons\"),\n            enabled=False,\n        )\n        copy = action(\n         
   self.tr(\"Duplicate Polygons\"),\n            self.copySelectedShape,\n            shortcuts[\"duplicate_polygon\"],\n            \"copy\",\n            self.tr(\"Create a duplicate of the selected polygons\"),\n            enabled=False,\n        )\n        undoLastPoint = action(\n            self.tr(\"Undo last point\"),\n            self.canvas.undoLastPoint,\n            shortcuts[\"undo_last_point\"],\n            \"undo\",\n            self.tr(\"Undo last drawn point\"),\n            enabled=False,\n        )\n        addPointToEdge = action(\n            text=self.tr(\"Add Point to Edge\"),\n            slot=self.canvas.addPointToEdge,\n            shortcut=shortcuts[\"add_point_to_edge\"],\n            icon=\"add_point\",\n            tip=self.tr(\"Add point to the nearest edge\"),\n            enabled=False,\n        )\n        removePoint = action(\n            text=\"Remove Selected Point\",\n            slot=self.removeSelectedPoint,\n            icon=\"edit\",\n            tip=\"Remove selected point from polygon\",\n            enabled=False,\n        )\n        undo = action(\n            self.tr(\"Undo\"),\n            self.undoShapeEdit,\n            shortcuts[\"undo\"],\n            \"undo\",\n            self.tr(\"Undo last add and edit of shape\"),\n            enabled=False,\n        )\n        hideAll = action(\n            self.tr(\"&Hide\\nPolygons\"),\n            functools.partial(self.togglePolygons, False),\n            icon=\"eye\",\n            tip=self.tr(\"Hide all polygons\"),\n            enabled=False,\n        )\n        showAll = action(\n            self.tr(\"&Show\\nPolygons\"),\n            functools.partial(self.togglePolygons, True),\n            icon=\"eye\",\n            tip=self.tr(\"Show all polygons\"),\n            enabled=False,\n        )\n        zoom = QtWidgets.QWidgetAction(self)\n        zoom.setDefaultWidget(self.zoomWidget)\n        self.zoomWidget.setWhatsThis(\n            self.tr(\n                \"Zoom in or out of the image. 
Also accessible with \"\n                \"{} and {} from the canvas.\"\n            ).format(\n                utils.fmtShortcut(\n                    \"{},{}\".format(shortcuts[\"zoom_in\"], shortcuts[\"zoom_out\"])\n                ),\n                utils.fmtShortcut(self.tr(\"Ctrl+Wheel\")),\n            )\n        )\n        self.zoomWidget.setEnabled(False)\n\n        zoomIn = action(\n            self.tr(\"Zoom &In\"),\n            functools.partial(self.addZoom, 1.1),\n            shortcuts[\"zoom_in\"],\n            \"zoom-in\",\n            self.tr(\"Increase zoom level\"),\n            enabled=False,\n        )\n        zoomOut = action(\n            self.tr(\"&Zoom Out\"),\n            functools.partial(self.addZoom, 0.9),\n            shortcuts[\"zoom_out\"],\n            \"zoom-out\",\n            self.tr(\"Decrease zoom level\"),\n            enabled=False,\n        )\n        zoomOrg = action(\n            self.tr(\"&Original size\"),\n            functools.partial(self.setZoom, 100),\n            shortcuts[\"zoom_to_original\"],\n            \"zoom\",\n            self.tr(\"Zoom to original size\"),\n            enabled=False,\n        )\n        fitWindow = action(\n            self.tr(\"&Fit Window\"),\n            self.setFitWindow,\n            shortcuts[\"fit_window\"],\n            \"fit-window\",\n            self.tr(\"Zoom follows window size\"),\n            checkable=True,\n            enabled=False,\n        )\n        fitWidth = action(\n            self.tr(\"Fit &Width\"),\n            self.setFitWidth,\n            shortcuts[\"fit_width\"],\n            \"fit-width\",\n            self.tr(\"Zoom follows window width\"),\n            checkable=True,\n            enabled=False,\n        )\n        brightnessContrast = action(\n            \"&Brightness Contrast\",\n            self.brightnessContrast,\n            None,\n            \"color\",\n            \"Adjust brightness and contrast\",\n            enabled=False,\n        )\n        show_cross_line = action(\n            self.tr(\"&Toggle Cross Line\"),\n            self.enable_show_cross_line,\n            tip=self.tr(\"cross line for mouse position\"),\n            icon=\"cartesian\",\n            checkable=True,\n            checked=self._config[\"show_cross_line\"],\n            enabled=True,\n        )\n        \n        # Group zoom controls into a list for easier toggling.\n        zoomActions = (\n            self.zoomWidget,\n            zoomIn,\n            zoomOut,\n            zoomOrg,\n            fitWindow,\n            fitWidth,\n        )\n        self.zoomMode = self.FIT_WINDOW\n        fitWindow.setChecked(True)\n        self.scalers = {\n            self.FIT_WINDOW: self.scaleFitWindow,\n            self.FIT_WIDTH: self.scaleFitWidth,\n            # Set to one to scale to 100% when loading files.\n            self.MANUAL_ZOOM: lambda: 1,\n        }\n\n        edit = action(\n            self.tr(\"Edit &Label\"),\n            self.editLabel,\n            shortcuts[\"edit_label\"],\n            \"label\",\n            self.tr(\"Modify the label of the selected polygon\"),\n            enabled=False,\n        )\n        enhance = action(\n            self.tr(\"&Enhace Polygons\"),\n            self.sam_enhance_annotation_button_clicked,\n            shortcuts[\"SAM_enhance\"],\n            \"SAM\",\n            self.tr(\"Enhance the selected polygon with AI\"),\n            enabled=True,\n        )\n        interpolate = action(\n            self.tr(\"&Interpolation Tracking\"),\n        
    self.interpolateMENU,\n            shortcuts[\"interpolate\"],\n            \"tracking\",\n            self.tr(\"Interpolate the selected polygon between to frames to Track it\"),\n            enabled=True,\n        )\n        mark_as_key = action(\n            self.tr(\"&Mark as key\"),\n            self.mark_as_key,\n            shortcuts[\"mark_as_key\"],\n            \"mark\",\n            self.tr(\"Mark this frame as KEY for interpolation\"),\n            enabled=True,\n        )\n        remove_all_keyframes = action(\n            self.tr(\"&Remove all keyframes\"),\n            self.remove_all_keyframes,\n            None,\n            \"mark\",\n            self.tr(\"Remove all keyframes\"),\n            enabled=True,\n        )\n        scale = action(\n            self.tr(\"&Scale\"),\n            self.scaleMENU,\n            shortcuts[\"scale\"],\n            \"resize\",\n            self.tr(\"Scale the selected polygon\"),\n            enabled=True,\n        )\n        copyShapes = action(\n            self.tr(\"&Copy\"),\n            self.ctrlCopy,\n            shortcuts[\"copy\"],\n            \"copy\",\n            self.tr(\"Copy selected polygons\"),\n            enabled=True,\n        )\n        pasteShapes = action(\n            self.tr(\"&Paste\"),\n            self.ctrlPaste,\n            shortcuts[\"paste\"],\n            \"paste\",\n            self.tr(\"paste copied polygons\"),\n            enabled=True,\n        )\n        update_curr_frame = action(\n            self.tr(\"&Update current frame\"),\n            self.update_current_frame_annotation_button_clicked,\n            None,\n            \"done\",\n            self.tr(\"Update frame\"),\n            enabled=True,\n        )\n        ignore_changes = action(\n            self.tr(\"&Ignore changes\"),\n            self.main_video_frames_slider_changed,\n            shortcuts[\"ignore_updates\"],\n            \"delete\",\n            self.tr(\"Ignore unsaved changes\"),\n            enabled=True,\n        )\n\n        fill_drawing = action(\n            self.tr(\"Fill Drawing Polygon\"),\n            self.canvas.setFillDrawing,\n            None,\n            \"color\",\n            self.tr(\"Fill polygon while drawing\"),\n            checkable=True,\n            enabled=True,\n        )\n        fill_drawing.trigger()\n        \n        # intelligence actions\n        annotate_one_action = action(\n            self.tr(\"Run Model on Current Image\"),\n            self.annotate_one,\n            None,\n            \"open\",\n            self.tr(\"Run Model on Current Image\")\n        )\n        annotate_batch_action = action(\n            self.tr(\"Run Model on All Images\"),\n            self.annotate_batch,\n            None,\n            \"file\",\n            self.tr(\"Run Model on All Images\")\n        )\n        set_conf_threshold = action(\n            self.tr(\"Confidence Threshold\"),\n            self.setConfThreshold,\n            None,\n            \"tune\",\n            self.tr(\"Confidence Threshold\")\n        )\n        set_iou_threshold = action(\n            self.tr(\"IOU Threshold (NMS)\"),\n            self.setIOUThreshold,\n            None,\n            \"iou\",\n            self.tr(\"IOU Threshold (Non Maximum Suppression)\")\n        )\n        select_classes = action(\n            self.tr(\"Select Classes\"),\n            self.selectClasses,\n            None,\n            \"checklist\",\n            self.tr(\"Select Classes to be Annotated\")\n        )\n        
merge_segmentation_models = action(\n            self.tr(\"Merge Segmentation Models\"),\n            self.mergeSegModels,\n            None,\n            \"merge\",\n            self.tr(\"Merge Segmentation Models\")\n        )\n        runtime_data = action(\n            self.tr(\"Show Runtime Data\"),\n            runtime_data_UI.PopUp,\n            None,\n            \"runtime\",\n            self.tr(\"Show Runtime Data\")\n        )\n        git_hub = action(\n            self.tr(\"GitHub Repository\"),\n            links.open_git_hub,\n            None,\n            \"github\",\n            self.tr(\"GitHub Repository\")\n        )\n        feedback = action(\n            self.tr(\"Feedback\"),\n            feedback_UI.PopUp,\n            None,\n            \"feedback\",\n            self.tr(\"Feedback\")\n        )\n        license = action(\n            self.tr(\"license\"),\n            links.open_license,\n            None,\n            \"license\",\n            self.tr(\"license\")\n        )\n        user_guide = action(\n            self.tr(\"User Guide\"),\n            links.open_guide,\n            None,\n            \"guide\",\n            self.tr(\"User Guide\")\n        )\n        check_updates = action(\n            self.tr(\"Check for Updates\"),\n            check_updates_UI.PopUp,\n            None,\n            \"info\",\n            self.tr(\"Check for Updates\")\n        )\n        preferences = action(\n            self.tr(\"Preferences\"),\n            preferences_UI.PopUp,\n            None,\n            \"settings\",\n            self.tr(\"Preferences\")\n        )\n        shortcut_selector = action(\n            self.tr(\"Shortcuts\"),\n            shortcut_selector_UI.PopUp,\n            None,\n            \"shortcuts\",\n            self.tr(\"Shortcuts\")\n        )\n        sam = action(\n            self.tr(\"Toggle SAM Toolbar\"),\n            self.Segment_anything,\n            None,\n            \"SAM\",\n            self.tr(\"Toggle SAM Toolbar\")\n        )\n        openVideo = action(\n            self.tr(\"Open &Video\"),\n            self.openVideo,\n            shortcuts[\"open_video\"],\n            \"video\",\n            self.tr(f\"Open a video file ({shortcuts['open_video']})\"),\n        )\n        openVideoFrames = action(\n            self.tr(\"Open Video as Frames\"),\n            self.openVideoFrames,\n            shortcuts[\"open_video_frames\"],\n            \"frames\",\n            self.tr(\n                f\"Open Video as Frames ({shortcuts['open_video_frames']})\"),\n        )\n\n        # Lavel list context menu.\n        labelmenu = QtWidgets.QMenu()\n        utils.addActions(labelmenu, (edit, delete))\n        self.labelList.setContextMenuPolicy(Qt.ContextMenuPolicy.CustomContextMenu)\n        self.labelList.customContextMenuRequested.connect(\n            self.popLabelListMenu\n        )\n\n        # Store actions for further handling.\n        self.actions = utils.struct(\n            saveAuto=saveAuto,\n            saveWithImageData=saveWithImageData,\n            changeOutputDir=changeOutputDir,\n            save=save,\n            saveAs=saveAs,\n            open=open_,\n            close=close,\n            deleteFile=deleteFile,\n            toggleKeepPrevMode=toggle_keep_prev_mode,\n            delete=delete,\n            edit=edit,\n            copy=copy,\n            undoLastPoint=undoLastPoint,\n            undo=undo,\n            addPointToEdge=addPointToEdge,\n            removePoint=removePoint,\n            
createMode=createMode,\n            editMode=editMode,\n            zoom=zoom,\n            zoomIn=zoomIn,\n            zoomOut=zoomOut,\n            zoomOrg=zoomOrg,\n            fitWindow=fitWindow,\n            fitWidth=fitWidth,\n            brightnessContrast=brightnessContrast,\n            show_cross_line=show_cross_line,\n            zoomActions=zoomActions,\n            export=export,\n            openVideo=openVideo,\n            openVideoFrames=openVideoFrames,\n            fileMenuActions=(open_, opendir, save, saveAs, close, quit),\n            modelExplorer=modelExplorer,\n            runtime_data=runtime_data,\n            tool=(),\n            # XXX: need to add some actions here to activate the shortcut\n            editMenu=(\n                edit,\n                copy,\n                delete,\n                None,\n                undo,\n                undoLastPoint,\n                None,\n                addPointToEdge,\n            ),\n            # menu shown at right click\n            menu=(\n                createMode,\n                editMode,\n                edit,\n                enhance,\n                interpolate,\n                mark_as_key,\n                remove_all_keyframes,\n                scale,\n                copyShapes,\n                pasteShapes,\n                copy,\n                delete,\n                undo,\n                undoLastPoint,\n                addPointToEdge,\n                removePoint,\n                update_curr_frame,\n                ignore_changes\n            ),\n            onLoadActive=(\n                close,\n                createMode,\n                editMode,\n                brightnessContrast,\n            ),\n            onShapesPresent=(saveAs, hideAll, showAll),\n        )\n\n        self.canvas.vertexSelected.connect(self.actions.removePoint.setEnabled)\n\n        self.menus = utils.struct(\n            file=self.menu(self.tr(\"&File\")),\n            edit=self.menu(self.tr(\"&Edit\")),\n            view=self.menu(self.tr(\"&View\")),\n            intelligence=self.menu(self.tr(\"&Auto Annotation\")),\n            model_selection=self.menu(self.tr(\"&Model Selection\")),\n            options=self.menu(self.tr(\"&Options\")),\n            help=self.menu(self.tr(\"&Help\")),\n\n            recentFiles=QtWidgets.QMenu(self.tr(\"Open &Recent\")),\n            saved_models=QtWidgets.QMenu(self.tr(\"Select Segmentation model\")),\n            tracking_models=QtWidgets.QMenu(self.tr(\"Select Tracking model\")),\n            labelList=labelmenu,\n            certain_area=QtWidgets.QMenu(self.tr(\"Select Certain Area\")),\n\n            ui_elements=QtWidgets.QMenu(self.tr(\"&Show UI Elements\")),\n            zoom_options=QtWidgets.QMenu(self.tr(\"&Zoom Options\")),\n\n\n        )\n \n        utils.addActions(\n            self.menus.file,\n            (\n                open_,\n                opendir,\n                openVideo,\n                openVideoFrames,\n                None,\n                save,\n                saveAs,\n                export,\n                None,\n                close,\n                quit,\n            ),\n        )\n        utils.addActions(self.menus.intelligence,\n                         (annotate_one_action,\n                          annotate_batch_action,\n                          )\n                         )\n        # View menu and its submenus\n        self.menus.ui_elements.setIcon(QtGui.QIcon(\"labelme/icons/UI.png\"))\n        
utils.addActions(self.menus.ui_elements,\n                         (\n                             self.vis_dock.toggleViewAction(),\n                             self.label_dock.toggleViewAction(),\n                             self.shape_dock.toggleViewAction(),\n                             self.file_dock.toggleViewAction(),\n                         )\n                         )\n        self.menus.zoom_options.setIcon(QtGui.QIcon(\"labelme/icons/zoom.png\"))\n        utils.addActions(self.menus.zoom_options,\n                         (\n                             zoomIn,\n                             zoomOut,\n                             zoomOrg,\n                             None,\n                             fitWindow,\n                             fitWidth,\n                         )\n                         )\n        utils.addActions(\n            self.menus.view,\n            (sam,\n                self.menus.ui_elements,\n                None,\n                hideAll,\n                showAll,\n                None,\n                self.menus.zoom_options,\n                None,\n                show_cross_line,\n             ),\n        )\n\n        # Model selection menu\n        self.menus.saved_models.setIcon(\n            QtGui.QIcon(\"labelme/icons/brain.png\"))\n        self.menus.tracking_models.setIcon(\n            QtGui.QIcon(\"labelme/icons/tracking.png\"))\n        self.menus.certain_area.setIcon(\n            QtGui.QIcon(\"labelme/icons/polygon.png\"))\n\n        utils.addActions(\n            self.menus.model_selection,\n            (\n                self.menus.saved_models,\n                merge_segmentation_models,\n                None,\n                self.menus.tracking_models,\n                None,\n                modelExplorer,\n\n            ),\n        )\n\n        # Options menu\n        utils.addActions(\n            self.menus.options,\n            (\n                set_conf_threshold,\n                set_iou_threshold,\n                self.menus.certain_area,\n                None,\n                select_classes,\n\n            ),\n        )\n        # Help menu\n        utils.addActions(\n            self.menus.help,\n            (\n                user_guide,\n                preferences,\n                shortcut_selector,\n                None,\n                git_hub,\n                feedback,\n                None,\n                runtime_data,\n                None,\n                license,\n                check_updates\n\n            ),\n        )\n\n        self.menus.file.aboutToShow.connect(self.updateFileMenu)\n        self.menus.file.aboutToShow.connect(self.update_models_menu)\n\n        # Custom context menu for the canvas widget:\n        utils.addActions(self.canvas.menus[0], self.actions.menu)\n        utils.addActions(\n            self.canvas.menus[1],\n            (\n                action(\"&Copy here\", self.copyShape),\n                action(\"&Move here\", self.moveShape),\n            ),\n        )\n\n        self.tools = self.toolbar(\"Tools\")\n        # Menu buttons on Left\n        self.actions.tool = (\n            open_,\n            opendir,\n            openVideo,\n            None,\n            save,\n            export,\n            None,\n            createMode,\n            editMode,\n            edit,\n            None,\n            delete,\n            undo,\n            None,\n\n        )\n        self.statusBar().showMessage(self.tr(\"%s started.\") % __appname__)\n        
self.statusBar().show()\n\n        if output_file is not None and self._config[\"auto_save\"]:\n            logger.warn(\n                \"If `auto_save` argument is True, `output_file` argument \"\n                \"is ignored and output filename is automatically \"\n                \"set as IMAGE_BASENAME.json.\"\n            )\n        self.output_file = output_file\n        self.output_dir = output_dir\n\n        # Application state.\n        self.image = QtGui.QImage()\n\n        self.imagePath = None\n        self.recentFiles = []\n        self.maxRecent = 7\n        self.otherData = None\n        self.zoom_level = 100\n        self.fit_window = False\n        self.zoom_values = {}  # key=filename, value=(zoom_mode, zoom_value)\n        self.brightnessContrast_values = {}\n        self.scroll_values = {\n            Qt.Orientation.Horizontal: {},\n            Qt.Orientation.Vertical: {},\n            Qt.Orientation.Horizontal.value: {},\n            Qt.Orientation.Vertical.value: {},\n        }  # key=filename, value=scroll_value\n\n        if filename is not None and osp.isdir(filename):\n            self.importDirImages(filename, load=False)\n        else:\n            self.filename = filename\n\n        if config[\"file_search\"]:\n            self.fileSearch.setText(config[\"file_search\"])\n            self.fileSearchChanged()\n\n        # XXX: Could be completely declarative.\n        # Restore application settings.\n        self.settings = QtCore.QSettings(\"labelme\", \"labelme\")\n        # FIXME: QSettings.value can return None on PyQt4\n        self.recentFiles = self.settings.value(\"recentFiles\", []) or []\n        size = self.settings.value(\"window/size\", QtCore.QSize(600, 500))\n        position = self.settings.value(\"window/position\", QtCore.QPoint(0, 0))\n        self.resize(size)\n        self.move(position)\n        # or simply:\n        # self.restoreGeometry(settings['window/geometry']\n        self.restoreState(\n            self.settings.value(\"window/state\", QtCore.QByteArray())\n        )\n\n        # Populate the File menu dynamically.\n        self.updateFileMenu()\n        self.update_models_menu()\n        # Since loading the file may take some time,\n        # make sure it runs in the background.\n        if self.filename is not None:\n            self.queueEvent(functools.partial(self.loadFile, self.filename))\n\n        # Callbacks:\n        self.zoomWidget.valueChanged.connect(self.paintCanvas)\n\n        self.populateModeActions()\n        self.right_click_menu()\n\n        QtGui.QShortcut(QtGui.QKeySequence(self._config['shortcuts']['stop']), self).activated.connect(self.Escape_clicked)\n\n\n    def menu(self, title, actions=None):\n        menu = self.menuBar().addMenu(title)\n        if actions:\n            utils.addActions(menu, actions)\n        return menu\n\n    def toolbar(self, title, actions=None):\n        toolbar = ToolBar(title)\n        toolbar.setObjectName(\"%sToolBar\" % title)\n        # toolbar.setOrientation(Qt.Orientation.Vertical)\n        toolbar.setToolButtonStyle(Qt.ToolButtonStyle.ToolButtonTextUnderIcon)\n        if actions:\n            utils.addActions(toolbar, actions)\n        self.addToolBar(Qt.ToolBarArea.LeftToolBarArea, toolbar)\n        return toolbar\n\n    # Support Functions\n\n    def noShapes(self):\n        return not len(self.labelList)\n\n    def populateModeActions(self):\n        tool, menu = self.actions.tool, self.actions.menu\n        self.tools.clear()\n        utils.addActions(self.tools, 
tool)\n        self.canvas.menus[0].clear()\n        utils.addActions(self.canvas.menus[0], menu)\n        self.menus.edit.clear()\n        actions = (\n            self.actions.editMode,\n        )\n        utils.addActions(self.menus.edit, actions + self.actions.editMenu)\n\n    def setDirty(self):\n        # Even if we autosave the file, we keep the ability to undo\n        self.actions.undo.setEnabled(self.canvas.isShapeRestorable)\n\n        if self._config[\"auto_save\"] or self.actions.saveAuto.isChecked():\n            # derive the label file path from the current image (IMAGE_BASENAME.json)\n            label_file = osp.splitext(self.imagePath)[0] + \".json\"\n            if self.output_dir:\n                label_file_without_path = osp.basename(label_file)\n                label_file = osp.join(self.output_dir, label_file_without_path)\n\n            if os.path.isdir(label_file):\n                os.remove(label_file)\n\n            self.saveLabels(label_file)\n            return\n        self.dirty = True\n        self.actions.save.setEnabled(True)\n        title = __appname__\n        if self.filename is not None:\n            title = \"{} - {}*\".format(title, self.filename)\n        self.setWindowTitle(title)\n\n    def setClean(self):\n        self.dirty = False\n        self.actions.save.setEnabled(False)\n        self.actions.createMode.setEnabled(True)\n        title = __appname__\n        if self.filename is not None:\n            title = \"{} - {}\".format(title, self.filename)\n        self.setWindowTitle(title)\n\n        if self.hasLabelFile():\n            self.actions.deleteFile.setEnabled(True)\n        else:\n            self.actions.deleteFile.setEnabled(False)\n\n    def toggleActions(self, value=True):\n        \"\"\"Enable/Disable widgets which depend on an opened image.\"\"\"\n        for z in self.actions.zoomActions:\n            z.setEnabled(value)\n        for action in self.actions.onLoadActive:\n            action.setEnabled(value)\n\n    def canvasShapeEdgeSelected(self, selected, shape):\n        self.actions.addPointToEdge.setEnabled(\n            selected and shape and shape.canAddPoint()\n        )\n\n    def queueEvent(self, function):\n        QtCore.QTimer.singleShot(0, function)\n\n    def status(self, message, delay=5000):\n        self.statusBar().showMessage(message, delay)\n\n    def resetState(self):\n        self.labelList.clear()\n        self.filename = None\n        self.imagePath = None\n        self.imageData = None\n        self.CURRENT_FRAME_IMAGE = None\n        self.labelFile = None\n        self.otherData = None\n        self.canvas.resetState()\n\n    def currentItem(self):\n        items = self.labelList.selectedItems()\n        if items:\n            return items[0]\n        return None\n\n    def addRecentFile(self, filename):\n        if filename in self.recentFiles:\n            self.recentFiles.remove(filename)\n        elif len(self.recentFiles) >= self.maxRecent:\n            self.recentFiles.pop()\n        self.recentFiles.insert(0, filename)\n\n    # Callbacks\n\n    def Escape_clicked(self):\n        \"\"\"\n        Summary:\n            This function is called when the user presses the escape key.\n            It resets the SAM toolbar and the canvas.\n            It also interrupts any running annotation process (tracking, interpolation, etc.).\n        \"\"\"\n\n        self.interrupted = True\n        self.sam_reset_button_clicked()\n        if self.canvas.tracking_area == \"drawing\":\n            self.certain_area_clicked(1)\n\n    def undoShapeEdit(self):\n        self.canvas.restoreShape()\n        self.labelList.clear()\n        
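# rebuild the label list from the shapes restored on the canvas\n        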
self.loadShapes(self.canvas.shapes)\n        self.actions.undo.setEnabled(self.canvas.isShapeRestorable)\n\n    def toggleDrawingSensitive(self, drawing=True):\n        \"\"\"Toggle drawing sensitive.\n        In the middle of drawing, toggling between modes should be disabled.\n        \"\"\"\n        self.actions.editMode.setEnabled(not drawing)\n        self.actions.undoLastPoint.setEnabled(drawing)\n        self.actions.undo.setEnabled(not drawing)\n        self.actions.delete.setEnabled(not drawing)\n\n    def toggleDrawMode(self, edit=True, createMode=\"polygon\"):\n        self.canvas.setEditing(edit)\n        self.canvas.createMode = createMode\n        if edit:\n            self.actions.createMode.setEnabled(True)\n        else:\n            if createMode == \"polygon\":\n                self.actions.createMode.setEnabled(False)\n            else:\n                self.actions.createMode.setEnabled(True)\n        self.actions.editMode.setEnabled(not edit)\n\n    def setEditMode(self):\n\n        self.turnOFF_SAM()\n\n        try:\n            x = self.CURRENT_VIDEO_PATH\n        except:\n            self.toggleDrawMode(True)\n            return\n        self.update_current_frame_annotation()\n        self.toggleDrawMode(True)\n\n    def updateFileMenu(self):\n        current = self.filename\n\n        def exists(filename):\n            return osp.exists(str(filename))\n\n        menu = self.menus.recentFiles\n        menu.clear()\n        files = [f for f in self.recentFiles if f != current and exists(f)]\n        for i, f in enumerate(files):\n            icon = utils.newIcon(\"brain\")\n            action = QtGui.QAction(\n                icon, \"&%d %s\" % (i + 1, QtCore.QFileInfo(f).fileName()), self\n            )\n            action.triggered.connect(functools.partial(self.loadRecent, f))\n            menu.addAction(action)\n\n    def update_models_menu(self):\n\n        menu = self.menus.saved_models\n        menu.clear()\n\n        with open(\"saved_models.json\") as json_file:\n            data = json.load(json_file)\n            # loop through all the models\n            i = 0\n            for model_name in list(data.keys()):\n                if i >= 6:\n                    break\n                icon = utils.newIcon(\"brain\")\n                action = QtGui.QAction(\n                    icon, \"&%d %s\" % (i + 1, model_name), self)\n                action.triggered.connect(functools.partial(\n                    self.change_curr_model, model_name))\n                menu.addAction(action)\n                i += 1\n        self.add_tracking_models_menu()\n        self.add_certain_area_menu()\n\n    def add_tracking_models_menu(self):\n        menu2 = self.menus.tracking_models\n        menu2.clear()\n\n        icon = utils.newIcon(\"tracking\")\n        action = QtGui.QAction(\n            icon, \"1 Byte track (DEFAULT)\", self)\n        action.triggered.connect(\n            lambda: self.update_tracking_method('bytetrack'))\n        menu2.addAction(action)\n\n        icon = utils.newIcon(\"tracking\")\n        action = QtGui.QAction(\n            icon, \"2 Strong SORT  (lowest id switch)\", self)\n        action.triggered.connect(\n            lambda: self.update_tracking_method('strongsort'))\n        menu2.addAction(action)\n\n        icon = utils.newIcon(\"tracking\")\n        action = QtGui.QAction(\n            icon, \"3 Deep SORT\", self)\n        action.triggered.connect(\n            lambda: self.update_tracking_method('deepocsort'))\n        
menu2.addAction(action)\n\n        icon = utils.newIcon(\"tracking\")\n        action = QtGui.QAction(\n            icon, \"4 OC SORT\", self)\n        action.triggered.connect(lambda: self.update_tracking_method('ocsort'))\n        menu2.addAction(action)\n\n        icon = utils.newIcon(\"tracking\")\n        action = QtGui.QAction(\n            icon, \"5 BoT SORT\", self)\n        action.triggered.connect(\n            lambda: self.update_tracking_method('botsort'))\n        menu2.addAction(action)\n\n    def add_certain_area_menu(self):\n        menu3 = self.menus.certain_area\n        menu3.clear()\n\n        icon = utils.newIcon(\"polygon\")\n        action = QtGui.QAction(\n            icon, \"Select Certain Area\", self)\n        action.triggered.connect(\n            lambda: self.certain_area_clicked(1))\n        menu3.addAction(action)\n\n        icon = utils.newIcon(\"rectangle\")\n        action = QtGui.QAction(\n            icon, \"Cancel Area\", self)\n        action.triggered.connect(\n            lambda: self.certain_area_clicked(0))\n        menu3.addAction(action)\n\n    def update_tracking_method(self, method='bytetrack'):\n        self.waitWindow(\n            visible=True, text=f'Please Wait.\\n{method} is Loading...')\n        self.tracking_method = method\n        self.tracking_config = ROOT / 'trackers' / \\\n            method / 'configs' / (method + '.yaml')\n        with torch.no_grad():\n            device = select_device('')\n            print(\n                f'tracking method {self.tracking_method} , config {self.tracking_config} , reid {reid_weights} , device {device} , half {False}')\n            self.tracker = create_tracker(\n                self.tracking_method, self.tracking_config, reid_weights, device, False)\n            if hasattr(self.tracker, 'model'):\n                if hasattr(self.tracker.model, 'warmup'):\n                    self.tracker.model.warmup()\n        self.waitWindow()\n\n        print(f'Changed tracking method to {method}')\n\n    def popLabelListMenu(self, point):\n        self.menus.labelList.exec(self.labelList.mapToGlobal(point))\n\n    def validateLabel(self, label):\n        # no validation\n        if self._config[\"validate_label\"] is None:\n            return True\n\n        for i in range(self.uniqLabelList.count()):\n            label_i = self.uniqLabelList.item(i).data(Qt.ItemDataRole.UserRole)\n            if self._config[\"validate_label\"] in [\"exact\"]:\n                if label_i == label:\n                    return True\n        return False\n\n    def setCreateMode(self):\n        self.turnON_SAM()\n        self.toggleDrawMode(False, createMode=\"polygon\")\n        return\n\n    def editLabel(self, item=None):\n        if self.current_annotation_mode == 'video':\n            self.update_current_frame_annotation()\n        if item and not isinstance(item, LabelListWidgetItem):\n            raise TypeError(\"item must be LabelListWidgetItem type\")\n        if not self.canvas.editing():\n            return\n        if not item:\n            item = self.currentItem()\n        if item is None:\n            return\n        shape = item.shape()\n        if shape is None:\n            return\n        old_text, old_flags, old_group_id, old_content = self.labelDialog.popUp(\n            text=shape.label,\n            flags=shape.flags,\n            group_id=shape.group_id,\n            content=shape.content,\n            skip_flag=True\n        )\n        text, flags, new_group_id, content = self.labelDialog.popUp(\n 
           text=shape.label,\n            flags=shape.flags,\n            group_id=shape.group_id,\n            content=shape.content\n        )\n\n        if text is None:\n            return\n        if not self.validateLabel(text):\n            self.errorMessage(\n                self.tr(\"Invalid label\"),\n                self.tr(\"Invalid label '{}' with validation type '{}'\").format(\n                    text, self._config[\"validate_label\"]\n                ),\n            )\n            return\n        shape.label = text\n        shape.flags = flags\n        shape.group_id = new_group_id\n        shape.content = str(content)\n\n        # if img or dir -> do smth then return\n        if self.current_annotation_mode == 'img' or self.current_annotation_mode == 'dir':\n            item.setText(f'{shape.label}')\n            self.setDirty()\n            if not self.uniqLabelList.findItemsByLabel(shape.label):\n                item = QtWidgets.QListWidgetItem()\n                item.setData(Qt.ItemDataRole.UserRole, shape.label)\n                self.uniqLabelList.addItem(item)\n            self.refresh_image_MODE()\n            return\n\n        # now we are in video mode\n        if shape.group_id is None:\n            item.setText(shape.label)\n\n        else:\n\n            idChanged = old_group_id != new_group_id\n            result, self.featuresOptions, only_this_frame, duplicates = editLabel_idChanged_UI(\n                self.featuresOptions,\n                old_group_id,\n                new_group_id,\n                self.id_frames_rec,\n                self.INDEX_OF_CURRENT_FRAME)\n\n            if duplicates or result != QtWidgets.QDialog.DialogCode.Accepted:\n                shape.label = old_text\n                shape.flags = old_flags\n                shape.content = old_content\n                shape.group_id = old_group_id\n                return\n\n            self.minID = min(self.minID, new_group_id - 1)\n\n            listObj = self.load_objects_from_json__orjson()\n\n            self.id_frames_rec, self.CURRENT_ANNOATAION_TRAJECTORIES, listObj = editLabel_handle_data(\n                currFrame=self.INDEX_OF_CURRENT_FRAME,\n                listObj=listObj,\n                trajectories=self.CURRENT_ANNOATAION_TRAJECTORIES,\n                id_frames_rec=self.id_frames_rec,\n                idChanged=idChanged,\n                only_this_frame=only_this_frame,\n                shape=shape,\n                old_group_id=old_group_id,\n                new_group_id=new_group_id,)\n\n            self.load_objects_to_json__orjson(listObj)\n            self.main_video_frames_slider_changed()\n\n    def mark_as_key(self):\n        \"\"\"\n        Summary:\n            This function is called when the user presses the \"Mark as Key\" button.\n            It marks the selected shape as a key frame.\n        \"\"\"\n        try:\n            self.update_current_frame_annotation()\n            id = self.canvas.selectedShapes[0].group_id\n            try:\n                if self.INDEX_OF_CURRENT_FRAME not in self.key_frames['id_' + str(id)]:\n                    self.key_frames['id_' +\n                                    str(id)].add(self.INDEX_OF_CURRENT_FRAME)\n                else:\n                    res = MsgBox.OKmsgBox(\n                        \"Caution\", f\"Frame {self.INDEX_OF_CURRENT_FRAME} is already a key frame for ID {id}.\\nDo you want to remove it?\", \"warning\", turnResult=True)\n                    if res == QtWidgets.QMessageBox.StandardButton.Ok:\n 
                       self.key_frames['id_' +\n                                        str(id)].remove(self.INDEX_OF_CURRENT_FRAME)\n                    else:\n                        return\n            except:\n                self.key_frames['id_' +\n                                str(id)] = set()\n                self.key_frames['id_' +\n                                str(id)].add(self.INDEX_OF_CURRENT_FRAME)\n            self.main_video_frames_slider_changed()\n        except Exception as e:\n            MsgBox.OKmsgBox(\"Error\", f\"Error: {e}\", \"critical\")\n\n    def remove_all_keyframes(self):\n        try:\n            self.update_current_frame_annotation()\n            id = self.canvas.selectedShapes[0].group_id\n            self.key_frames['id_' + str(id)] = set()\n        except:\n            pass\n\n    def rec_frame_for_id(self, id, frame, type_='add'):\n        \"\"\"\n        Summary:\n            To store the frames in which the object with the given id is present.\n\n        Args:\n            id (int): The id of the object.\n            frame (int): The frame number.\n            type_ (str, optional): 'add' or 'remove'. Defaults to 'add'.\n                                    'add' to add the frame to the list of frames in which the object is present.\n                                    'remove' to remove the frame from the list of frames in which the object is present.\n\n        Returns:\n            None\n        \"\"\"\n\n        if type_ == 'add':\n            try:\n                self.id_frames_rec['id_' + str(id)].add(frame)\n            except:\n                self.id_frames_rec['id_' + str(id)] = set()\n                self.id_frames_rec['id_' + str(id)].add(frame)\n        else:\n            try:\n                self.id_frames_rec['id_' + str(id)].remove(frame)\n            except:\n                pass\n\n    def interpolateMENU(self, item=None):\n        try:\n            if len(self.canvas.selectedShapes) == 0:\n                mb = QtWidgets.QMessageBox\n                msg = self.tr(\"Interpolate all IDs?\\n\")\n                answer = mb.warning(self, self.tr(\n                    \"Attention\"), msg, mb.StandardButton.Yes | mb.StandardButton.No)\n                if answer != mb.StandardButton.Yes:\n                    return\n                else:\n                    self.update_current_frame_annotation()\n                    keys = list(self.id_frames_rec.keys())\n                    idsORG = [int(keys[i][3:]) for i in range(len(keys))]\n            else:\n                self.update_current_frame_annotation()\n                idsORG = [shape.group_id for shape in self.canvas.selectedShapes]\n                id = self.canvas.selectedShapes[0].group_id\n\n            result, self.featuresOptions = interpolation_UI.PopUp(self.featuresOptions)\n            if result != QtWidgets.QDialog.DialogCode.Accepted:\n                return\n\n            with_linear = True if self.featuresOptions['interpolationDefMethod'] == 'linear' else False\n            with_sam = True if self.featuresOptions['interpolationDefMethod'] == 'SAM' else False\n            with_keyframes = True if self.featuresOptions['interpolationDefType'] == 'key' else False\n\n            if with_keyframes:\n                allAccepted, allRejected, ids = mathOps.checkKeyFrames(\n                    idsORG, self.key_frames)\n                if not allAccepted:\n                    if allRejected:\n                        MsgBox.OKmsgBox(\"Key Frames Error\",\n                          
              f\"All of the selected IDs have no KEY frames.\\n    ie. less than 2 key frames\\n The interpolation is NOT performed.\")\n                        return\n                    else:\n                        resutl = MsgBox.OKmsgBox(\"Key Frames Error\",\n                                                 f\"Some of the selected IDs have no KEY frames.\\n    ie. less than 2 key frames\\n The interpolation is performed only for the IDs with KEY frames.\\nIDs: {ids}.\", \"info\", turnResult=True)\n                        if resutl != QtWidgets.QMessageBox.StandardButton.Ok:\n                            return\n            else:\n                ids = idsORG\n\n            self.interrupted = False\n            if with_sam:\n                self.interpolate_with_sam(ids, with_keyframes)\n            else:\n                for id in ids:\n                    QtWidgets.QApplication.processEvents()\n                    if self.interrupted:\n                        self.interrupted = False\n                        break\n                    self.interpolate(id=id,\n                                     only_edited=with_keyframes)\n            self.waitWindow()\n        except Exception as e:\n            MsgBox.OKmsgBox(\"Error\", f\"Error: {e}\", \"critical\")\n\n    def interpolate(self, id, only_edited=False):\n        \"\"\"\n        Summary:\n            This function is called when the user presses the \"Interpolate\" button.\n            It interpolates the object with the given id.\n\n        Args:\n            id (int): The id of the object.\n            only_edited (bool, optional): True to interpolate using only the key frames. Defaults to False.\n        \"\"\"\n\n        self.waitWindow(\n            visible=True, text=f'Please Wait.\\nID {id} is being interpolated...')\n\n        listObj = self.load_objects_from_json__orjson()\n\n        if only_edited:\n            try:\n                FRAMES = list(self.key_frames['id_' + str(id)])\n            except:\n                return\n        else:\n            FRAMES = list(self.id_frames_rec['id_' + str(id)]) if len(\n                self.id_frames_rec['id_' + str(id)]) > 1 else [-1]\n\n        first_frame_idx = min(FRAMES)\n        last_frame_idx = max(FRAMES)\n\n        if (first_frame_idx >= last_frame_idx):\n            return\n\n        records = [None for i in range(first_frame_idx - 1, last_frame_idx, 1)]\n        for frame in range(first_frame_idx, last_frame_idx + 1, 1):\n            listobjframe = listObj[frame - 1]['frame_idx']\n            frameobjects = listObj[frame - 1]['frame_data']\n            for object_ in frameobjects:\n                if (object_['tracker_id'] == id):\n                    if ((not only_edited) or (listobjframe in FRAMES)):\n                        records[frame -\n                                first_frame_idx] = copy.deepcopy(object_)\n                    break\n\n        baseObject = None\n        baseObjectFrame = None\n        nextObject = None\n        nextObjectFrame = None\n\n        for frame in range(first_frame_idx, last_frame_idx, 1):\n\n            QtWidgets.QApplication.processEvents()\n            if self.interrupted:\n                break\n\n            listobjframe = listObj[frame - 1]['frame_idx']\n            frameobjects = listObj[frame - 1]['frame_data']\n\n            # if object is present in this frame, then it is base object and we calculate next object\n            if (records[frame - first_frame_idx] is not None):\n\n                # assign it as base object\n  
              baseObject = copy.deepcopy(records[frame - first_frame_idx])\n                baseObjectFrame = frame\n\n                # find next object\n                for j in range(frame + 1, last_frame_idx + 1, 1):\n                    if (records[j - first_frame_idx] != None):\n                        nextObject = copy.deepcopy(\n                            records[j - first_frame_idx])\n                        nextObjectFrame = j\n                        break\n\n                # job done, go to next frame\n                continue\n\n            # if only_edited is true and the frame is not key, then we remove the object from the frame to be interpolated\n            if (only_edited and (frame not in FRAMES)):\n                for object_ in frameobjects:\n                    if (object_['tracker_id'] == id):\n                        listObj[frame - 1]['frame_data'].remove(object_)\n                        break\n\n            # if object is not present in this frame, then we calculate the object for this frame\n            cur = mathOps.getInterpolated(baseObject=baseObject,\n                                          baseObjectFrame=baseObjectFrame,\n                                          nextObject=nextObject,\n                                          nextObjectFrame=nextObjectFrame,\n                                          curFrame=frame,)\n            listObj[frame - 1]['frame_data'].append(cur)\n            self.rec_frame_for_id(id, frame)\n\n        self.load_objects_to_json__orjson(listObj)\n        frames = range(first_frame_idx - 1, last_frame_idx, 1)\n        self.calculate_trajectories(frames)\n        self.main_video_frames_slider_changed()\n\n    def interpolate_with_sam(self, idsLISTX, only_edited=False):\n        \"\"\"\n        Summary:\n            This function is called when the user chooses the \"Interpolate with SAM\".\n            It interpolates and inhance the objects with the given ids using SAM.\n\n        Args:\n            idsLISTX (list): The list of ids of the objects.\n        \"\"\"\n\n        self.waitWindow(\n            visible=True, text=f'Please Wait.\\nIDs are being interpolated with SAM...')\n\n        if self.sam_model_comboBox.currentText() == \"Select Model (SAM disabled)\":\n            MsgBox.OKmsgBox(\"SAM is disabled\",\n                            f\"SAM is disabled.\\nPlease enable SAM.\")\n            return\n\n        idsLIST = []\n        first_frame_idxLIST = []\n        last_frame_idxLIST = []\n        for id in idsLISTX:\n            try:\n                if only_edited:\n                    [minf, maxf] = [min(\n                        self.key_frames['id_' + str(id)]), max(self.key_frames['id_' + str(id)])]\n                else:\n                    [minf, maxf] = [min(\n                        self.id_frames_rec['id_' + str(id)]), max(self.id_frames_rec['id_' + str(id)])]\n            except:\n                continue\n            if minf == maxf:\n                continue\n            first_frame_idxLIST.append(minf)\n            last_frame_idxLIST.append(maxf)\n            idsLIST.append(id)\n\n        if len(idsLIST) == 0:\n            return\n\n        overwrite = self.featuresOptions['interpolationOverwrite']\n\n        listObj = self.load_objects_from_json__orjson()\n        listObjNEW = copy.deepcopy(listObj)\n\n        recordsLIST = [[None for ii in range(\n            first_frame_idxLIST[i], last_frame_idxLIST[i] + 1)] for i in range(len(idsLIST))]\n\n        for i in range(min(first_frame_idxLIST) - 1, 
max(last_frame_idxLIST), 1):\n            self.waitWindow(visible=True)\n            listobjframe = listObj[i]['frame_idx']\n            frameobjects = listObj[i]['frame_data'].copy()\n            for object_ in frameobjects:\n                if (object_['tracker_id'] in idsLIST):\n                    index = idsLIST.index(object_['tracker_id'])\n                    recordsLIST[index][listobjframe -\n                                       first_frame_idxLIST[index]] = copy.deepcopy(object_)\n                    listObj[i]['frame_data'].remove(object_)\n\n        for frameIDX in range(min(first_frame_idxLIST), max(last_frame_idxLIST) + 1):\n            QtWidgets.QApplication.processEvents()\n            if self.interrupted:\n                self.interrupted = False\n                break\n            self.waitWindow(\n                visible=True, text=f'Please Wait.\\nIDs are being interpolated with SAM...\\nFrame {frameIDX}')\n\n            frameIMAGE = self.get_frame_by_idx(frameIDX)\n\n            for ididx in range(len(idsLIST)):\n                i = frameIDX - first_frame_idxLIST[ididx]\n                self.waitWindow(visible=True)\n                if frameIDX < first_frame_idxLIST[ididx] or frameIDX > last_frame_idxLIST[ididx]:\n                    continue\n\n                records = recordsLIST[ididx]\n                if (records[i] != None):\n                    current = copy.deepcopy(records[i])\n                    cur_bbox = current['bbox']\n                    if not overwrite:\n                        listObj[frameIDX - 1]['frame_data'].append(current)\n                        continue\n                else:\n                    prev_idx = i - 1\n                    current = copy.deepcopy(records[i - 1])\n\n                    next_idx = i + 1\n                    for j in range(i + 1, len(records)):\n                        self.waitWindow(visible=True)\n                        if (records[j] != None):\n                            next_idx = j\n                            break\n                    cur_bbox = ((next_idx - i) / (next_idx - prev_idx)) * np.array(records[prev_idx]['bbox']) + (\n                        (i - prev_idx) / (next_idx - prev_idx)) * np.array(records[next_idx]['bbox'])\n                    cur_bbox = [int(cur_bbox[i]) for i in range(len(cur_bbox))]\n                    current['bbox'] = copy.deepcopy(cur_bbox)\n\n                    records[i] = current\n\n                try:\n                    same_image = self.sam_predictor.check_image(\n                        frameIMAGE)\n                except:\n                    return\n\n                cur_bbox, cur_segment = self.sam_enhanced_bbox_segment(\n                    frameIMAGE, cur_bbox, 1.2, max_itr=5, forSHAPE=False)\n\n                current['bbox'] = copy.deepcopy(cur_bbox)\n                current['segment'] = copy.deepcopy(cur_segment)\n\n                # append the shape frame by frame (cause we already removed it in the prev. 
for loop)\n                listObj[frameIDX - 1]['frame_data'].append(current)\n                self.rec_frame_for_id(idsLIST[ididx], frameIDX)\n\n            # update frame by frame to the to-be-uploaded listObj\n            listObjNEW[frameIDX - 1] = copy.deepcopy(listObj[frameIDX - 1])\n\n        self.load_objects_to_json__orjson(listObjNEW)\n        self.calculate_trajectories(\n            range(min(first_frame_idxLIST) - 1, max(last_frame_idxLIST), 1))\n        self.main_video_frames_slider_changed()\n\n        # Notify the user that the interpolation is finished\n        self._config = get_config()\n        if not self._config[\"mute\"]:\n            if not self.isActiveWindow():\n                notification.PopUp(\"SAM Interpolation Completed\")\n\n    def get_frame_by_idx(self, frameIDX):\n        self.CAP.set(cv2.CAP_PROP_POS_FRAMES, frameIDX - 1)\n        success, img = self.CAP.read()\n        return img\n\n    def scaleMENU(self):\n        \"\"\"\n        Summary:\n            This function is called when the user presses the \"Scale\" button.\n            It scales the selected shape.\n        \"\"\"\n\n        if len(self.canvas.selectedShapes) != 1:\n            MsgBox.OKmsgBox(f'Scale error',\n                            f'There is {len(self.canvas.selectedShapes)} selected shapes. Please select only one shape to scale.')\n            return\n\n        result = scaleObject_UI.PopUp(self)\n        if result == QtWidgets.QDialog.DialogCode.Accepted:\n            self.update_current_frame_annotation_button_clicked()\n            return\n        else:\n            self.main_video_frames_slider_changed()\n            return\n\n    def ctrlCopy(self):\n        \"\"\"\n        Summary:\n            This function is called when the user presses the \"Copy\" button.\n            It copies the selected shape(s).\n        \"\"\"\n\n        if len(self.canvas.selectedShapes) == 0:\n            return\n        self.copiedShapes = copy.deepcopy(self.canvas.selectedShapes)\n\n    def ctrlPaste(self):\n        \"\"\"\n        Summary:\n            This function is called when the user presses the \"Paste\" button.\n            It pastes the copied shape(s).\n        \"\"\"\n\n        if len(self.copiedShapes) == 0:\n            return\n\n        ids = [shape.group_id for shape in self.canvas.shapes]\n        flag = False\n\n        for shape in self.copiedShapes:\n            if shape.group_id in ids:\n                flag = True\n                continue\n            self.canvas.shapes.append(shape)\n            self.addLabel(shape)\n            self.rec_frame_for_id(shape.group_id, self.INDEX_OF_CURRENT_FRAME)\n\n        if flag:\n            MsgBox.OKmsgBox(\"IDs already exist\",\n                            \"A Shape(s) with the same ID(s) already exist(s) in this frame.\\n\\nShapes with no duplicate IDs are Copied Successfully.\")\n\n        if self.current_annotation_mode == \"video\":\n            self.update_current_frame_annotation_button_clicked()\n\n    def fileSearchChanged(self):\n        self.importDirImages(\n            self.lastOpenDir,\n            pattern=self.fileSearch.text(),\n            load=False,\n        )\n\n    def fileSelectionChanged(self):\n        items = self.fileListWidget.selectedItems()\n        if not items:\n            return\n        item = items[0]\n\n        if not self.mayContinue():\n            return\n\n        currIndex = self.imageList.index(str(item.text()))\n        if currIndex < len(self.imageList):\n            filename = 
self.imageList[currIndex]\n            if filename:\n                self.loadFile(filename)\n                self.refresh_image_MODE()\n\n    # React to canvas signals.\n    def shapeSelectionChanged(self, selected_shapes):\n        try:\n            self._noSelectionSlot = True\n            for shape in self.canvas.selectedShapes:\n                shape.selected = False\n            self.labelList.clearSelection()\n            self.canvas.selectedShapes = selected_shapes\n            for shape in self.canvas.selectedShapes:\n                shape.selected = True\n                item = self.labelList.findItemByShape(shape)\n                self.labelList.selectItem(item)\n                self.labelList.scrollToItem(item)\n            self._noSelectionSlot = False\n            n_selected = len(selected_shapes)\n            self.actions.delete.setEnabled(n_selected)\n            self.actions.copy.setEnabled(n_selected)\n            self.actions.edit.setEnabled(n_selected == 1)\n        except Exception as e:\n            pass\n\n    def addLabel(self, shape):\n        if shape.group_id is None or self.current_annotation_mode != \"video\":\n            text = shape.label\n        else:\n            text = f' ID {shape.group_id}: {shape.label}'\n        label_list_item = LabelListWidgetItem(text, shape)\n        self.labelList.addItem(label_list_item)\n        if not self.uniqLabelList.findItemsByLabel(shape.label):\n            item = self.uniqLabelList.createItemFromLabel(shape.label)\n            self.uniqLabelList.addItem(item)\n            rgb = self._get_rgb_by_label(shape.label)\n            self.uniqLabelList.setItemLabel(item, shape.label, rgb)\n        self.labelDialog.addLabelHistory(shape.label)\n        for action in self.actions.onShapesPresent:\n            action.setEnabled(True)\n\n        rgb = self._get_rgb_by_label(shape.label)\n\n        r, g, b = rgb\n        label_list_item.setText(\n            '{} <font color=\"#{:02x}{:02x}{:02x}\">●</font>'.format(\n                text, r, g, b\n            )\n        )\n        shape.line_color = QtGui.QColor(r, g, b)\n        shape.vertex_fill_color = QtGui.QColor(r, g, b)\n        shape.hvertex_fill_color = QtGui.QColor(255, 255, 255)\n        shape.fill_color = QtGui.QColor(r, g, b, 128)\n        shape.select_line_color = QtGui.QColor(255, 255, 255)\n        shape.select_fill_color = QtGui.QColor(r, g, b, 155)\n\n    def _get_rgb_by_label(self, label):\n        if self._config[\"shape_color\"] == \"auto\":\n            label_ascii = sum([ord(c) for c in label])\n            idx = label_ascii % len(color_palette)\n            color = color_palette[idx]\n            # convert color from bgr to rgb\n            return color[::-1]\n\n        elif (\n            self._config[\"shape_color\"] == \"manual\"\n            and self._config[\"label_colors\"]\n            and label in self._config[\"label_colors\"]\n        ):\n            return self._config[\"label_colors\"][label]\n        elif self._config[\"default_shape_color\"]:\n            return self._config[\"default_shape_color\"]\n\n    def remLabels(self, shapes):\n        for shape in shapes:\n            item = self.labelList.findItemByShape(shape)\n            self.labelList.removeItem(item)\n\n    def loadShapes(self, shapes, replace=True):\n        self._noSelectionSlot = True\n        # sort shapes by group_id but only if its not None\n        shapes = sorted(shapes, key=lambda x: int(x.group_id)\n                        if x.group_id is not None else 0)\n        for 
shape in shapes:\n            self.addLabel(shape)\n        self.labelList.clearSelection()\n        self._noSelectionSlot = False\n        self.canvas.loadShapes(shapes, replace=replace)\n        for shape in self.canvas.shapes:\n            self.canvas.setShapeVisible(\n                shape, self.CURRENT_ANNOATAION_FLAGS[\"polygons\"])\n\n    def loadLabels(self, shapes, replace=True):\n        s = []\n        for shape in shapes:\n            label = shape[\"label\"]\n            points = shape[\"points\"]\n            bbox = shape[\"bbox\"]\n            shape_type = shape[\"shape_type\"]\n            content = shape[\"content\"]\n            group_id = shape[\"group_id\"]\n\n            if not points:\n                # skip point-empty shape\n                continue\n\n            shape = Shape(\n                label=label,\n                shape_type=shape_type,\n                group_id=group_id,\n                content=content,\n            )\n            for i in range(0, len(points), 2):\n                shape.addPoint(QtCore.QPointF(points[i], points[i + 1]))\n            shape.close()\n\n            default_flags = {}\n            if self._config[\"label_flags\"]:\n                for pattern, keys in self._config[\"label_flags\"].items():\n                    if re.match(pattern, label):\n                        for key in keys:\n                            default_flags[key] = False\n            shape.flags = default_flags\n\n            s.append(shape)\n        self.loadShapes(s, replace=replace)\n\n    def loadFlags(self, flags):\n        self.flag_widget.clear()\n        for key, flag in flags.items():\n            item = QtWidgets.QListWidgetItem(key)\n            # PyQt6 requires the fully qualified enum (Qt.ItemFlag)\n            item.setFlags(item.flags() | Qt.ItemFlag.ItemIsUserCheckable)\n            item.setCheckState(Qt.CheckState.Checked if flag else Qt.CheckState.Unchecked)\n            self.flag_widget.addItem(item)\n\n    def saveLabels(self, filename):\n        lf = LabelFile()\n\n        def format_shape(s):\n            data = s.other_data.copy()\n            data.update(\n                dict(\n                    label=s.label.encode(\"utf-8\") if PY2 else s.label,\n                    # convert points into 1D array\n                    points=mathOps.flattener(s.points),\n                    bbox=s.bbox,\n                    group_id=s.group_id,\n                    content=s.content,\n                    shape_type=s.shape_type,\n                    flags=s.flags,\n                )\n            )\n            return data\n\n        shapes = [format_shape(item.shape()) for item in self.labelList]\n        flags = {}\n        for i in range(self.flag_widget.count()):\n            item = self.flag_widget.item(i)\n            key = item.text()\n            flag = item.checkState() == Qt.CheckState.Checked\n            flags[key] = flag\n        try:\n            imagePath = osp.relpath(self.imagePath, osp.dirname(filename))\n            imageData = self.imageData if self._config[\"store_data\"] else None\n            if osp.dirname(filename) and not osp.exists(osp.dirname(filename)):\n                os.makedirs(osp.dirname(filename))\n            lf.save(\n                filename=filename,\n                shapes=shapes,\n                imagePath=imagePath,\n                imageData=imageData,\n                imageHeight=self.image.height(),\n                imageWidth=self.image.width(),\n                otherData=self.otherData,\n                flags=flags,\n            )\n            self.labelFile = lf\n            
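# reflect the saved label file in the file list widget by checking the matching entry\n            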
items = self.fileListWidget.findItems(\n                self.imagePath, Qt.MatchFlag.MatchExactly\n            )\n            if len(items) > 0:\n                if len(items) != 1:\n                    raise RuntimeError(\"There are duplicate files.\")\n                items[0].setCheckState(Qt.CheckState.Checked)\n            # disable allows next and previous image to proceed\n            return True\n        except LabelFileError as e:\n            self.errorMessage(\n                self.tr(\"Error saving label data\"), self.tr(\"<b>%s</b>\") % e\n            )\n            return False\n\n    def copySelectedShape(self):\n        added_shapes = self.canvas.copySelectedShapes()\n        self.labelList.clearSelection()\n        for shape in added_shapes:\n            self.addLabel(shape)\n        self.setDirty()\n\n    def labelSelectionChanged(self):\n        if self._noSelectionSlot:\n            return\n        if self.canvas.editing():\n            selected_shapes = []\n            for item in self.labelList.selectedItems():\n                selected_shapes.append(item.shape())\n            if selected_shapes:\n                self.canvas.selectShapes(selected_shapes)\n            else:\n                self.canvas.deSelectShape()\n\n    def labelItemChanged(self, item):\n        shape = item.shape()\n        self.canvas.setShapeVisible(shape, item.checkState() == Qt.CheckState.Checked)\n\n    def labelOrderChanged(self):\n        self.setDirty()\n        self.canvas.loadShapes([item.shape() for item in self.labelList])\n\n    # Callback functions:\n\n    def newShape(self):\n        \"\"\"Pop-up and give focus to the label editor.\n        position MUST be in global coordinates.\n        \"\"\"\n        items = self.uniqLabelList.selectedItems()\n        text = None\n        if items:\n            text = items[0].data(Qt.ItemDataRole.UserRole)\n        flags = {}\n        group_id = None\n        if self._config[\"display_label_popup\"] or not text:\n            previous_text = self.labelDialog.edit.text()\n            text, flags, group_id, content = self.labelDialog.popUp(text)\n            if not text:\n                self.labelDialog.edit.setText(previous_text)\n\n        if text and not self.validateLabel(text):\n            self.errorMessage(\n                self.tr(\"Invalid label\"),\n                self.tr(\"Invalid label '{}' with validation type '{}'\").format(\n                    text, self._config[\"validate_label\"]\n                ),\n            )\n            text = \"\"\n\n        if text == \"SAM instance\":\n            text = \"SAM instance - confirmed\"\n\n        if self.current_annotation_mode == \"video\":\n            group_id, text = getIDfromUser_UI.PopUp(self, group_id, text)\n\n        if text:\n\n            if group_id is None:\n                group_id = self.minID\n                self.minID -= 1\n            else:\n                self.minID = min(self.minID, group_id - 1)\n\n            if self.canvas.SAM_mode == \"finished\":\n                self.current_sam_shape[\"label\"] = text\n                self.current_sam_shape[\"group_id\"] = group_id\n            else:\n                self.labelList.clearSelection()\n                # shape below is of type qt shape\n                shape = self.canvas.setLastLabel(text, flags)\n                shape.group_id = group_id\n                shape.content = content\n                self.addLabel(shape)\n                self.rec_frame_for_id(group_id, self.INDEX_OF_CURRENT_FRAME)\n\n            
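# a labeled shape was committed; re-enable editing actions and mark the annotation as dirty\n            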
self.actions.editMode.setEnabled(True)\n            self.actions.undoLastPoint.setEnabled(False)\n            self.actions.undo.setEnabled(True)\n            self.setDirty()\n\n            self.refresh_image_MODE()\n\n        else:\n            if self.canvas.SAM_mode == \"finished\":\n                self.current_sam_shape[\"label\"] = text\n                self.current_sam_shape[\"group_id\"] = -1\n                self.canvas.SAM_mode = \"\"\n            else:\n                self.canvas.undoLastLine()\n                self.canvas.shapesBackups.pop()\n\n        if self.current_annotation_mode == \"video\":\n            self.update_current_frame_annotation_button_clicked()\n            self.update_current_frame_annotation_button_clicked()\n\n    def scrollRequest(self, delta, orientation):\n        units = -delta * 0.1  # natural scroll\n        bar = self.scrollBars[orientation]\n        value = bar.value() + bar.singleStep() * units\n        self.setScroll(orientation, value)\n\n    def setScroll(self, orientation, value):\n        self.scrollBars[orientation].setValue(value)\n        self.scroll_values[orientation][self.filename] = value\n\n    def setZoom(self, value):\n        self.actions.fitWidth.setChecked(False)\n        self.actions.fitWindow.setChecked(False)\n        self.zoomMode = self.MANUAL_ZOOM\n        self.zoomWidget.setValue(value)\n        self.zoom_values[self.filename] = (self.zoomMode, value)\n\n    def addZoom(self, increment=1.1):\n        zoom_value = self.zoomWidget.value() * increment\n        if increment > 1:\n            zoom_value = math.ceil(zoom_value)\n        else:\n            zoom_value = math.floor(zoom_value)\n        self.setZoom(zoom_value)\n\n    def zoomRequest(self, delta, pos):\n        canvas_width_old = self.canvas.width()\n        units = 1.1\n        if delta < 0:\n            units = 0.9\n        self.addZoom(units)\n\n        canvas_width_new = self.canvas.width()\n        if canvas_width_old != canvas_width_new:\n            canvas_scale_factor = canvas_width_new / canvas_width_old\n\n            x_shift = round(pos.x() * canvas_scale_factor) - pos.x()\n            y_shift = round(pos.y() * canvas_scale_factor) - pos.y()\n\n            self.setScroll(\n                Qt.Orientation.Horizontal,\n                self.scrollBars[Qt.Orientation.Horizontal].value() + x_shift,\n            )\n            self.setScroll(\n                Qt.Orientation.Vertical,\n                self.scrollBars[Qt.Orientation.Vertical].value() + y_shift,\n            )\n\n    def setFitWindow(self, value=True):\n        if value:\n            self.actions.fitWidth.setChecked(False)\n        self.zoomMode = self.FIT_WINDOW if value else self.MANUAL_ZOOM\n        self.adjustScale()\n\n    def setFitWidth(self, value=True):\n        if value:\n            self.actions.fitWindow.setChecked(False)\n        self.zoomMode = self.FIT_WIDTH if value else self.MANUAL_ZOOM\n        self.adjustScale()\n\n    def onNewBrightnessContrast(self, qimage):\n        self.canvas.loadPixmap(\n            QtGui.QPixmap.fromImage(qimage), clear_shapes=False\n        )\n\n    def enable_show_cross_line(self, enabled):\n        self._config[\"show_cross_line\"] = enabled\n        self.actions.show_cross_line.setChecked(enabled)\n        self.canvas.set_show_cross_line(enabled)\n\n    def brightnessContrast(self, value):\n        dialog = BrightnessContrastDialog(\n            utils.img_data_to_pil(self.imageData),\n            self.onNewBrightnessContrast,\n            
parent=self,\n        )\n        brightness, contrast = self.brightnessContrast_values.get(\n            self.filename, (None, None)\n        )\n        if brightness is not None:\n            dialog.slider_brightness.setValue(brightness)\n        if contrast is not None:\n            dialog.slider_contrast.setValue(contrast)\n        dialog.exec()\n\n        brightness = dialog.slider_brightness.value()\n        contrast = dialog.slider_contrast.value()\n        self.brightnessContrast_values[self.filename] = (brightness, contrast)\n\n    def togglePolygons(self, value):\n        for item in self.labelList:\n            item.setCheckState(Qt.CheckState.Checked if value else Qt.CheckState.Unchecked)\n\n    def loadFile(self, filename=None):\n        \"\"\"Load the specified file, or the last opened file if None.\"\"\"\n        # changing fileListWidget loads file\n        if filename in self.imageList and (\n            self.fileListWidget.currentRow() != self.imageList.index(filename)\n        ):\n            self.fileListWidget.setCurrentRow(self.imageList.index(filename))\n            self.fileListWidget.repaint()\n            return\n\n        self.resetState()\n        self.canvas.setEnabled(False)\n        if filename is None:\n            filename = self.settings.value(\"filename\", \"\")\n        filename = str(filename)\n        if not QtCore.QFile.exists(filename):\n            print(f\"File {filename} does not exist\")\n            self.errorMessage(\n                self.tr(\"Error opening file\"),\n                self.tr(\"No such file: <b>%s</b>\") % filename,\n            )\n            return False\n        # assumes same name, but json extension\n        self.status(self.tr(\"Loading %s...\") % osp.basename(str(filename)))\n        label_file = osp.splitext(filename)[0] + \".json\"\n        if self.output_dir:\n            label_file_without_path = osp.basename(label_file)\n            label_file = osp.join(self.output_dir, label_file_without_path)\n        if QtCore.QFile.exists(label_file) and LabelFile.is_label_file(\n            label_file\n        ):\n            try:\n                self.labelFile = LabelFile(label_file)\n            except LabelFileError as e:\n                self.errorMessage(\n                    self.tr(\"Error opening file\"),\n                    self.tr(\n                        \"<p><b>%s</b></p>\"\n                        \"<p>Make sure <i>%s</i> is a valid label file.\"\n                    )\n                    % (e, label_file),\n                )\n                self.status(self.tr(\"Error reading %s\") % label_file)\n                return False\n            self.imageData = self.labelFile.imageData\n            self.imagePath = osp.join(\n                osp.dirname(label_file),\n                self.labelFile.imagePath,\n            )\n            self.otherData = self.labelFile.otherData\n        else:\n            self.imageData = LabelFile.load_image_file(filename)\n            if self.imageData:\n                self.imagePath = filename\n            self.labelFile = None\n        image = QtGui.QImage.fromData(self.imageData)\n\n        if image.isNull():\n            formats = [\n                \"*.{}\".format(fmt.data().decode())\n                for fmt in QtGui.QImageReader.supportedImageFormats()\n            ]\n            self.errorMessage(\n                self.tr(\"Error opening file\"),\n                self.tr(\n                    \"<p>Make sure <i>{0}</i> is a valid image file.<br/>\"\n                    
\"Supported image formats: {1}</p>\"\n                ).format(filename, \",\".join(formats)),\n            )\n            self.status(self.tr(\"Error reading %s\") % filename)\n            return False\n        self.image = image\n        self.CURRENT_FRAME_IMAGE = cv2.imread(filename)\n        self.filename = filename\n        if self._config[\"keep_prev\"]:\n            prev_shapes = self.canvas.shapes\n\n        self.canvas.loadPixmap(QtGui.QPixmap.fromImage(image))\n        flags = {k: False for k in self._config[\"flags\"] or []}\n        if self.labelFile:\n            self.actions.export.setEnabled(True)\n            self.CURRENT_SHAPES_IN_IMG = self.labelFile.shapes\n            self.canvas.loadPixmap(QtGui.QPixmap.fromImage(image))\n            self.loadLabels(self.labelFile.shapes)\n            if self.labelFile.flags is not None:\n                flags.update(self.labelFile.flags)\n        self.loadFlags(flags)\n        if self._config[\"keep_prev\"] and self.noShapes():\n            self.loadShapes(prev_shapes, replace=False)\n            self.setDirty()\n        else:\n            self.setClean()\n\n        self.canvas.setEnabled(True)\n        # set zoom values\n        is_initial_load = not self.zoom_values\n        if self.filename in self.zoom_values:\n            self.zoomMode = self.zoom_values[self.filename][0]\n            self.setZoom(self.zoom_values[self.filename][1])\n        elif is_initial_load or not self._config[\"keep_prev_scale\"]:\n            self.adjustScale(initial=True)\n        # set scroll values\n        for orientation in self.scroll_values:\n            if self.filename in self.scroll_values[orientation]:\n                self.setScroll(\n                    orientation, self.scroll_values[orientation][self.filename]\n                )\n        # after loading the image, clear SAM instance if exists\n        if self.sam_predictor is not None:\n            self.sam_predictor.clear_logit()\n            self.canvas.SAM_coordinates = []\n        # set brightness constrast values\n        dialog = BrightnessContrastDialog(\n            utils.img_data_to_pil(self.imageData),\n            self.onNewBrightnessContrast,\n            parent=self,\n        )\n        brightness, contrast = self.brightnessContrast_values.get(\n            self.filename, (None, None)\n        )\n        if self._config[\"keep_prev_brightness\"] and self.recentFiles:\n            brightness, _ = self.brightnessContrast_values.get(\n                self.recentFiles[0], (None, None)\n            )\n        if self._config[\"keep_prev_contrast\"] and self.recentFiles:\n            _, contrast = self.brightnessContrast_values.get(\n                self.recentFiles[0], (None, None)\n            )\n        if brightness is not None:\n            dialog.slider_brightness.setValue(brightness)\n        if contrast is not None:\n            dialog.slider_contrast.setValue(contrast)\n        self.brightnessContrast_values[self.filename] = (brightness, contrast)\n        if brightness is not None or contrast is not None:\n            dialog.onNewValue(None)\n        self.paintCanvas()\n        self.addRecentFile(self.filename)\n        self.toggleActions(True)\n        self.canvas.setFocus()\n        self.status(self.tr(\"Loaded %s\") % osp.basename(str(filename)))\n        return True\n\n    def resizeEvent(self, event):\n        if (\n            self.canvas\n            and not self.image.isNull()\n            and self.zoomMode != self.MANUAL_ZOOM\n        ):\n            
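# re-fit the zoom mode (fit-window / fit-width) whenever the window is resized\n            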
self.adjustScale()\n        super(MainWindow, self).resizeEvent(event)\n\n    def paintCanvas(self):\n        assert not self.image.isNull(), \"cannot paint null image\"\n        self.canvas.scale = 0.01 * self.zoomWidget.value()\n        self.canvas.adjustSize()\n        self.canvas.update()\n\n    def adjustScale(self, initial=False):\n        value = self.scalers[self.FIT_WINDOW if initial else self.zoomMode]()\n        value = int(100 * value)\n        self.zoomWidget.setValue(value)\n        self.zoom_values[self.filename] = (self.zoomMode, value)\n\n    def scaleFitWindow(self):\n        \"\"\"Figure out the size of the pixmap to fit the main widget.\"\"\"\n        e = 2.0  # So that no scrollbars are generated.\n        w1 = self.centralWidget().width() - e\n        h1 = self.centralWidget().height() - e\n        a1 = w1 / h1\n        # Calculate a new scale value based on the pixmap's aspect ratio.\n        w2 = self.canvas.pixmap.width() - 0.0\n        h2 = self.canvas.pixmap.height() - 0.0\n        a2 = w2 / h2\n        return w1 / w2 if a2 >= a1 else h1 / h2\n\n    def scaleFitWidth(self):\n        # The epsilon does not seem to work too well here.\n        w = self.centralWidget().width() - 2.0\n        return w / self.canvas.pixmap.width()\n\n    def enableSaveImageWithData(self, enabled):\n        self._config[\"store_data\"] = enabled\n        self.actions.saveWithImageData.setChecked(enabled)\n\n    def closeEvent(self, event):\n        if not self.mayContinue():\n            event.ignore()\n        else:\n            self.Escape_clicked()\n        self.settings.setValue(\n            \"filename\", self.filename if self.filename else \"\"\n        )\n        self.settings.setValue(\"window/size\", self.size())\n        self.settings.setValue(\"window/position\", self.pos())\n        self.settings.setValue(\"window/state\", self.saveState())\n        self.settings.setValue(\"recentFiles\", self.recentFiles)\n        # ask the use for where to save the labels\n        # self.settings.setValue('window/geometry', self.saveGeometry())\n\n    def dragEnterEvent(self, event):\n        extensions = [\n            \".%s\" % fmt.data().decode().lower()\n            for fmt in QtGui.QImageReader.supportedImageFormats()\n        ]\n        if event.mimeData().hasUrls():\n            items = [i.toLocalFile() for i in event.mimeData().urls()]\n            if any([i.lower().endswith(tuple(extensions)) for i in items]):\n                event.accept()\n        else:\n            event.ignore()\n\n    def dropEvent(self, event):\n        if not self.mayContinue():\n            event.ignore()\n            return\n        items = [i.toLocalFile() for i in event.mimeData().urls()]\n        self.importDroppedImageFiles(items)\n\n    # User Dialogs #\n\n    def loadRecent(self, filename):\n        if self.mayContinue():\n            self.loadFile(filename)\n\n    def change_curr_model(self, model_name):\n        \"\"\"\n        Summary:\n            Change current model to the model_name\n\n        Args:\n            model_name (str): name of the model to be changed to\n        \"\"\"\n\n        self.multi_model_flag = False\n        self.waitWindow(\n            visible=True, text=f'Please Wait.\\n{model_name} is being Loaded...')\n        self.intelligenceHelper.current_model_name, self.intelligenceHelper.current_mm_model = self.intelligenceHelper.make_mm_model(\n            model_name)\n        self.waitWindow()\n\n    def model_explorer(self):\n        \"\"\"\n        Summary:\n            
Open model explorer dialog to select or download models\n        \"\"\"\n        self._config = get_config()\n        model_explorer_dialog = utils.ModelExplorerDialog(\n            self, self._config[\"mute\"], notification.PopUp)\n        # make it fit its contents\n        model_explorer_dialog.adjustSize()\n        model_explorer_dialog.setMinimumWidth(\n            int(model_explorer_dialog.table.width() * 1.5))\n        model_explorer_dialog.setMinimumHeight(\n            model_explorer_dialog.table.rowHeight(0) * 10)\n        model_explorer_dialog.exec()\n        # init intelligence again if this is the first time\n        if self.helper_first_time_flag:\n            try:\n                self.intelligenceHelper = Intelligence(self)\n            except:\n                print(\n                    \"There seems to be a problem initializing the model.\\nCheck that you have at least one model available.\")\n                self.helper_first_time_flag = True\n            else:\n                self.helper_first_time_flag = False\n        mathOps.update_saved_models_json(os.getcwd())\n\n        selected_model_name, config, checkpoint = model_explorer_dialog.selected_model\n        if selected_model_name != -1:\n            self.intelligenceHelper.current_model_name, self.intelligenceHelper.current_mm_model = self.intelligenceHelper.make_mm_model_more(\n                selected_model_name, config, checkpoint)\n        self.updateSamControls()\n\n    def openNextImg(self, _value=False, load=True):\n        self.refresh_image_MODE()\n        keep_prev = self._config[\"keep_prev\"]\n        if not self.mayContinue():\n            return\n\n        if len(self.imageList) <= 0:\n            return\n\n        filename = None\n        if self.filename is None:\n            filename = self.imageList[0]\n        else:\n            currIndex = self.imageList.index(self.filename)\n            if currIndex + 1 < len(self.imageList):\n                filename = self.imageList[currIndex + 1]\n            else:\n                filename = self.imageList[-1]\n        self.filename = filename\n\n        if self.filename and load:\n            self.loadFile(self.filename)\n\n        self._config[\"keep_prev\"] = keep_prev\n        self.refresh_image_MODE()\n\n    def openFile(self, _value=False):\n\n        self.actions.export.setEnabled(False)\n        try:\n            cv2.destroyWindow('video processing')\n        except:\n            pass\n        if not self.mayContinue():\n            return\n        path = osp.dirname(str(self.filename)) if self.filename else \".\"\n        formats = [\n            \"*.{}\".format(fmt.data().decode())\n            for fmt in QtGui.QImageReader.supportedImageFormats()\n        ]\n        filters = self.tr(\"Image & Label files (%s)\") % \" \".join(\n            formats + [\"*%s\" % LabelFile.suffix]\n        )\n        filename = QtWidgets.QFileDialog.getOpenFileName(\n            self,\n            self.tr(\"%s - Choose Image or Label file\") % __appname__,\n            path,\n            filters,\n        )\n        filename, _ = filename\n        filename = str(filename)\n        if filename:\n            self.reset_for_new_mode(\"img\")\n            self.loadFile(filename)\n            self.refresh_image_MODE()\n            self.set_video_controls_visibility(False)\n\n        self.filename = filename\n        # clear the file list widget\n        self.fileListWidget.clear()\n        self.uniqLabelList.clear()\n        # enable Visualization Options\n        for option in 
self.vis_options:\n            if option in [self.id_checkBox, self.traj_checkBox, self.trajectory_length_lineEdit]:\n                option.setEnabled(False)\n            else:\n                option.setEnabled(True)\n\n    def changeOutputDirDialog(self, _value=False):\n        default_output_dir = self.output_dir\n        if default_output_dir is None and self.filename:\n            default_output_dir = osp.dirname(self.filename)\n        if default_output_dir is None:\n            default_output_dir = self.currentPath()\n\n        output_dir = QtWidgets.QFileDialog.getExistingDirectory(\n            self,\n            self.tr(\"%s - Save/Load Annotations in Directory\") % __appname__,\n            default_output_dir,\n            QtWidgets.QFileDialog.Option.ShowDirsOnly\n            | QtWidgets.QFileDialog.Option.DontResolveSymlinks,\n        )\n        output_dir = str(output_dir)\n\n        if not output_dir:\n            return\n\n        self.output_dir = output_dir\n\n        self.statusBar().showMessage(\n            self.tr(\"%s . Annotations will be saved/loaded in %s\")\n            % (\"Change Annotations Dir\", self.output_dir)\n        )\n        self.statusBar().show()\n\n        current_filename = self.filename\n        self.importDirImages(self.lastOpenDir, load=False)\n\n        if current_filename in self.imageList:\n            # retain currently selected file\n            self.fileListWidget.setCurrentRow(\n                self.imageList.index(current_filename)\n            )\n            self.fileListWidget.repaint()\n\n    def saveFile(self, _value=False):\n        assert not self.image.isNull(), \"cannot save empty image\"\n        if self.labelFile:\n            # DL20180323 - overwrite when in directory\n            self.save_path = self.labelFile.filename\n            self._saveFile(self.save_path)\n        elif self.output_file:\n            self.save_path = self.output_file\n            self._saveFile(self.save_path)\n            self.close()\n        else:\n            self.save_path = self.saveFileDialog()\n            self._saveFile(self.save_path)\n        if self.save_path is not None and self.save_path != \"\":\n            self.actions.export.setEnabled(True)\n\n    def exportData(self):\n        \"\"\"\n        Export data to COCO, MOT, video, and custom exports, depending on the current annotation mode.\n\n        If the current annotation mode is \"video\", the function prompts the user to select which types of exports to perform\n        (COCO, MOT, video, and/or custom exports), and then prompts the user to select the output file path for each export type\n        that was selected. The function then exports the data to the selected file paths.\n\n        If the current annotation mode is \"img\" or \"dir\", the function prompts the user to select the output file path for a COCO\n        export, and then exports the data to the selected file path.\n\n        If an error occurs during the export process, the function displays an error message. 
Otherwise, the function displays\n        a success message.\n        \"\"\"\n        try:\n            if self.current_annotation_mode == \"video\":\n                # Get user input for export options\n                result, coco_radio, mot_radio, video_radio, custom_exports_radio_checked_list = exportData_UI.PopUp()\n                if not result:\n                    return\n\n                json_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.json'\n\n                pth = \"\"\n                # Check which radio button is checked and export accordingly\n                if video_radio:\n                    # Get user input for video export path\n                    folderDialog = utils.FolderDialog(\n                        \"tracking_results.mp4\", \"mp4\")\n                    if folderDialog.exec():\n                        pth = self.export_as_video_button_clicked(\n                            folderDialog.selectedFiles()[0])\n                    else:\n                        return\n                if coco_radio:\n                    # Get user input for COCO export path\n                    folderDialog = utils.FolderDialog(\"coco.json\", \"json\")\n                    if folderDialog.exec():\n                        pth = utils.exportCOCOvid(\n                            json_file_name, self.CURRENT_VIDEO_WIDTH, self.CURRENT_VIDEO_HEIGHT, folderDialog.selectedFiles()[0])\n                    else:\n                        return\n                if mot_radio:\n                    # Get user input for MOT export path\n                    folderDialog = utils.FolderDialog(\"mot.txt\", \"txt\")\n                    if folderDialog.exec():\n                        pth = utils.exportMOT(\n                            json_file_name, folderDialog.selectedFiles()[0])\n                    else:\n                        return\n                # custom exports\n                custom_exports_list_video = [\n                    custom_export for custom_export in custom_exports_list if custom_export.mode == \"video\"]\n                if len(custom_exports_radio_checked_list) != 0:\n                    for i in range(len(custom_exports_radio_checked_list)):\n                        if custom_exports_radio_checked_list[i]:\n                            # Get user input for custom export path\n                            folderDialog = utils.FolderDialog(\n                                f\"{custom_exports_list_video[i].file_name}.{custom_exports_list_video[i].format}\", custom_exports_list_video[i].format)\n                            if folderDialog.exec():\n                                try:\n                                    pth = custom_exports_list_video[i](\n                                        json_file_name, self.CURRENT_VIDEO_WIDTH, self.CURRENT_VIDEO_HEIGHT, folderDialog.selectedFiles()[0])\n                                except Exception as e:\n                                    MsgBox.OKmsgBox(\n                                        f\"Error\", f\"Error: with custom export {custom_exports_list_video[i].button_name}\\n check the parameters matches the specified ones in custom_exports.py\\n Error Message: {e}\", \"critical\")\n                            else:\n                                return\n\n            # Image and Directory modes\n            elif self.current_annotation_mode == \"img\" or self.current_annotation_mode == \"dir\":\n                result, coco_radio, custom_exports_radio_checked_list = 
exportData_UI.PopUp(\n                    mode=\"image\")\n                if not result:\n                    return\n                save_path = self.save_path if self.save_path else self.labelFile.filename\n                json_paths = utils.parse_img_export(self.target_directory, save_path)\n                # Check which radio button is checked and export accordingly\n                # COCO export\n                if coco_radio:\n                    # Get user input for COCO export path\n                    folderDialog = utils.FolderDialog(\"coco.json\", \"json\")\n                    if folderDialog.exec():\n                        pth = utils.exportCOCO(\n                            json_paths, folderDialog.selectedFiles()[0])\n                    else:\n                        return\n                # custom exports\n                custom_exports_list_image = [\n                    custom_export for custom_export in custom_exports_list if custom_export.mode == \"image\"]\n                if len(custom_exports_radio_checked_list) != 0:\n                    for i in range(len(custom_exports_radio_checked_list)):\n                        if custom_exports_radio_checked_list[i]:\n                            # Get user input for custom export path\n                            folderDialog = utils.FolderDialog(\n                                f\"{custom_exports_list_image[i].file_name}.{custom_exports_list_image[i].format}\", custom_exports_list_image[i].format)\n                            if folderDialog.exec():\n                                try:\n                                    pth = custom_exports_list_image[i](\n                                        json_paths, folderDialog.selectedFiles()[0])\n                                except Exception as e:\n                                    MsgBox.OKmsgBox(\n                                        f\"Error\", f\"Error: with custom export {custom_exports_list_image[i].button_name}\\n check the parameters matches the specified ones in custom_exports.py\\n Error Message: {e}\", \"critical\")\n                            else:\n                                return\n\n        except Exception as e:\n            # Error QMessageBox\n            msg = QtWidgets.QMessageBox()\n            msg.setIcon(QtWidgets.QMessageBox.Icon.Critical)\n            msg.setText(f\"Error\\n {e}\")\n            msg.setWindowTitle(\n                \"Export Error\")\n            # print exception and error line to terminal\n            print(e)\n            msg.setStandardButtons(QtWidgets.QMessageBox.StandardButton.Ok)\n            msg.exec()\n            return\n        else:\n            # display QMessageBox with ok button and label \"Exporting COCO\"\n            msg = QtWidgets.QMessageBox()\n            try:\n                if pth not in [\"\", None, False]:\n                    msg.setIcon(QtWidgets.QMessageBox.Icon.Information)\n                    msg.setText(f\"Annotations exported successfully to {pth}\")\n                    msg.setWindowTitle(\"Export Success\")\n                else:\n                    msg.setIcon(QtWidgets.QMessageBox.Icon.Critical)\n                    msg.setText(f\"Export Failed\")\n                    msg.setWindowTitle(\"Export Failed\")\n            except:\n                msg.setIcon(QtWidgets.QMessageBox.Icon.Critical)\n                msg.setText(f\"Export Failed\")\n                msg.setWindowTitle(\"Export Failed\")\n            msg.setStandardButtons(QtWidgets.QMessageBox.StandardButton.Ok)\n           
 msg.exec()\n\n    def saveFileAs(self, _value=False):\n        self.actions.export.setEnabled(True)\n        assert not self.image.isNull(), \"cannot save empty image\"\n        self.save_path = self.saveFileDialog()\n        self._saveFile(self.save_path)\n\n    def saveFileDialog(self):\n        caption = self.tr(\"%s - Choose File\") % __appname__\n        filters = self.tr(\"Label files (*%s)\") % LabelFile.suffix\n        if self.output_dir:\n            dlg = QtWidgets.QFileDialog(\n                self, caption, self.output_dir, filters\n            )\n        else:\n            dlg = QtWidgets.QFileDialog(\n                self, caption, self.currentPath(), filters\n            )\n        dlg.setDefaultSuffix(LabelFile.suffix[1:])\n        dlg.setAcceptMode(QtWidgets.QFileDialog.AcceptMode.AcceptSave)\n        dlg.setOption(QtWidgets.QFileDialog.Option.DontConfirmOverwrite, False)\n        dlg.setOption(QtWidgets.QFileDialog.Option.DontUseNativeDialog, False)\n        basename = osp.basename(osp.splitext(self.filename)[0])\n        if self.output_dir:\n            default_labelfile_name = osp.join(\n                self.output_dir, basename + LabelFile.suffix\n            )\n        else:\n            default_labelfile_name = osp.join(\n                self.currentPath(), basename + LabelFile.suffix\n            )\n        filename = dlg.getSaveFileName(\n            self,\n            self.tr(\"Choose File\"),\n            default_labelfile_name,\n            self.tr(\"Label files (*%s)\") % LabelFile.suffix,\n        )\n        if isinstance(filename, tuple):\n            filename, _ = filename\n        return filename\n\n    def _saveFile(self, filename):\n        if filename and self.saveLabels(filename):\n            self.addRecentFile(filename)\n            self.setClean()\n\n    def closeFile(self, _value=False):\n        if not self.mayContinue():\n            return\n        self.resetState()\n        self.setClean()\n        self.toggleActions(False)\n        self.canvas.setEnabled(False)\n        self.actions.saveAs.setEnabled(False)\n\n        # clear the file list widget\n        self.fileListWidget.clear()\n        self.uniqLabelList.clear()\n\n        self.current_annotation_mode = \"\"\n        self.right_click_menu()\n\n        for option in self.vis_options:\n            option.setEnabled(False)\n\n    def getLabelFile(self):\n        if self.filename.lower().endswith(\".json\"):\n            label_file = self.filename\n        else:\n            label_file = osp.splitext(self.filename)[0] + \".json\"\n\n        return label_file\n\n    def deleteFile(self):\n        mb = QtWidgets.QMessageBox\n        msg = self.tr(\n            \"You are about to permanently delete this label file, \"\n            \"proceed anyway?\"\n        )\n        answer = mb.warning(self, self.tr(\"Attention\"), msg, mb.StandardButton.Yes | mb.StandardButton.No)\n        if answer != mb.StandardButton.Yes:\n            return\n\n        label_file = self.getLabelFile()\n        if osp.exists(label_file):\n            os.remove(label_file)\n            logger.info(\"Label file is removed: {}\".format(label_file))\n\n            item = self.fileListWidget.currentItem()\n            item.setCheckState(Qt.CheckState.Unchecked)\n\n            self.resetState()\n\n    # Message Dialogs. 
#\n    def hasLabels(self):\n        if self.noShapes():\n            self.errorMessage(\n                \"No objects labeled\",\n                \"You must label at least one object to save the file.\",\n            )\n            return False\n        return True\n\n    def hasLabelFile(self):\n        if self.filename is None:\n            return False\n\n        label_file = self.getLabelFile()\n        return osp.exists(label_file)\n\n    def mayContinue(self):\n        if not self.dirty:\n            return True\n        mb = QtWidgets.QMessageBox\n        msg = self.tr('Save annotations to \"{}\" before closing?').format(\n            self.filename\n        )\n        answer = mb.question(\n            self,\n            self.tr(\"Save annotations?\"),\n            msg,\n            mb.StandardButton.Save | mb.StandardButton.Discard | mb.StandardButton.Cancel,\n            mb.StandardButton.Save,\n        )\n        if answer == mb.StandardButton.Discard:\n            return True\n        elif answer == mb.StandardButton.Save:\n            self.saveFile()\n            return True\n        else:  # answer == mb.Cancel\n            return False\n\n    def errorMessage(self, title, message):\n        msg_box = QtWidgets.QMessageBox(QtWidgets.QMessageBox.Icon.Critical, title, message)\n        msg_box.setStandardButtons(QtWidgets.QMessageBox.StandardButton.Ok)\n        return msg_box\n\n    def currentPath(self):\n        return osp.dirname(str(self.filename)) if self.filename else \".\"\n\n    def toggleKeepPrevMode(self):\n        self._config[\"keep_prev\"] = not self._config[\"keep_prev\"]\n\n    def removeSelectedPoint(self):\n        self.canvas.removeSelectedPoint()\n        if not self.canvas.hShape.points:\n            self.canvas.deleteShape(self.canvas.hShape)\n            self.remLabels([self.canvas.hShape])\n            self.setDirty()\n            if self.noShapes():\n                for action in self.actions.onShapesPresent:\n                    action.setEnabled(False)\n\n    def deleteSelectedShape(self):\n        try:\n            if len(self.canvas.selectedShapes) == 0:\n                return\n\n            yes, no = QtWidgets.QMessageBox.StandardButton.Yes, QtWidgets.QMessageBox.StandardButton.No\n            msg = self.tr(\n                \"You are about to permanently delete {} polygons, \"\n                \"proceed anyway?\"\n            ).format(len(self.canvas.selectedShapes))\n            if yes == QtWidgets.QMessageBox.warning(\n                self, self.tr(\"Attention\"), msg, yes | no, yes\n            ):\n                deleted_shapes = self.canvas.deleteSelected()\n                deleted_ids = [shape.group_id for shape in deleted_shapes]\n                self.remLabels(deleted_shapes)\n                self.setDirty()\n                if self.noShapes():\n                    for action in self.actions.onShapesPresent:\n                        action.setEnabled(False)\n                if self.current_annotation_mode == 'img' or self.current_annotation_mode == 'dir':\n                    self.refresh_image_MODE()\n                    return\n\n                # if video mode\n                result, self.featuresOptions, fromFrameVAL, toFrameVAL = deleteSelectedShape_UI.PopUp(\n                    self.TOTAL_VIDEO_FRAMES,\n                    self.INDEX_OF_CURRENT_FRAME,\n                    self.featuresOptions)\n                if result == QtWidgets.QDialog.DialogCode.Accepted:\n                    for deleted_id in deleted_ids:\n                   
     self.delete_ids_from_all_frames(\n                            [deleted_id], from_frame=fromFrameVAL, to_frame=toFrameVAL)\n\n                self.main_video_frames_slider_changed()\n        except Exception as e:\n            MsgBox.OKmsgBox(f\"Error\", f\"Error: {e}\", \"critical\")\n\n    def delete_ids_from_all_frames(self, deleted_ids, from_frame, to_frame):\n        \"\"\"\n        Summary:\n            Delete ids from a range of frames\n\n        Args:\n            deleted_ids (list): list of ids to be deleted\n            from_frame (int): starting frame\n            to_frame (int): ending frame\n        \"\"\"\n\n        from_frame, to_frame = np.min(\n            [from_frame, to_frame]), np.max([from_frame, to_frame])\n        listObj = self.load_objects_from_json__orjson()\n\n        for i in range(from_frame - 1, to_frame, 1):\n            frame_idx = listObj[i]['frame_idx']\n            for object_ in listObj[i]['frame_data']:\n                id = object_['tracker_id']\n                if id in deleted_ids:\n                    listObj[i]['frame_data'].remove(object_)\n                    self.CURRENT_ANNOATAION_TRAJECTORIES['id_' +\n                                                         str(id)][frame_idx - 1] = (-1, -1)\n                    self.rec_frame_for_id(id, frame_idx, type_='remove')\n\n        self.load_objects_to_json__orjson(listObj)\n\n    def copyShape(self):\n        \"\"\"\n        Summary:\n            Copy selected shape in right click menu.\n            is NOT saved in the clipboard\n        \"\"\"\n\n        if len(self.canvas.selectedShapes) > 1 and self.current_annotation_mode == 'video':\n            org = copy.deepcopy(self.canvas.shapes)\n            self.canvas.endMove(copy=True)\n            self.canvas.undoLastLine()\n            self.canvas.shapesBackups.pop()\n            self.canvas.shapes = org\n            self.update_current_frame_annotation_button_clicked()\n            return\n\n        elif self.current_annotation_mode == 'video':\n            self.canvas.endMove(copy=True)\n            shape = self.canvas.selectedShapes[0]\n            text = shape.label\n            text, flags, group_id, content = self.labelDialog.popUp(text)\n            shape.group_id = -1\n            shape.content = content\n            shape.label = text\n            shape.flags = flags\n\n            group_id, text = getIDfromUser_UI.PopUp(self, group_id, text)\n\n            if text:\n                self.labelList.clearSelection()\n                shape = self.canvas.setLastLabel(text, flags)\n                shape.group_id = group_id\n                self.addLabel(shape)\n                self.rec_frame_for_id(\n                    shape.group_id, self.INDEX_OF_CURRENT_FRAME)\n                self.actions.editMode.setEnabled(True)\n                self.actions.undoLastPoint.setEnabled(False)\n                self.actions.undo.setEnabled(True)\n                self.setDirty()\n            else:\n                self.canvas.undoLastLine()\n                self.canvas.shapesBackups.pop()\n\n            self.update_current_frame_annotation_button_clicked()\n\n            return\n\n        self.canvas.endMove(copy=True)\n        for shape in self.canvas.selectedShapes:\n            self.addLabel(shape)\n        self.labelList.clearSelection()\n        self.setDirty()\n\n    def moveShape(self):\n        self.canvas.endMove(copy=False)\n        self.setDirty()\n        if self.current_annotation_mode == 'video':\n            
self.update_current_frame_annotation_button_clicked()\n\n    def openDirDialog(self, _value=False, dirpath=None):\n        \n        if not self.mayContinue():\n            return\n\n        defaultOpenDirPath = dirpath if dirpath else \".\"\n        if self.lastOpenDir and osp.exists(self.lastOpenDir):\n            defaultOpenDirPath = self.lastOpenDir\n        else:\n            defaultOpenDirPath = (\n                osp.dirname(self.filename) if self.filename else \".\"\n            )\n\n        targetDirPath = str(\n            QtWidgets.QFileDialog.getExistingDirectory(\n                self,\n                self.tr(\"%s - Open Directory\") % __appname__,\n                defaultOpenDirPath,\n                QtWidgets.QFileDialog.Option.ShowDirsOnly\n                | QtWidgets.QFileDialog.Option.DontResolveSymlinks,\n            )\n        )\n        self.target_directory = targetDirPath\n        self.importDirImages(targetDirPath)\n        self.set_video_controls_visibility(False)\n\n        # enable Visualization Options\n        for option in self.vis_options:\n            if option in [self.id_checkBox, self.traj_checkBox, self.trajectory_length_lineEdit]:\n                option.setEnabled(False)\n            else:\n                option.setEnabled(True)\n\n    @property\n    def imageList(self):\n        lst = []\n        for i in range(self.fileListWidget.count()):\n            item = self.fileListWidget.item(i)\n            lst.append(item.text())\n        return lst\n\n    def importDroppedImageFiles(self, imageFiles):\n        extensions = [\n            \".%s\" % fmt.data().decode().lower()\n            for fmt in QtGui.QImageReader.supportedImageFormats()\n        ]\n\n        self.filename = None\n        for file in imageFiles:\n            if file in self.imageList or not file.lower().endswith(\n                tuple(extensions)\n            ):\n                continue\n            label_file = osp.splitext(file)[0] + \".json\"\n            if self.output_dir:\n                label_file_without_path = osp.basename(label_file)\n                label_file = osp.join(self.output_dir, label_file_without_path)\n            item = QtWidgets.QListWidgetItem(file)\n            # item.setFlags(Qt.ItemFlag.ItemIsEnabled | Qt.ItemFlag.ItemIsSelectable)\n            if QtCore.QFile.exists(label_file) and LabelFile.is_label_file(\n                label_file\n            ):\n                item.setCheckState(Qt.CheckState.Checked)\n            else:\n                item.setCheckState(Qt.CheckState.Unchecked)\n            self.fileListWidget.addItem(item)\n\n            self.openNextImg()\n\n    def importDirImages(self, dirpath, pattern=None, load=True):\n\n        self.actions.export.setEnabled(True)\n\n        if not self.mayContinue() or not dirpath:\n            return\n        self.reset_for_new_mode(\"dir\")\n        self.lastOpenDir = dirpath\n        self.filename = None\n        self.fileListWidget.clear()\n        self.uniqLabelList.clear()\n        for filename in self.scanAllImages(dirpath):\n            if pattern and pattern not in filename:\n                continue\n            label_file = osp.splitext(filename)[0] + \".json\"\n            if self.output_dir:\n                label_file_without_path = osp.basename(label_file)\n                label_file = osp.join(self.output_dir, label_file_without_path)\n            item = QtWidgets.QListWidgetItem(filename)\n            # item.setFlags(Qt.ItemFlag.ItemIsEnabled | Qt.ItemFlag.ItemIsSelectable)\n            
if QtCore.QFile.exists(label_file) and LabelFile.is_label_file(\n                label_file\n            ):\n                item.setCheckState(Qt.CheckState.Checked)\n            else:\n                item.setCheckState(Qt.CheckState.Unchecked)\n            self.fileListWidget.addItem(item)\n        self.openNextImg(load=load)\n        self.fileListWidget.horizontalScrollBar().setValue(\n            self.fileListWidget.horizontalScrollBar().maximum()\n        )\n\n    def scanAllImages(self, folderPath):\n        extensions = [\n            \".%s\" % fmt.data().decode().lower()\n            for fmt in QtGui.QImageReader.supportedImageFormats()\n        ]\n\n        images = []\n        for root, dirs, files in os.walk(folderPath):\n            for file in files:\n                if file.lower().endswith(tuple(extensions)):\n                    relativePath = osp.join(root, file)\n                    images.append(relativePath)\n        images.sort(key=lambda x: x.lower())\n        return images\n    \n    def refresh_image_MODE(self, fromSignal=False):\n        try:\n            if self.current_annotation_mode == \"video\" and not fromSignal:\n                return\n            self.CURRENT_SHAPES_IN_IMG = mathOps.convert_qt_shapes_to_shapes(self.canvas.shapes)\n            imageX = visualizations.draw_bb_on_image_MODE(self.CURRENT_ANNOATAION_FLAGS,\n                                                          self.image,\n                                                          self.CURRENT_SHAPES_IN_IMG)\n            self.labelList.clear()\n            self.canvas.loadPixmap(QtGui.QPixmap.fromImage(imageX))\n            self.loadLabels(self.CURRENT_SHAPES_IN_IMG)\n        except:\n            pass\n\n    def annotate_one(self, called_from_tracking=False):\n\n        areaFlag = len(self.canvas.tracking_area_polygon) > 2\n        if areaFlag:\n            dims = self.CURRENT_FRAME_IMAGE.shape\n            area_points = self.canvas.tracking_area_polygon\n            [x1, y1, x2, y2] = mathOps.track_area_adjustedBboex(\n                area_points, dims, ratio=0.1)\n            targetImage = self.CURRENT_FRAME_IMAGE[y1: y2, x1: x2]\n        else:\n            targetImage = self.CURRENT_FRAME_IMAGE\n\n        try:\n            if self.current_annotation_mode != \"video\":\n                if os.path.exists(self.filename):\n                    self.labelList.clearSelection()\n\n            if self.multi_model_flag:\n                shapes = self.intelligenceHelper.get_shapes_of_one(\n                    targetImage, img_array_flag=True, multi_model_flag=True)\n            else:\n                shapes = self.intelligenceHelper.get_shapes_of_one(\n                    targetImage, img_array_flag=True)\n\n            if areaFlag:\n                shapes = mathOps.adjust_shapes_to_original_image(\n                    shapes, x1, y1, area_points)\n\n            if self.current_annotation_mode == \"video\" and called_from_tracking:\n                return shapes\n\n        except Exception as e:\n            MsgBox.OKmsgBox(\"Error\", f\"{e}\", \"critical\")\n            return\n\n        imageX = visualizations.draw_bb_on_image_MODE(self.CURRENT_ANNOATAION_FLAGS,\n                                                      self.image,\n                                                      shapes)\n\n        # clear shapes already in the labelList (fixes the bug of saving multiple shapes for the same object)\n        self.labelList.clear()\n        self.CURRENT_SHAPES_IN_IMG = shapes\n        
self.canvas.loadPixmap(QtGui.QPixmap.fromImage(imageX))\n        self.loadLabels(self.CURRENT_SHAPES_IN_IMG)\n        self.actions.editMode.setEnabled(True)\n        self.actions.undoLastPoint.setEnabled(False)\n        self.actions.undo.setEnabled(True)\n        self.setDirty()\n\n    def annotate_batch(self):\n        images = []\n        self._config = get_config()\n        notif = [self._config[\"mute\"], self, notification.PopUp]\n        for filename in self.imageList:\n            images.append(filename)\n        if self.multi_model_flag:\n            self.intelligenceHelper.get_shapes_of_batch(\n                images, multi_model_flag=True, notif=notif)\n        else:\n            self.intelligenceHelper.get_shapes_of_batch(images, notif=notif)\n\n    def setConfThreshold(self):\n        # if a threshold exists, pass it as the previous value\n        if self.intelligenceHelper.conf_threshold:\n            self.intelligenceHelper.conf_threshold = self.segmentation_options_UI.setConfThreshold(\n                self.intelligenceHelper.conf_threshold)\n        # if not, use the default value in the function as the previous value\n        else:\n            self.intelligenceHelper.conf_threshold = self.segmentation_options_UI.setConfThreshold()\n\n    def setIOUThreshold(self):\n        # if a threshold exists, pass it as the previous value\n        if self.intelligenceHelper.iou_threshold:\n            self.intelligenceHelper.iou_threshold = self.segmentation_options_UI.setIOUThreshold(\n                self.intelligenceHelper.iou_threshold)\n        # if not, use the default value in the function as the previous value\n        else:\n            self.intelligenceHelper.iou_threshold = self.segmentation_options_UI.setIOUThreshold()\n\n    def selectClasses(self):\n        print(\" from intelligenceHelper:\" + str(self.intelligenceHelper.selectedclasses))\n        self.intelligenceHelper.selectedclasses = self.segmentation_options_UI.selectClasses()\n\n    def mergeSegModels(self):\n        print(\" from intelligenceHelper:\" + str(self.intelligenceHelper.selectedmodels))\n        self.intelligenceHelper.selectedmodels = self.merge_feature_UI.mergeSegModels()\n        # check if the user selected any models\n        if len(self.intelligenceHelper.selectedmodels) == 0:\n            print(\"No models selected\")\n        else:\n            self.multi_model_flag = True\n\n    def Segment_anything(self):\n        # check the visibility of the sam toolbar\n        if self.sam_toolbar.isVisible():\n            self.set_sam_toolbar_visibility(False)\n        else:\n            self.set_sam_toolbar_visibility(True)\n\n    # VIDEO PROCESSING FUNCTIONS (ALL CONNECTED TO THE VIDEO PROCESSING TOOLBAR)\n\n    def calculate_trajectories(self, frames=None):\n        \"\"\"\n        Summary:\n            Calculate trajectories for all objects in the video\n\n        Args:\n            frames (list): list of frames to calculate trajectories for (default: None -> all frames)\n        \"\"\"\n\n        listObj = self.load_objects_from_json__orjson()\n        if len(listObj) == 0:\n            return\n\n        frames = frames if frames else range(len(listObj))\n\n        for i in frames:\n            listobjframe = listObj[i]['frame_idx']\n            for object in listObj[i]['frame_data']:\n                id = object['tracker_id']\n                self.minID = min(self.minID, id - 1)\n                self.rec_frame_for_id(id, listobjframe)\n                label = object['class_name']\n                
label_ascii = sum([ord(c) for c in label])\n                idx = label_ascii % len(color_palette)\n                color = color_palette[idx]\n                center = mathOps.centerOFmass(object['segment'])\n                try:\n                    centers_rec = self.CURRENT_ANNOATAION_TRAJECTORIES['id_' + str(\n                        id)]\n\n                    try:\n                        (xp, yp) = centers_rec[listobjframe - 2]\n                        (xn, yn) = center\n                        if (xp == -1 or xn == -1):\n                            c = 5 / 0\n                        r = 0.5\n                        x = r * xn + (1 - r) * xp\n                        y = r * yn + (1 - r) * yp\n                        center = (int(x), int(y))\n                    except:\n                        pass\n                    centers_rec[listobjframe - 1] = center\n                    self.CURRENT_ANNOATAION_TRAJECTORIES['id_' +\n                                                         str(id)] = centers_rec\n                    self.CURRENT_ANNOATAION_TRAJECTORIES['id_color_' + str(\n                        id)] = color\n                except:\n                    centers_rec = [(-1, - 1)] * int(self.TOTAL_VIDEO_FRAMES)\n                    centers_rec[listobjframe - 1] = center\n                    self.CURRENT_ANNOATAION_TRAJECTORIES['id_' +\n                                                         str(id)] = centers_rec\n                    self.CURRENT_ANNOATAION_TRAJECTORIES['id_color_' + str(\n                        id)] = color\n\n    def right_click_menu(self):\n        \"\"\"\n        Summary:\n            Set the right click menu according to the current annotation mode\n        \"\"\"\n\n        self.set_sam_toolbar_enable(False)\n        self.sam_model_comboBox.setCurrentIndex(0)\n        self.sam_buttons_colors(\"x\")\n        # # right click menu\n\n        #         0  createMode,\n        #         1  editMode,\n        #         2  edit,\n        #         3  enhance,\n        #         4  interpolate,\n        #         5  mark_as_key,\n        #         6  remove_all_keyframes,\n        #         7  scale,\n        #         8  copyShapes,\n        #         9  pasteShapes,\n        #         10 copy,\n        #         11 delete,\n        #         12 undo,\n        #         13 undoLastPoint,\n        #         14 addPointToEdge,\n        #         15 removePoint,\n        #         16 update_curr_frame,\n        #         17 ignore_changes\n\n        mode = self.current_annotation_mode\n        video_menu_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9,     11,         14, 15, 16, 17]\n        image_menu_list = [0, 1, 2, 3,                   10, 11, 12, 13, 14, 15]\n\n        if self.current_annotation_mode == \"video\":\n            self.canvas.menus[0].clear()\n            utils.addActions(\n                self.canvas.menus[0], (self.actions.menu[i] for i in video_menu_list))\n\n            self.menus.edit.clear()\n            utils.addActions(\n                self.menus.edit, (self.actions.menu[i] for i in video_menu_list))\n        else:\n            self.canvas.menus[0].clear()\n            utils.addActions(\n                self.canvas.menus[0], (self.actions.menu[i] for i in image_menu_list))\n\n            self.menus.edit.clear()\n            utils.addActions(\n                self.menus.edit, (self.actions.menu[i] for i in image_menu_list))\n\n    def reset_for_new_mode(self, mode):\n        self.CURRENT_ANNOATAION_TRAJECTORIES = {'length': 30,\n                  
                              'alpha': 0.70}\n        self.key_frames.clear()\n        self.id_frames_rec.clear()\n\n        for shape in self.canvas.shapes:\n            self.canvas.deleteShape(shape)\n\n        self.resetState()\n\n        self.CURRENT_SHAPES_IN_IMG = []\n        self.image = QtGui.QImage()\n        self.CURRENT_FRAME_IMAGE = None\n\n        self.current_annotation_mode = mode\n        self.canvas.current_annotation_mode = mode\n        self.right_click_menu()\n        self.global_listObj = []\n        self.minID = -2\n        self.maxID = 0\n\n    def openVideo(self):\n\n        # enable export if json file exists\n\n        try:\n            cv2.destroyWindow('video processing')\n        except:\n            pass\n        if not self.mayContinue():\n            return\n        videoFile = QtWidgets.QFileDialog.getOpenFileName(\n            self, self.tr(\"%s - Open Video\") % __appname__, \".\",\n            self.tr(\"Video files (*.mp4 *.avi *.mov)\")\n        )\n\n        if videoFile[0]:\n            # clear the file list widget\n            self.fileListWidget.clear()\n            self.uniqLabelList.clear()\n            self.reset_for_new_mode(\"video\")\n\n            self.CURRENT_VIDEO_NAME = videoFile[0].split(\n                \".\")[-2].split(\"/\")[-1]\n            self.CURRENT_VIDEO_PATH = \"/\".join(\n                videoFile[0].split(\".\")[-2].split(\"/\")[:-1])\n\n            json_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.json'\n            if os.path.exists(json_file_name):\n                self.actions.export.setEnabled(True)\n            else:\n                self.actions.export.setEnabled(False)\n\n            cap = cv2.VideoCapture(videoFile[0])\n            self.CURRENT_VIDEO_HEIGHT = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n            self.CURRENT_VIDEO_WIDTH = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n            self.CAP = cap\n            self.TOTAL_VIDEO_FRAMES = int(\n                self.CAP.get(cv2.CAP_PROP_FRAME_COUNT))\n            self.CURRENT_VIDEO_FPS = self.CAP.get(cv2.CAP_PROP_FPS)\n            self.main_video_frames_slider.setMaximum(self.TOTAL_VIDEO_FRAMES)\n            self.frames_to_track_slider.setMaximum(\n                self.TOTAL_VIDEO_FRAMES - self.INDEX_OF_CURRENT_FRAME)\n            self.main_video_frames_slider.setValue(2)\n            self.INDEX_OF_CURRENT_FRAME = 1\n            self.main_video_frames_slider.setValue(self.INDEX_OF_CURRENT_FRAME)\n\n            # self.addToolBarBreak\n            self.set_video_controls_visibility(True)\n\n            self.update_tracking_method()\n\n            self.calculate_trajectories()\n            keys = list(self.id_frames_rec.keys())\n            idsORG = [int(keys[i][3:]) for i in range(len(keys))]\n            if len(idsORG) > 0:\n                self.maxID = max(idsORG)\n\n            for option in self.vis_options:\n                option.setEnabled(True)\n\n        # disable save and save as\n        self.actions.save.setEnabled(False)\n        self.actions.saveAs.setEnabled(False)\n\n    def openVideoFrames(self):\n        try:\n            video_frame_extractor_dialog = utils.VideoFrameExtractor(\n                self._config[\"mute\"], notification.PopUp)\n            video_frame_extractor_dialog.exec()\n\n            dir_path_name = video_frame_extractor_dialog.path_name\n            if dir_path_name:\n                self.target_directory = dir_path_name\n                self.importDirImages(dir_path_name)\n                
self.set_video_controls_visibility(False)\n                # enable Visualization Options\n                for option in self.vis_options:\n                    if option in [self.id_checkBox, self.traj_checkBox, self.trajectory_length_lineEdit]:\n                        option.setEnabled(False)\n                    else:\n                        option.setEnabled(True)\n        except Exception as e:\n            MsgBox.OKmsgBox(\"Error\", f\"Error: {e}\", \"critical\")\n\n    def load_shapes_for_video_frame(self, json_file_name, index):\n        # this function loads the shapes for the video frame from the json file\n        # first we read the json file in the form of a list\n        # we need to parse from it data for the current frame\n\n        target_frame_idx = index\n        listObj = self.load_objects_from_json__orjson()\n\n        listObj = np.array(listObj)\n\n        shapes = []\n        i = target_frame_idx - 1\n        frame_objects = listObj[i]['frame_data']\n        for object_ in frame_objects:\n            shape = {}\n            shape[\"label\"] = object_[\"class_name\"]\n            shape[\"group_id\"] = (object_['tracker_id'])\n            shape[\"content\"] = (object_['confidence'])\n            shape[\"bbox\"] = object_['bbox']\n            points = object_['segment']\n            points = np.array(points, np.int16).flatten().tolist()\n            shape[\"points\"] = points\n            shape[\"shape_type\"] = \"polygon\"\n            shape[\"other_data\"] = {}\n            shape[\"flags\"] = {}\n            shapes.append(shape)\n\n        self.CURRENT_SHAPES_IN_IMG = shapes\n\n    def loadFramefromVideo(self, frame_array, index=1):\n        \n        self.resetState()\n        self.canvas.setEnabled(False)\n\n        self.imageData = frame_array.data\n\n        self.CURRENT_FRAME_IMAGE = frame_array\n        image = QtGui.QImage(self.imageData, self.imageData.shape[1], self.imageData.shape[0],\n                             QtGui.QImage.Format.Format_BGR888)\n        self.image = image\n        if self._config[\"keep_prev\"]:\n            prev_shapes = self.canvas.shapes\n\n        flags = {k: False for k in self._config[\"flags\"] or []}\n        self.canvas.loadPixmap(QtGui.QPixmap.fromImage(image))\n\n        if self.TrackingMode:\n            image = self.draw_bb_on_image(image, self.CURRENT_SHAPES_IN_IMG)\n            self.canvas.loadPixmap(QtGui.QPixmap.fromImage(image))\n            if len(self.CURRENT_SHAPES_IN_IMG) > 0:\n                self.loadLabels(self.CURRENT_SHAPES_IN_IMG)\n        else:\n            if self.labelFile:\n                self.CURRENT_SHAPES_IN_IMG = self.labelFile.shapes\n                image = self.draw_bb_on_image(\n                    image, self.CURRENT_SHAPES_IN_IMG)\n                self.canvas.loadPixmap(QtGui.QPixmap.fromImage(image))\n                self.loadLabels(self.labelFile.shapes)\n                if self.labelFile.flags is not None:\n                    flags.update(self.labelFile.flags)\n\n            else:\n                json_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.json'\n                if os.path.exists(json_file_name):\n                    self.load_shapes_for_video_frame(json_file_name, index)\n                    image = self.draw_bb_on_image(\n                        image, self.CURRENT_SHAPES_IN_IMG)\n                    self.canvas.loadPixmap(QtGui.QPixmap.fromImage(image))\n                    if len(self.CURRENT_SHAPES_IN_IMG) > 0:\n                        
self.loadLabels(self.CURRENT_SHAPES_IN_IMG)\n\n        self.loadFlags(flags)\n        self.setClean()\n        self.canvas.setEnabled(True)\n        # set zoom values\n        is_initial_load = not self.zoom_values\n        if self.filename in self.zoom_values:\n            self.zoomMode = self.zoom_values[self.filename][0]\n            self.setZoom(self.zoom_values[self.filename][1])\n        elif is_initial_load or not self._config[\"keep_prev_scale\"]:\n            self.adjustScale(initial=True)\n        # set scroll values\n\n        self.paintCanvas()\n        self.toggleActions(True)\n        self.canvas.setFocus()\n        self.status(self.tr(\n            f'Loaded {self.CURRENT_VIDEO_NAME} frame {self.INDEX_OF_CURRENT_FRAME}'))\n\n    def nextFrame_buttonClicked(self):\n        self.update_current_frame_annotation_button_clicked()\n        # first ensure that the new value of the slider is not greater than the total number of frames\n        new_value = self.INDEX_OF_CURRENT_FRAME + self.FRAMES_TO_SKIP\n        if new_value >= self.TOTAL_VIDEO_FRAMES:\n            new_value = self.TOTAL_VIDEO_FRAMES\n        self.main_video_frames_slider.setValue(new_value)\n\n    def next_1_Frame_buttonClicked(self):\n        self.update_current_frame_annotation_button_clicked()\n        # first ensure that the new value of the slider is not greater than the total number of frames\n        new_value = self.INDEX_OF_CURRENT_FRAME + 1\n        if new_value >= self.TOTAL_VIDEO_FRAMES:\n            new_value = self.TOTAL_VIDEO_FRAMES\n        self.main_video_frames_slider.setValue(new_value)\n\n    def previousFrame_buttonClicked(self):\n        self.update_current_frame_annotation_button_clicked()\n        new_value = self.INDEX_OF_CURRENT_FRAME - self.FRAMES_TO_SKIP\n        if new_value <= 0:\n            new_value = 0\n        self.main_video_frames_slider.setValue(new_value)\n\n    def previous_1_Frame_buttonclicked(self):\n        self.update_current_frame_annotation_button_clicked()\n        new_value = self.INDEX_OF_CURRENT_FRAME - 1\n        if new_value <= 0:\n            new_value = 0\n        self.main_video_frames_slider.setValue(new_value)\n\n    def frames_to_skip_slider_changed(self):\n        self.FRAMES_TO_SKIP = self.frames_to_skip_slider.value()\n        zeros = (2 - int(np.log10(self.FRAMES_TO_SKIP + 0.9))) * '0'\n        self.frames_to_skip_label.setText(\n            'Jump forward/backward frames: ' + zeros + str(self.FRAMES_TO_SKIP))\n\n    def playPauseButtonClicked(self):\n        # we can check the state of the button by checking self.playPauseButton_mode\n\n        if self.playPauseButton_mode == \"Play\":\n            self.playPauseButton_mode = \"Pause\"\n            self.playPauseButton.setShortcut(self._config['shortcuts']['play'])\n            self.playPauseButton.setToolTip(\n                f'Play ({self._config[\"shortcuts\"][\"play\"]})')\n            self.playPauseButton.setIcon(\n                self.style().standardIcon(QtWidgets.QStyle.StandardPixmap.SP_MediaPause))\n            # play the video at the current fps until the user clicks pause\n            self.play_timer = QtCore.QTimer(self)\n            # use play_timer.timeout.connect to call a function every time the timer times out\n            # but we need to call the function every interval of time\n            # so we need to call the function every 1/fps seconds\n            self.play_timer.timeout.connect(self.move_frame_by_frame)\n            self.play_timer.start(40)\n            # note that the timer 
interval is in milliseconds\n\n            # while self.timer.isActive():\n        elif self.playPauseButton_mode == \"Pause\":\n            # first stop the timer\n            self.play_timer.stop()\n\n            self.playPauseButton_mode = \"Play\"\n            self.playPauseButton.setShortcut(self._config['shortcuts']['play'])\n            self.playPauseButton.setToolTip(\n                f'Pause ({self._config[\"shortcuts\"][\"play\"]})')\n            self.playPauseButton.setIcon(\n                self.style().standardIcon(QtWidgets.QStyle.StandardPixmap.SP_MediaPlay))\n        \n    def move_frame_by_frame(self):\n        QtWidgets.QApplication.processEvents()\n        self.main_video_frames_slider.setValue(self.INDEX_OF_CURRENT_FRAME + 1)\n\n    def main_video_frames_slider_changed(self):\n\n        if self.current_annotation_mode != \"video\":\n            return\n\n        if self.sam_model_comboBox.currentIndex() != 0 and self.canvas.SAM_mode != \"finished\" and not self.TrackingMode:\n            self.sam_clear_annotation_button_clicked()\n            self.sam_buttons_colors(\"X\")\n\n        try:\n            x = self.CURRENT_VIDEO_PATH\n        except:\n            return\n\n        frame_idx = self.main_video_frames_slider.value()\n\n        self.INDEX_OF_CURRENT_FRAME = frame_idx\n        self.CAP.set(cv2.CAP_PROP_POS_FRAMES, frame_idx - 1)\n\n        # setting text of labels\n        fps = self.CAP.get(cv2.CAP_PROP_FPS)\n        zeros = (int(np.log10(self.TOTAL_VIDEO_FRAMES + 0.9)) -\n                 int(np.log10(frame_idx + 0.9))) * '0'\n        self.main_video_frames_label_1.setText(\n            f'frame {zeros}{frame_idx} / {int(self.TOTAL_VIDEO_FRAMES)}')\n        self.frame_time = mathOps.mapFrameToTime(frame_idx, fps)\n        frame_text = (\"%02d:%02d:%02d:%03d\" % (\n            self.frame_time[0], self.frame_time[1], self.frame_time[2], self.frame_time[3]))\n        video_duration = mathOps.mapFrameToTime(self.TOTAL_VIDEO_FRAMES, fps)\n        video_duration_text = (\"%02d:%02d:%02d:%03d\" % (\n            video_duration[0], video_duration[1], video_duration[2], video_duration[3]))\n        final_text = frame_text + \" / \" + video_duration_text\n        self.main_video_frames_label_2.setText(f'time {final_text}')\n\n        # reading the current frame from the video and loading it into the canvas\n        success, img = self.CAP.read()\n        if success:\n            frame_array = np.array(img)\n            self.loadFramefromVideo(frame_array, frame_idx)\n        else:\n            pass\n        self.frames_to_track_slider.setMaximum(\n            self.TOTAL_VIDEO_FRAMES - self.INDEX_OF_CURRENT_FRAME)\n\n    def frames_to_track_input_changed(self, text):\n        try:\n            value = int(text)\n            if 2 <= value <= self.frames_to_track_slider.maximum():\n                self.frames_to_track_slider.setValue(value)\n            elif value > self.frames_to_track_slider.maximum():\n                self.frames_to_track_slider.setValue(\n                    self.frames_to_track_slider.maximum())\n            elif value < 2:\n                self.frames_to_track_slider.setValue(1)\n        except ValueError:\n            pass\n\n    def frames_to_track_slider_changed(self, value):\n        self.frames_to_track_input.setText(str(value))\n        self.FRAMES_TO_TRACK = self.frames_to_track_slider.value()\n\n    def track_assigned_objects_button_clicked(self):\n        # first check if there is objects in self.canvas.shapes list or not . 
if not, show an error message and return\n        if len(self.labelList.selectedItems()) == 0:\n            self.errorMessage(\n                \"No objects to track\",\n                \"You need to select at least one object to track.\",\n            )\n            return\n\n        self.TRACK_ASSIGNED_OBJECTS_ONLY = True\n        self.track_buttonClicked()\n        self.TRACK_ASSIGNED_OBJECTS_ONLY = False\n\n    def update_gui_after_tracking(self, index):\n        if index != self.FRAMES_TO_TRACK - 1:\n            self.main_video_frames_slider.setValue(\n                self.INDEX_OF_CURRENT_FRAME + 1)\n        QtWidgets.QApplication.processEvents()\n\n    def certain_area_clicked(self, index):\n\n        self.canvas.cancelManualDrawing()\n        self.setEditMode()\n        self.canvas.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.CrossCursor))\n\n        if index == 0:\n            self.canvas.tracking_area = \"\"\n            self.canvas.tracking_area_polygon = []\n        else:\n            self.canvas.tracking_area = \"drawing\"\n            self.canvas.tracking_area_polygon = []\n\n    def track_dropdown_changed(self, index):\n        self.selected_option = index\n\n    def start_tracking_button_clicked(self):\n        try:\n            try:\n                if self.selected_option == 0:\n                    self.track_buttonClicked()\n                elif self.selected_option == 1:\n                    self.track_assigned_objects_button_clicked()\n                elif self.selected_option == 2:\n                    self.track_full_video_button_clicked()\n            except Exception as e:\n                self.track_buttonClicked()\n        except Exception as e:\n            MsgBox.OKmsgBox(\"Error\", f\"Error: {e}\", \"critical\")\n\n    def track_buttonClicked(self):\n\n        # Disable Exports & Change button text\n        self.actions.export.setEnabled(False)\n\n        self.tracking_progress_bar.setVisible(True)\n\n        # first we need to check whether there is a json file with the same name as the video\n        listObj = self.load_objects_from_json__orjson()\n\n        existing_annotation = False\n        shapes = self.canvas.shapes\n        tracks_to_follow = None\n        if len(shapes) > 0:\n            existing_annotation = True\n            tracks_to_follow = []\n            for shape in shapes:\n                if shape.group_id is not None:\n                    tracks_to_follow.append(int(shape.group_id))\n\n        self.TrackingMode = True\n        curr_frame, prev_frame = None, None\n\n        if self.FRAMES_TO_TRACK + self.INDEX_OF_CURRENT_FRAME <= self.TOTAL_VIDEO_FRAMES:\n            number_of_frames_to_track = self.FRAMES_TO_TRACK\n        else:\n            number_of_frames_to_track = self.TOTAL_VIDEO_FRAMES - self.INDEX_OF_CURRENT_FRAME\n\n        self.interrupted = False\n        for i in range(number_of_frames_to_track):\n            QtWidgets.QApplication.processEvents()\n            if self.interrupted:\n                self.interrupted = False\n                break\n            if i % 100 == 0:\n                self.load_objects_to_json__orjson(listObj)\n            self.tracking_progress_bar.setValue(\n                int((i + 1) / number_of_frames_to_track * 100))\n\n            if existing_annotation:\n                existing_annotation = False\n                shapes = self.canvas.shapes\n                shapes = mathOps.convert_qt_shapes_to_shapes(shapes)\n            else:\n                with torch.no_grad():\n                    shapes = 
self.annotate_one(called_from_tracking=True)\n\n            curr_frame = self.CURRENT_FRAME_IMAGE\n            if len(shapes) == 0:\n                self.update_gui_after_tracking(i)\n                continue\n\n            for shape in shapes:\n                if shape['content'] is None:\n                    shape['content'] = 1.0\n            boxes, confidences, class_ids, segments = mathOps.get_boxes_conf_classids_segments(\n                shapes)\n\n            boxes = np.array(boxes, dtype=int)\n            confidences = np.array(confidences)\n            class_ids = np.array(class_ids)\n            detections = Detections(\n                xyxy=boxes,\n                confidence=confidences,\n                class_id=class_ids,\n            )\n            boxes = torch.from_numpy(detections.xyxy)\n            confidences = torch.from_numpy(detections.confidence)\n            class_ids = torch.from_numpy(detections.class_id)\n\n            dets = torch.cat((boxes, confidences.unsqueeze(\n                1), class_ids.unsqueeze(1)), dim=1)\n            dets = dets.to(torch.float32)\n            if hasattr(self.tracker, 'tracker') and hasattr(self.tracker.tracker, 'camera_update'):\n                if prev_frame is not None and curr_frame is not None:  # camera motion compensation\n                    self.tracker.tracker.camera_update(prev_frame, curr_frame)\n            prev_frame = curr_frame\n            with torch.no_grad():\n                org_tracks = self.tracker.update(\n                    dets.cpu(), self.CURRENT_FRAME_IMAGE)\n\n            tracks = []\n            for org_track in org_tracks:\n                track = []\n                # use a separate index (j) so the outer frame counter (i) is not shadowed\n                for j in range(6):\n                    track.append(int(org_track[j]))\n                track[4] += int(self.maxID)\n                track.append(org_track[6])\n\n                tracks.append(track)\n\n            matched_shapes, unmatched_shapes = mathOps.match_detections_with_tracks(\n                shapes, tracks)\n            shapes = matched_shapes\n\n            self.CURRENT_SHAPES_IN_IMG = [\n                shape_ for shape_ in shapes if shape_[\"group_id\"] is not None]\n\n            if self.TRACK_ASSIGNED_OBJECTS_ONLY and tracks_to_follow is not None:\n                try:\n                    if len(self.labelList.selectedItems()) != 0:\n                        tracks_to_follow = []\n                        for item in self.labelList.selectedItems():\n                            x = item.text()\n                            i1, i2 = x.find('D'), x.find(':')\n                            tracks_to_follow.append(int(x[i1 + 2:i2]))\n                    self.CURRENT_SHAPES_IN_IMG = [\n                        shape_ for shape_ in shapes if shape_[\"group_id\"] in tracks_to_follow]\n                except:\n                    # this happens when the user selects a label that is not a tracked object, so extracting the tracker id fails\n                    # show a message box to the user (hinting to use the tracker on the image first so that the label has a tracker id to be selected)\n                    self.errorMessage(\n                        'Error', 'Please use the tracker on the image first so that you can select labels with IDs to track')\n\n                    return\n\n            # the json output file is a list of frames; each frame stores its frame_idx and a list of tracked objects, and each object stores its tracker_id, bbox, confidence, class_name, class_id and segment\n            json_frame = {}\n     
       json_frame.update({'frame_idx': self.INDEX_OF_CURRENT_FRAME})\n            json_frame_object_list = []\n            for shape in self.CURRENT_SHAPES_IN_IMG:\n                self.rec_frame_for_id(\n                    int(shape[\"group_id\"]), self.INDEX_OF_CURRENT_FRAME, type_='add')\n                json_tracked_object = {}\n                json_tracked_object['tracker_id'] = int(shape[\"group_id\"])\n                json_tracked_object['bbox'] = [int(i) for i in shape['bbox']]\n                json_tracked_object['confidence'] = shape[\"content\"]\n                json_tracked_object['class_name'] = shape[\"label\"]\n                json_tracked_object['class_id'] = coco_classes.index(\n                    shape[\"label\"]) if shape[\"label\"] in coco_classes else -1\n                points = shape[\"points\"]\n                segment = [[int(points[z]), int(points[z + 1])]\n                           for z in range(0, len(points), 2)]\n                json_tracked_object['segment'] = segment\n\n                json_frame_object_list.append(json_tracked_object)\n\n            json_frame.update({'frame_data': json_frame_object_list})\n\n            listObj[self.INDEX_OF_CURRENT_FRAME - 1] = json_frame\n\n            QtWidgets.QApplication.processEvents()\n            self.update_gui_after_tracking(i)\n            print('finished tracking for frame ', self.INDEX_OF_CURRENT_FRAME)\n            \n        self.load_objects_to_json__orjson(listObj)\n\n        # Notify the user that the tracking is finished\n        self._config = get_config()\n        if not self._config[\"mute\"]:\n            if not self.isActiveWindow():\n                notification.PopUp(\"Tracking Completed\")\n\n        self.TrackingMode = False\n        self.labelFile = None\n        self.main_video_frames_slider.setValue(self.INDEX_OF_CURRENT_FRAME - 1)\n        self.main_video_frames_slider.setValue(self.INDEX_OF_CURRENT_FRAME)\n\n        self.tracking_progress_bar.hide()\n        self.tracking_progress_bar.setValue(0)\n\n        # Enable Exports & Restore button Text and Color\n        self.actions.export.setEnabled(True)\n\n    def track_full_video_button_clicked(self):\n        self.FRAMES_TO_TRACK = int(\n            self.TOTAL_VIDEO_FRAMES - self.INDEX_OF_CURRENT_FRAME)\n        self.track_buttonClicked()\n\n    def set_video_controls_visibility(self, visible=False):\n        # make it invisible by default\n        self.videoControls.setVisible(visible)\n        for widget in self.videoControls.children():\n            try:\n                widget.setVisible(visible)\n            except:\n                pass\n        self.videoControls_2.setVisible(visible)\n        for widget in self.videoControls_2.children():\n            try:\n                widget.setVisible(visible)\n            except:\n                pass\n\n    def traj_checkBox_changed(self):\n        try:\n            self.CURRENT_ANNOATAION_FLAGS[\"traj\"] = self.traj_checkBox.isChecked(\n            )\n            self.update_current_frame_annotation()\n            self.main_video_frames_slider_changed()\n        except:\n            pass\n\n    def mask_checkBox_changed(self):\n        try:\n            self.CURRENT_ANNOATAION_FLAGS[\"mask\"] = self.mask_checkBox.isChecked(\n            )\n            self.update_current_frame_annotation()\n            self.main_video_frames_slider_changed()\n        except:\n            pass\n        self.refresh_image_MODE()\n\n    def class_checkBox_changed(self):\n        try:\n            
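# update the class-name visibility flag, save the current frame annotation, and redraw it\n            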
self.CURRENT_ANNOATAION_FLAGS[\"class\"] = self.class_checkBox.isChecked(\n            )\n            self.update_current_frame_annotation()\n            self.main_video_frames_slider_changed()\n        except:\n            pass\n        self.refresh_image_MODE()\n\n    def conf_checkBox_changed(self):\n        try:\n            self.CURRENT_ANNOATAION_FLAGS[\"conf\"] = self.conf_checkBox.isChecked(\n            )\n            self.update_current_frame_annotation()\n            self.main_video_frames_slider_changed()\n        except:\n            pass\n        self.refresh_image_MODE()\n\n    def id_checkBox_changed(self):\n        try:\n            self.CURRENT_ANNOATAION_FLAGS[\"id\"] = self.id_checkBox.isChecked()\n            self.update_current_frame_annotation()\n            self.main_video_frames_slider_changed()\n        except:\n            pass\n\n    def bbox_checkBox_changed(self):\n        try:\n            self.CURRENT_ANNOATAION_FLAGS[\"bbox\"] = self.bbox_checkBox.isChecked(\n            )\n            self.update_current_frame_annotation()\n            self.main_video_frames_slider_changed()\n        except:\n            pass\n        self.refresh_image_MODE()\n\n    def polygons_visable_checkBox_changed(self):\n        try:\n            self.CURRENT_ANNOATAION_FLAGS[\"polygons\"] = self.polygons_visable_checkBox.isChecked(\n            )\n            self.update_current_frame_annotation()\n            for shape in self.canvas.shapes:\n                self.canvas.setShapeVisible(\n                    shape, self.CURRENT_ANNOATAION_FLAGS[\"polygons\"])\n        except:\n            pass\n\n    def export_as_video_button_clicked(self, output_filename=None):\n        self.update_current_frame_annotation()\n        input_video_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}.mp4'\n        output_video_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.mp4'\n        if output_filename is not False:\n            output_video_file_name = output_filename\n        input_cap = cv2.VideoCapture(input_video_file_name)\n        output_cap = cv2.VideoWriter(output_video_file_name, cv2.VideoWriter_fourcc(\n            *'mp4v'), int(self.CURRENT_VIDEO_FPS), (int(self.CURRENT_VIDEO_WIDTH), int(self.CURRENT_VIDEO_HEIGHT)))\n        listObj = self.load_objects_from_json__orjson()\n\n        # make a progress bar for exporting video (with percentage of progress)   TO DO LATER\n        empty_frame = False\n        empty_video = True\n        for target_frame_idx in range(self.TOTAL_VIDEO_FRAMES):\n            try:\n                self.INDEX_OF_CURRENT_FRAME = target_frame_idx + 1\n                ret, image = input_cap.read()\n                shapes = []\n                frame_objects = listObj[target_frame_idx]['frame_data']\n                for object_ in frame_objects:\n                    shape = {}\n                    shape[\"label\"] = object_['class_name']\n                    shape[\"group_id\"] = str(object_['tracker_id'])\n                    shape[\"content\"] = str(object_['confidence'])\n                    shape[\"bbox\"] = object_['bbox']\n                    points = object_['segment']\n                    points = np.array(points, np.int16).flatten().tolist()\n                    shape[\"points\"] = points\n                    shape[\"shape_type\"] = \"polygon\"\n                    shape[\"other_data\"] = {}\n                    shape[\"flags\"] = {}\n                    shapes.append(shape)\n\n                if len(shapes) 
== 0:\n                    if not empty_frame:\n                        self.waitWindow(visible=True, text=f'Processing...')\n                        empty_frame = True\n                    continue\n                self.waitWindow(\n                    visible=True, text=f'Please Wait.\\nFrame {target_frame_idx} is being exported...')\n                image = self.draw_bb_on_image(\n                    image, shapes, image_qt_flag=False)\n                output_cap.write(image)\n                empty_frame = False\n                empty_video = False\n            except:\n                input_cap.release()\n                output_cap.release()\n\n        input_cap.release()\n        output_cap.release()\n        self.waitWindow()\n\n        try:\n            if empty_video:\n                os.remove(output_video_file_name)\n                return False\n        except:\n            pass\n\n        self.INDEX_OF_CURRENT_FRAME = self.main_video_frames_slider.value()\n        # show message saying that the video is exported\n        if output_filename is False:\n            MsgBox.OKmsgBox(\"Export Video\", \"Done Exporting Video\")\n\n        if output_filename is not False:\n            return output_filename\n\n    def clear_video_annotations_button_clicked(self):\n        self.global_listObj = []\n        self.CURRENT_ANNOATAION_TRAJECTORIES = {'length': 30,\n                                                'alpha': 0.70}\n        self.key_frames.clear()\n        self.id_frames_rec.clear()\n        self.minID = -2\n        self.maxID = 0\n\n        for shape in self.canvas.shapes:\n            self.canvas.deleteShape(shape)\n\n        self.CURRENT_SHAPES_IN_IMG = []\n\n        # just delete the json file and reload the video\n        # to delete the json file we need to know the name of the json file which is the same as the video name\n        json_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.json'\n        # now delete the json file if it exists\n        if os.path.exists(json_file_name):\n            os.remove(json_file_name)\n        MsgBox.OKmsgBox(\"clear annotations\",\n                        \"All video frames annotations are cleared\")\n        self.main_video_frames_slider.setValue(2)\n        self.main_video_frames_slider.setValue(1)\n\n    def update_current_frame_annotation_button_clicked(self):\n\n        if self.sam_model_comboBox.currentIndex() != 0 and self.canvas.SAM_mode != \"finished\" and not self.TrackingMode:\n            self.sam_clear_annotation_button_clicked()\n\n        try:\n            x = self.CURRENT_VIDEO_PATH\n        except:\n            return\n        self.update_current_frame_annotation()\n        self.main_video_frames_slider_changed()\n\n    def update_current_frame_annotation(self):\n\n        if self.current_annotation_mode != \"video\":\n            return\n\n        listObj = self.load_objects_from_json__orjson()\n\n        json_frame = {}\n        json_frame.update({'frame_idx': self.INDEX_OF_CURRENT_FRAME})\n        json_frame_object_list = []\n\n        shapes = mathOps.convert_qt_shapes_to_shapes(self.canvas.shapes)\n        for shape in shapes:\n            json_tracked_object = {}\n            if shape[\"group_id\"] != None:\n                json_tracked_object['tracker_id'] = int(shape[\"group_id\"])\n            else:\n                json_tracked_object['tracker_id'] = self.minID\n                self.minID -= 1\n            bbox = shape[\"bbox\"]\n            bbox = [int(bbox[0]), int(bbox[1]), 
int(bbox[2]), int(bbox[3])]\n            json_tracked_object['bbox'] = bbox\n            json_tracked_object['confidence'] = str(\n                shape[\"content\"] if shape[\"content\"] != None else 1)\n            json_tracked_object['class_name'] = shape[\"label\"]\n            json_tracked_object['class_id'] = coco_classes.index(\n                shape[\"label\"]) if shape[\"label\"] in coco_classes else -1\n            points = shape[\"points\"]\n            segment = [[int(points[z]), int(points[z + 1])]\n                       for z in range(0, len(points), 2)]\n            json_tracked_object['segment'] = segment\n\n            json_frame_object_list.append(json_tracked_object)\n        json_frame.update({'frame_data': json_frame_object_list})\n\n        listObj[self.INDEX_OF_CURRENT_FRAME - 1] = json_frame\n        \n        self.load_objects_to_json__orjson(listObj)\n        print(\"saved frame annotation\")\n\n    def trajectory_length_lineEdit_changed(self):\n        try:\n            text = self.trajectory_length_lineEdit.text()\n            self.CURRENT_ANNOATAION_TRAJECTORIES['length'] = int(\n                text) if text != '' else 1\n            self.main_video_frames_slider_changed()\n        except:\n            pass\n\n    def addVideoControls(self):\n        # add video controls toolbar with custom style (background color , spacing , hover color)\n        self.videoControls = QtWidgets.QToolBar()\n        self.videoControls.setMovable(True)\n        self.videoControls.setFloatable(True)\n        self.videoControls.setObjectName(\"videoControls\")\n        self.videoControls.setStyleSheet(\n            \"QToolBar#videoControls { border: 50px }\")\n        self.addToolBar(Qt.ToolBarArea.BottomToolBarArea, self.videoControls)\n\n        self.videoControls_2 = QtWidgets.QToolBar()\n        self.videoControls_2.setMovable(True)\n        self.videoControls_2.setFloatable(True)\n        self.videoControls_2.setObjectName(\"videoControls_2\")\n        self.videoControls_2.setStyleSheet(\n            \"QToolBar#videoControls_2 { border: 50px }\")\n        self.addToolBar(Qt.ToolBarArea.TopToolBarArea, self.videoControls_2)\n\n        self.frames_to_skip_slider = QtWidgets.QSlider(QtCore.Qt.Orientation.Horizontal)\n        self.frames_to_skip_slider.setMinimum(1)\n        self.frames_to_skip_slider.setMaximum(100)\n        self.frames_to_skip_slider.setValue(3)\n        self.frames_to_skip_slider.setTickPosition(\n            QtWidgets.QSlider.TickPosition.TicksBelow)\n        self.frames_to_skip_slider.setTickInterval(1)\n        self.frames_to_skip_slider.setMaximumWidth(250)\n        self.frames_to_skip_slider.valueChanged.connect(\n            self.frames_to_skip_slider_changed)\n        self.frames_to_skip_label = QtWidgets.QLabel()\n        self.frames_to_skip_label.setStyleSheet(\n            \"QLabel { font-size: 10pt; font-weight: bold; }\")\n        self.frames_to_skip_slider.setValue(30)\n        self.videoControls.addWidget(self.frames_to_skip_label)\n        self.videoControls.addWidget(self.frames_to_skip_slider)\n\n        self.previousFrame_button = QtWidgets.QPushButton()\n        self.previousFrame_button.setText(\"<<\")\n        self.previousFrame_button.setShortcut(\n            self._config['shortcuts']['prev_x'])\n        self.previousFrame_button.setToolTip(\n            f'Jump Backward ({self._config[\"shortcuts\"][\"prev_x\"]})')\n        self.previousFrame_button.clicked.connect(\n            self.previousFrame_buttonClicked)\n\n        
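# single-frame step backward button\n        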
self.previous_1_Frame_button = QtWidgets.QPushButton()\n        self.previous_1_Frame_button.setText(\"<\")\n        self.previous_1_Frame_button.setShortcut(\n            self._config['shortcuts']['prev_1'])\n        self.previous_1_Frame_button.setToolTip(\n            f'Previous Frame ({self._config[\"shortcuts\"][\"prev_1\"]})')\n        self.previous_1_Frame_button.clicked.connect(\n            self.previous_1_Frame_buttonclicked)\n\n        self.playPauseButton = QtWidgets.QPushButton()\n        self.playPauseButton_mode = \"Play\"\n        self.playPauseButton.setShortcut(self._config['shortcuts']['play'])\n        self.playPauseButton.setToolTip(\n            f'Play ({self._config[\"shortcuts\"][\"play\"]})')\n        self.playPauseButton.setIcon(\n            self.style().standardIcon(QtWidgets.QStyle.StandardPixmap.SP_MediaPlay))\n\n        self.playPauseButton.setIconSize(QtCore.QSize(22, 22))\n        self.playPauseButton.setStyleSheet(\"QPushButton { margin: 5px;}\")\n        # when the button is clicked, print \"Pressed!\" in the terminal\n        self.playPauseButton.pressed.connect(self.playPauseButtonClicked)\n\n        self.nextFrame_button = QtWidgets.QPushButton()\n        self.nextFrame_button.setText(\">>\")\n        self.nextFrame_button.setShortcut(self._config['shortcuts']['next_x'])\n        self.nextFrame_button.setToolTip(\n            f'Jump forward ({self._config[\"shortcuts\"][\"next_x\"]})')\n        self.nextFrame_button.clicked.connect(self.nextFrame_buttonClicked)\n\n        self.next_1_Frame_button = QtWidgets.QPushButton()\n        self.next_1_Frame_button.setText(\">\")\n        self.next_1_Frame_button.setShortcut(\n            self._config['shortcuts']['next_1'])\n        self.next_1_Frame_button.setToolTip(\n            f'Next Frame ({self._config[\"shortcuts\"][\"next_1\"]})')\n        self.next_1_Frame_button.clicked.connect(\n            self.next_1_Frame_buttonClicked)\n\n        self.videoControls.addWidget(self.previousFrame_button)\n        self.videoControls.addWidget(self.previous_1_Frame_button)\n        self.videoControls.addWidget(self.playPauseButton)\n        self.videoControls.addWidget(self.next_1_Frame_button)\n        self.videoControls.addWidget(self.nextFrame_button)\n\n        self.main_video_frames_slider = QtWidgets.QSlider(QtCore.Qt.Orientation.Horizontal)\n        self.main_video_frames_slider.setMinimum(1)\n        self.main_video_frames_slider.setMaximum(100)\n        self.main_video_frames_slider.setValue(2)\n        self.main_video_frames_slider.setTickPosition(\n            QtWidgets.QSlider.TickPosition.TicksBelow)\n        self.main_video_frames_slider.setTickInterval(1)\n        self.main_video_frames_slider.setMaximumWidth(1000)\n        self.main_video_frames_slider.valueChanged.connect(\n            self.main_video_frames_slider_changed)\n        self.main_video_frames_label_1 = QtWidgets.QLabel()\n        self.main_video_frames_label_2 = QtWidgets.QLabel()\n        # make the label text bigger and bold\n        self.main_video_frames_label_1.setStyleSheet(\n            \"QLabel { font-size: 12pt; font-weight: bold; }\")\n        self.main_video_frames_label_2.setStyleSheet(\n            \"QLabel { font-size: 12pt; font-weight: bold; }\")\n        # labels should show the current frame number / total number of frames and cuurent time / total time\n        self.videoControls.addWidget(self.main_video_frames_label_1)\n        self.videoControls.addWidget(self.main_video_frames_slider)\n        
self.videoControls.addWidget(self.main_video_frames_label_2)\n\n        # now we start the videocontrols_2 toolbar widgets\n\n        # add the slider to control the video frame\n        self.frames_to_track_slider = QtWidgets.QSlider(QtCore.Qt.Orientation.Horizontal)\n        self.frames_to_track_slider.setMinimum(1)\n        self.frames_to_track_slider.setMaximum(100)\n        self.frames_to_track_slider.setValue(4)\n        self.frames_to_track_slider.setTickPosition(\n            QtWidgets.QSlider.TickPosition.TicksBelow)\n        self.frames_to_track_slider.setTickInterval(1)\n        self.frames_to_track_slider.setMaximumWidth(200)\n        self.frames_to_track_slider.valueChanged.connect(\n            self.frames_to_track_slider_changed)\n\n        # add text input to control the slider\n        self.frames_to_track_input = QtWidgets.QLineEdit()\n        self.frames_to_track_input.setText(\"4\")\n        # make the font bigger\n        self.frames_to_track_input.setStyleSheet(\n            \"QLineEdit { font-size: 10pt; }\")\n        self.frames_to_track_input.setMaximumWidth(50)\n        self.frames_to_track_input.textChanged.connect(\n            self.frames_to_track_input_changed)\n\n        self.frames_to_track_label_before = QtWidgets.QLabel(\"Track for\")\n        self.frames_to_track_label_before.setStyleSheet(\n            \"QLabel { font-size: 10pt; font-weight: bold; }\")\n        self.frames_to_track_label_after = QtWidgets.QLabel(\"frames\")\n        self.frames_to_track_label_after.setStyleSheet(\n            \"QLabel { font-size: 10pt; font-weight: bold; }\")\n        self.videoControls_2.addWidget(self.frames_to_track_label_before)\n        self.videoControls_2.addWidget(self.frames_to_track_input)\n        self.videoControls_2.addWidget(self.frames_to_track_label_after)\n        self.videoControls_2.addWidget(self.frames_to_track_slider)\n        self.frames_to_track_slider.setValue(10)\n\n        self.track_dropdown = QtWidgets.QComboBox()\n        self.track_dropdown.addItems(\n            [f\"Track for selected frames\", \"Track Only assigned objects\", \"Track Full Video\"])\n        self.track_dropdown.setCurrentIndex(0)\n        self.track_dropdown.currentIndexChanged.connect(\n            self.track_dropdown_changed)\n        self.videoControls_2.addWidget(self.track_dropdown)\n\n        self.start_button = QtWidgets.QPushButton(\"Start Tracking\")\n        self.start_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/start.png\"))\n        # make the icon bigger\n        self.start_button.setIconSize(QtCore.QSize(24, 24))\n        self.start_button.setStyleSheet(self.buttons_text_style_sheet)\n        self.start_button.clicked.connect(self.start_tracking_button_clicked)\n        self.videoControls_2.addWidget(self.start_button)\n\n        self.tracking_progress_bar_label = QtWidgets.QLabel()\n        self.tracking_progress_bar_label.setStyleSheet(\n            \"QLabel { font-size: 10pt; font-weight: bold; }\")\n        self.tracking_progress_bar_label.setText(\"Tracking Progress\")\n        self.videoControls_2.addWidget(self.tracking_progress_bar_label)\n\n        self.tracking_progress_bar = QtWidgets.QProgressBar()\n        self.tracking_progress_bar.setMaximumWidth(300)\n        self.tracking_progress_bar.setMinimum(0)\n        self.tracking_progress_bar.setMaximum(100)\n        self.tracking_progress_bar.setValue(0)\n        self.videoControls_2.addWidget(self.tracking_progress_bar)\n\n        self.track_stop_button = QtWidgets.QPushButton()\n     
   self.track_stop_button.setStyleSheet(\n            \"QPushButton {font-size: 10pt; margin: 2px 5px; padding: 2px 7px;font-weight: bold; background-color: #FF9090; color: #FFFFFF;} QPushButton:hover {background-color: #FF0000;} QPushButton:disabled {background-color: #7A7A7A;}\")\n        self.track_stop_button.setStyleSheet(\n            \"QPushButton {font-size: 10pt; margin: 2px 5px; padding: 2px 7px;font-weight: bold; background-color: #FF0000; color: #FFFFFF;} QPushButton:hover {background-color: #FE4242;} QPushButton:disabled {background-color: #7A7A7A;}\")\n\n        self.track_stop_button.setText(\"Stop Tracking\")\n        self.track_stop_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/stop.png\"))\n        # make the icon bigger\n        self.track_stop_button.setIconSize(QtCore.QSize(24, 24))\n        # self.track_stop_button.setShortcut(self._config['shortcuts']['stop'])\n        self.track_stop_button.setToolTip(\n            f'Stop Tracking ({self._config[\"shortcuts\"][\"stop\"]})')\n        self.track_stop_button.pressed.connect(\n            self.Escape_clicked)\n        self.videoControls_2.addWidget(self.track_stop_button)\n\n        # add 5 checkboxes to control the CURRENT ANNOATAION FLAGS including (bbox , id , class , mask , traj)\n        self.bbox_checkBox = QtWidgets.QCheckBox()\n        self.bbox_checkBox.setText(\"bbox\")\n        self.bbox_checkBox.setChecked(True)\n        self.bbox_checkBox.stateChanged.connect(self.bbox_checkBox_changed)\n\n        self.id_checkBox = QtWidgets.QCheckBox()\n        self.id_checkBox.setText(\"id\")\n        self.id_checkBox.setChecked(True)\n        self.id_checkBox.stateChanged.connect(self.id_checkBox_changed)\n\n        self.class_checkBox = QtWidgets.QCheckBox()\n        self.class_checkBox.setText(\"class\")\n        self.class_checkBox.setChecked(True)\n        self.class_checkBox.stateChanged.connect(self.class_checkBox_changed)\n\n        self.conf_checkBox = QtWidgets.QCheckBox()\n        self.conf_checkBox.setText(\"confidence\")\n        self.conf_checkBox.setChecked(True)\n        self.conf_checkBox.stateChanged.connect(self.conf_checkBox_changed)\n\n        self.mask_checkBox = QtWidgets.QCheckBox()\n        self.mask_checkBox.setText(\"mask\")\n        self.mask_checkBox.setChecked(True)\n        self.mask_checkBox.stateChanged.connect(self.mask_checkBox_changed)\n\n        self.traj_checkBox = QtWidgets.QCheckBox()\n        self.traj_checkBox.setText(\"trajectories\")\n        self.traj_checkBox.setChecked(False)\n        self.traj_checkBox.stateChanged.connect(self.traj_checkBox_changed)\n\n        # make qlineedit to alter the  self.CURRENT_ANNOATAION_TRAJECTORIES['length']  value\n        self.trajectory_length_lineEdit = QtWidgets.QLineEdit()\n        self.trajectory_length_lineEdit.setText(str(30))\n        self.trajectory_length_lineEdit.setMaximumWidth(50)\n        self.trajectory_length_lineEdit.editingFinished.connect(\n            self.trajectory_length_lineEdit_changed)\n\n        self.polygons_visable_checkBox = QtWidgets.QCheckBox()\n        self.polygons_visable_checkBox.setText(\"show polygons\")\n        self.polygons_visable_checkBox.setChecked(True)\n        self.polygons_visable_checkBox.stateChanged.connect(\n            self.polygons_visable_checkBox_changed)\n\n        self.vis_options = [self.id_checkBox, self.class_checkBox, self.bbox_checkBox, self.mask_checkBox,\n                            self.polygons_visable_checkBox, self.traj_checkBox, self.trajectory_length_lineEdit, 
self.conf_checkBox]\n        # add to self.vis_dock\n        self.vis_widget.setLayout(QtWidgets.QGridLayout())\n        self.vis_widget.layout().setContentsMargins(10, 10, 25, 10)  # set padding\n        self.vis_widget.layout().addWidget(self.id_checkBox, 0, 0)\n        self.vis_widget.layout().addWidget(self.class_checkBox, 0, 1)\n        self.vis_widget.layout().addWidget(self.bbox_checkBox, 1, 0)\n        self.vis_widget.layout().addWidget(self.mask_checkBox, 1, 1)\n        self.vis_widget.layout().addWidget(self.traj_checkBox, 2, 0)\n        self.vis_widget.layout().addWidget(self.trajectory_length_lineEdit, 2, 1)\n        self.vis_widget.layout().addWidget(self.polygons_visable_checkBox, 3, 0)\n        self.vis_widget.layout().addWidget(self.conf_checkBox, 3, 1)\n\n        for option in self.vis_options:\n            option.setEnabled(False)\n\n        # save current frame\n        self.update_current_frame_annotation_button = QtWidgets.QPushButton()\n        self.update_current_frame_annotation_button.setStyleSheet(\n            self.buttons_text_style_sheet)\n        self.update_current_frame_annotation_button.setText(\n            \"Apply Changes\")\n        self.update_current_frame_annotation_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/done.png\"))\n        # make the icon bigger\n        self.update_current_frame_annotation_button.setIconSize(\n            QtCore.QSize(24, 24))\n        self.update_current_frame_annotation_button.setShortcut(\n            self._config['shortcuts']['update_frame'])\n        self.update_current_frame_annotation_button.setToolTip(\n            f'Apply changes on current frame ({self._config[\"shortcuts\"][\"update_frame\"]})')\n        self.update_current_frame_annotation_button.clicked.connect(\n            self.update_current_frame_annotation_button_clicked)\n        self.videoControls_2.addWidget(\n            self.update_current_frame_annotation_button)\n\n        # add a button to clear all video annotations\n        self.clear_video_annotations_button = QtWidgets.QPushButton()\n        self.clear_video_annotations_button.setStyleSheet(\n            self.buttons_text_style_sheet)\n        self.clear_video_annotations_button.setText(\"Clear All\")\n        self.clear_video_annotations_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/clear.png\"))\n        # make the icon bigger\n        self.clear_video_annotations_button.setIconSize(QtCore.QSize(24, 24))\n        self.clear_video_annotations_button.setShortcut(\n            self._config['shortcuts']['clear_annotations'])\n        self.clear_video_annotations_button.setToolTip(\n            f'Clears Annotations from all frames ({self._config[\"shortcuts\"][\"clear_annotations\"]})')\n        self.clear_video_annotations_button.clicked.connect(\n            self.clear_video_annotations_button_clicked)\n        self.videoControls_2.addWidget(self.clear_video_annotations_button)\n\n        self.set_video_controls_visibility(False)\n\n    def draw_bb_on_image(self, image, shapes, image_qt_flag=True):\n        return visualizations.draw_bb_on_image(self.CURRENT_ANNOATAION_TRAJECTORIES,\n                                               self.INDEX_OF_CURRENT_FRAME,\n                                               self.CURRENT_ANNOATAION_FLAGS,\n                                               self.TOTAL_VIDEO_FRAMES,\n                                               image, shapes, image_qt_flag)\n\n    def waitWindow(self, visible=False, text=None):\n        if visible:\n            
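# show the canvas loading overlay, optionally with a custom message\n            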
self.canvas.is_loading = True\n            if text is not None:\n                self.canvas.loading_text = text\n        else:\n            self.canvas.is_loading = False\n            self.canvas.loading_text = \"Loading...\"\n        self.canvas.repaint()\n        QtWidgets.QApplication.processEvents()\n\n    def set_sam_toolbar_enable(self, enable=False):\n        for widget in self.sam_toolbar.children():\n            try:\n                widget.setEnabled(enable or widget.accessibleName(\n                ) == 'sam_enhance_annotation_button' or widget.accessibleName() == 'sam_model_comboBox')\n            except:\n                pass\n\n    def set_sam_toolbar_visibility(self, visible=False):\n        if not visible:\n            try:\n                self.sam_clear_annotation_button_clicked()\n                self.sam_buttons_colors(\"X\")\n            except:\n                pass\n        self.sam_toolbar.setVisible(visible)\n        for widget in self.sam_toolbar.children():\n            try:\n                widget.setVisible(visible)\n            except:\n                pass\n\n    def addSamControls(self):\n        # add a toolbar\n        self.sam_toolbar = QtWidgets.QToolBar()\n        self.sam_toolbar.setMovable(True)\n        self.sam_toolbar.setFloatable(True)\n        self.sam_toolbar.setObjectName(\"sam_toolbar\")\n        self.sam_toolbar.setStyleSheet(\n            \"QToolBar#videoControls { border: 50px }\")\n        self.addToolBar(QtCore.Qt.ToolBarArea.TopToolBarArea, self.sam_toolbar)\n\n        # add a label that says \"sam model\"\n        self.sam_model_label = QtWidgets.QLabel()\n        self.sam_model_label.setText(\"SAM Model\")\n        self.sam_model_label.setStyleSheet(\n            \"QLabel { font-size: 10pt; font-weight: bold; }\")\n        self.sam_toolbar.addWidget(self.sam_model_label)\n\n        # add a dropdown menu to select the sam model\n        self.sam_model_comboBox = QtWidgets.QComboBox()\n        self.sam_model_comboBox.setAccessibleName(\"sam_model_comboBox\")\n        # add a label inside the combobox that says \"Select Model (SAM disabled)\" and make it unselectable\n        self.sam_model_comboBox.addItem(\"Select Model (SAM disabled)\")\n        self.sam_model_comboBox.addItems(self.sam_models())\n        self.sam_model_comboBox.currentIndexChanged.connect(\n            self.sam_model_comboBox_changed)\n        self.sam_toolbar.addWidget(self.sam_model_comboBox)\n\n        # add a button for adding a point in sam\n        self.sam_add_point_button = QtWidgets.QPushButton()\n        self.sam_add_point_button.setStyleSheet(\n            \"QPushButton { font-size: 10pt; font-weight: bold; }\")\n        self.sam_add_point_button.setText(\"Add\")\n        # add icon to button\n        self.sam_add_point_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/add.png\"))\n        # make the icon bigger\n        self.sam_add_point_button.setIconSize(QtCore.QSize(24, 24))\n        self.sam_add_point_button.setToolTip(\n            f'Add point ({self._config[\"shortcuts\"][\"SAM_add_point\"]})')\n        # set shortcut\n        self.sam_add_point_button.setShortcut(\n            self._config[\"shortcuts\"][\"SAM_add_point\"])\n        self.sam_add_point_button.clicked.connect(\n            self.sam_add_point_button_clicked)\n        self.sam_toolbar.addWidget(self.sam_add_point_button)\n\n        # add a button for removing a point in sam\n        self.sam_remove_point_button = QtWidgets.QPushButton()\n        
self.sam_remove_point_button.setStyleSheet(\n            \"QPushButton { font-size: 10pt; font-weight: bold; }\")\n        self.sam_remove_point_button.setText(\"Remove\")\n        # add icon to button\n        self.sam_remove_point_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/remove.png\"))\n        # make the icon bigger\n        self.sam_remove_point_button.setIconSize(QtCore.QSize(24, 24))\n        # set hover text\n        self.sam_remove_point_button.setToolTip(\n            f'Remove Point ({self._config[\"shortcuts\"][\"SAM_remove_point\"]})')\n        # set shortcut\n        self.sam_remove_point_button.setShortcut(\n            self._config[\"shortcuts\"][\"SAM_remove_point\"])\n        self.sam_remove_point_button.clicked.connect(\n            self.sam_remove_point_button_clicked)\n        self.sam_toolbar.addWidget(self.sam_remove_point_button)\n\n        # add a button for selecting a box in sam\n        self.sam_select_rect_button = QtWidgets.QPushButton()\n        self.sam_select_rect_button.setStyleSheet(\n            \"QPushButton { font-size: 10pt; font-weight: bold; }\")\n        self.sam_select_rect_button.setText(\"Box\")\n        # add icon to button\n        self.sam_select_rect_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/bbox.png\"))\n        # make the icon bigger\n        self.sam_select_rect_button.setIconSize(QtCore.QSize(24, 24))\n\n        # set hover text\n        self.sam_select_rect_button.setToolTip(\n            f'Add Box ({self._config[\"shortcuts\"][\"SAM_select_rect\"]})')\n        # set shortcut\n        self.sam_select_rect_button.setShortcut(\n            self._config[\"shortcuts\"][\"SAM_select_rect\"])\n        self.sam_select_rect_button.clicked.connect(\n            self.sam_select_rect_button_clicked)\n        self.sam_toolbar.addWidget(self.sam_select_rect_button)\n\n        # add a point for clearing the annotation\n        self.sam_clear_annotation_button = QtWidgets.QPushButton()\n        self.sam_clear_annotation_button.setStyleSheet(\n            \"QPushButton { font-size: 10pt; font-weight: bold; }\")\n        self.sam_clear_annotation_button.setText(\"Clear\")\n        # add icon to button\n        self.sam_clear_annotation_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/clear.png\"))\n        # make the icon bigger\n        self.sam_clear_annotation_button.setIconSize(QtCore.QSize(24, 24))\n        self.sam_clear_annotation_button.setShortcut(\n            self._config[\"shortcuts\"][\"SAM_clear\"])\n        self.sam_clear_annotation_button.setToolTip(\n            f'Clear points and boxes ({self._config[\"shortcuts\"][\"SAM_clear\"]})')\n        self.sam_clear_annotation_button.clicked.connect(\n            self.sam_clear_annotation_button_clicked)\n        self.sam_toolbar.addWidget(self.sam_clear_annotation_button)\n\n        # add a point of finish object annotation\n        self.sam_finish_annotation_button = QtWidgets.QPushButton()\n        self.sam_finish_annotation_button.setStyleSheet(\n            \"QPushButton { font-size: 10pt; font-weight: bold; }\")\n        self.sam_finish_annotation_button.setText(\"Finish\")\n        # add icon to button\n        self.sam_finish_annotation_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/done.png\"))\n        # make the icon bigger\n        self.sam_finish_annotation_button.setIconSize(QtCore.QSize(24, 24))\n\n        self.sam_finish_annotation_button.clicked.connect(\n            self.sam_finish_annotation_button_clicked)\n        # set hover 
text\n        self.sam_finish_annotation_button.setToolTip(\n            f'Finish Annotation ({self._config[\"shortcuts\"][\"SAM_finish_annotation\"]} or ENTER)')\n        # set shortcut\n        self.sam_finish_annotation_button.setShortcut(\n            self._config[\"shortcuts\"][\"SAM_finish_annotation\"])\n        self.sam_toolbar.addWidget(self.sam_finish_annotation_button)\n\n        # add a point of close SAM\n        self.sam_close_button = QtWidgets.QPushButton()\n        self.sam_close_button.setStyleSheet(\n            \"QPushButton { font-size: 10pt; font-weight: bold; }\")\n        self.sam_close_button.setText(\"Manual\")\n        # add icon to button\n        self.sam_close_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/objects.png\"))\n        # make the icon bigger\n        self.sam_close_button.setIconSize(QtCore.QSize(24, 24))\n\n        self.sam_close_button.setShortcut(\n            self._config[\"shortcuts\"][\"SAM_RESET\"])\n        self.sam_close_button.setToolTip(\n            f'Return to Manual Mode ({self._config[\"shortcuts\"][\"SAM_RESET\"]} or ESC)')\n        self.sam_close_button.clicked.connect(\n            self.sam_reset_button_clicked)\n        self.sam_toolbar.addWidget(self.sam_close_button)\n\n        # add a point of replace with SAM\n        self.sam_enhance_annotation_button = QtWidgets.QPushButton()\n        self.sam_enhance_annotation_button.setAccessibleName(\n            \"sam_enhance_annotation_button\")\n        self.sam_enhance_annotation_button.setStyleSheet(\n            \"QPushButton { font-size: 10pt; font-weight: bold; }\")\n        self.sam_enhance_annotation_button.setText(\"Enhance Polygons\")\n        # add icon to button\n        self.sam_enhance_annotation_button.setIcon(\n            QtGui.QIcon(\"labelme/icons/SAM.png\"))\n        # make the icon bigger\n        self.sam_enhance_annotation_button.setIconSize(QtCore.QSize(24, 24))\n        self.sam_enhance_annotation_button.setShortcut(\n            self._config[\"shortcuts\"][\"SAM_enhance\"])\n        self.sam_enhance_annotation_button.setToolTip(\n            f'Enhance Selected Polygons with SAM ({self._config[\"shortcuts\"][\"SAM_enhance\"]})')\n        self.sam_enhance_annotation_button.clicked.connect(\n            self.sam_enhance_annotation_button_clicked)\n        self.sam_toolbar.addWidget(self.sam_enhance_annotation_button)\n\n        self.set_sam_toolbar_enable(False)\n        self.sam_buttons_colors(\"x\")\n\n    def updateSamControls(self):\n        # remove all items from the combobox\n        self.sam_model_comboBox.clear()\n        # call the sam_models function to get all the models\n        self.sam_model_comboBox.addItem(\"Select Model (SAM disabled)\")\n        self.sam_model_comboBox.addItems(self.sam_models())\n\n    def sam_reset_button_clicked(self):\n        self.sam_clear_annotation_button_clicked()\n        self.setCreateMode()\n\n    def sam_enhance_annotation_button_clicked(self):\n        if self.sam_model_comboBox.currentText() == \"Select Model (SAM disabled)\":\n            MsgBox.OKmsgBox(\"SAM is disabled\",\n                            \"SAM is disabled.\\nPlease enable SAM.\")\n            return\n        try:\n            same_image = self.sam_predictor.check_image(\n                self.CURRENT_FRAME_IMAGE)\n        except:\n            return\n\n        toBeEnhanced = self.canvas.selectedShapes if len(\n            self.canvas.selectedShapes) > 0 else self.canvas.shapes\n\n        for shape in toBeEnhanced:\n            try:\n    
            self.canvas.shapes.remove(shape)\n                self.remLabels([shape])\n            except:\n                return\n            shapeX = mathOps.convert_qt_shapes_to_shapes([shape])[0]\n            x1, y1, x2, y2 = shapeX[\"bbox\"]\n            cur_bbox, cur_segment = self.sam_enhanced_bbox_segment(\n                self.CURRENT_FRAME_IMAGE, [x1, y1, x2, y2], 1.2, max_itr=5, forSHAPE=True)\n            shapeX[\"points\"] = cur_segment\n            shapeX = mathOps.convert_shapes_to_qt_shapes([shapeX])[0]\n            self.canvas.shapes.append(shapeX)\n            self.addLabel(shapeX)\n\n        if self.current_annotation_mode == \"video\":\n            self.update_current_frame_annotation_button_clicked()\n        else:\n            self.sam_clear_annotation_button_clicked()\n            self.refresh_image_MODE()\n\n        self.sam_buttons_colors(\"X\")\n\n    def sam_models(self):\n        cwd = os.getcwd()\n        with open(cwd + '/models_menu/sam_models.json') as f:\n            data = json.load(f)\n        # get all files in a directory\n        files = os.listdir(cwd + '/mmdetection/checkpoints/')\n        models = []\n        for model in data:\n            if model['checkpoint'].split('/')[-1] in files:\n                models.append(model['name'])\n        return models\n\n    def sam_model_comboBox_changed(self):\n        createFlag = self.canvas.mode == 0\n        self.canvas.cancelManualDrawing()\n        self.sam_clear_annotation_button_clicked()\n        self.sam_buttons_colors(\"X\")\n        if self.sam_model_comboBox.currentText() == \"Select Model (SAM disabled)\":\n            self.set_sam_toolbar_enable(False)\n            return\n        model_type = self.sam_model_comboBox.currentText()\n        self.waitWindow(\n            visible=True, text=f'Please Wait.\\n{model_type} is Loading...')\n        with open('models_menu/sam_models.json') as f:\n            data = json.load(f)\n        checkpoint_path = \"\"\n        for model in data:\n            if model['name'] == model_type:\n                checkpoint_path = model['checkpoint']\n        if checkpoint_path != \"\":\n            self.sam_predictor = Sam_Predictor(\n                model_type, checkpoint_path, device)\n        try:\n            self.sam_predictor.set_new_image(self.CURRENT_FRAME_IMAGE)\n        except:\n            print(\"please open an image first\")\n            self.waitWindow()\n            return\n        self.waitWindow()\n        print(\"done loading model\")\n\n        if createFlag:\n            self.setCreateMode()\n            if self.sam_last_mode == \"point\":\n                self.sam_add_point_button_clicked()\n            elif self.sam_last_mode == \"rectangle\":\n                self.sam_select_rect_button_clicked()\n        else:\n            self.setEditMode()\n\n    def sam_buttons_colors(self, mode):\n\n        setEnabled = False if self.sam_model_comboBox.currentText(\n        ) == \"Select Model (SAM disabled)\" else True\n        if not setEnabled:\n            self.set_sam_toolbar_enable(setEnabled)\n            self.set_sam_toolbar_colors(\"X\")\n            return\n\n        self.set_sam_toolbar_colors(mode)\n\n    def set_sam_toolbar_enable(self, setEnabled):\n        self.sam_add_point_button.setEnabled(setEnabled)\n        self.sam_remove_point_button.setEnabled(setEnabled)\n        self.sam_select_rect_button.setEnabled(setEnabled)\n        self.sam_clear_annotation_button.setEnabled(setEnabled)\n        
self.sam_finish_annotation_button.setEnabled(setEnabled)\n\n    def set_sam_toolbar_colors(self, mode):\n        red, green, blue, trans = \"#2D7CFA;\", \"#2D7CFA;\", \"#2D7CFA;\", \"#4B515A;\"\n        hover_const = \"QPushButton::hover { background-color : \"\n        disabled_const = \"QPushButton:disabled { color : #7A7A7A} \"\n        style_sheet_const = \"QPushButton { font-size: 10pt; font-weight: bold; color: #ffffff; background-color: \"\n\n        [add_style, add_hover] = [green, green] if mode == \"add\" else [trans, green]\n        [remove_style, remove_hover] = [\n            red, red] if mode == \"remove\" else [trans, red]\n        [rect_style, rect_hover] = [\n            green, green] if mode == \"rect\" else [trans, green]\n        [clear_style, clear_hover] = [\n            red, red] if mode == \"clear\" else [trans, red]\n        [finish_style, finish_hover] = [\n            blue, blue] if mode == \"finish\" else [trans, blue]\n        [replace_style, replace_hover] = [\n            blue, blue] if mode == \"replace\" else [trans, blue]\n\n        self.sam_add_point_button.setStyleSheet(\n            style_sheet_const + add_style + \";}\" + hover_const + add_hover + \";}\" + disabled_const)\n        self.sam_remove_point_button.setStyleSheet(\n            style_sheet_const + remove_style + \";}\" + hover_const + remove_hover + \";}\" + disabled_const)\n        self.sam_select_rect_button.setStyleSheet(\n            style_sheet_const + rect_style + \";}\" + hover_const + rect_hover + \";}\" + disabled_const)\n        self.sam_clear_annotation_button.setStyleSheet(\n            style_sheet_const + clear_style + \";}\" + hover_const + clear_hover + \";}\" + disabled_const)\n        self.sam_finish_annotation_button.setStyleSheet(\n            style_sheet_const + finish_style + \";}\" + hover_const + finish_hover + \";}\" + disabled_const)\n        self.sam_enhance_annotation_button.setStyleSheet(\n            style_sheet_const + replace_style + \";}\" + hover_const + replace_hover + \";}\" + disabled_const)\n\n    def sam_add_point_button_clicked(self):\n        self.canvas.cancelManualDrawing()\n        self.sam_last_mode = \"point\"\n        self.sam_buttons_colors(\"add\")\n        try:\n            same_image = self.sam_predictor.check_image(\n                self.CURRENT_FRAME_IMAGE)\n        except:\n            self.sam_buttons_colors(\"x\")\n            return\n        if not same_image:\n            self.sam_clear_annotation_button_clicked()\n            self.sam_buttons_colors(\"add\")\n        self.canvas.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.CrossCursor))\n        self.canvas.SAM_mode = \"add point\"\n\n    def sam_remove_point_button_clicked(self):\n        self.canvas.cancelManualDrawing()\n        self.sam_buttons_colors(\"remove\")\n        try:\n            same_image = self.sam_predictor.check_image(\n                self.CURRENT_FRAME_IMAGE)\n        except:\n            self.sam_buttons_colors(\"x\")\n            return\n        if not same_image:\n            self.sam_clear_annotation_button_clicked()\n            self.sam_buttons_colors(\"remove\")\n        self.canvas.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.CrossCursor))\n        self.canvas.SAM_mode = \"remove point\"\n\n    def sam_select_rect_button_clicked(self):\n        self.canvas.cancelManualDrawing()\n        self.sam_last_mode = \"rectangle\"\n        self.sam_buttons_colors(\"rect\")\n        try:\n            same_image = self.sam_predictor.check_image(\n                
self.CURRENT_FRAME_IMAGE)\n        except:\n            self.sam_buttons_colors(\"x\")\n            return\n        if not same_image:\n            self.sam_clear_annotation_button_clicked()\n            self.sam_buttons_colors(\"rect\")\n        self.canvas.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.CrossCursor))\n        self.canvas.SAM_mode = \"select rect\"\n\n    def sam_clear_annotation_button_clicked(self):\n        self.canvas.cancelManualDrawing()\n        self.sam_buttons_colors(\"clear\")\n        self.canvas.SAM_coordinates = []\n        self.canvas.SAM_mode = \"\"\n        self.canvas.SAM_rect = []\n        self.canvas.SAM_rects = []\n        self.current_sam_shape = None\n        try:\n            self.sam_predictor.clear_logit()\n        except:\n            pass\n        self.labelList.clear()\n        self.CURRENT_SHAPES_IN_IMG = mathOps.convert_qt_shapes_to_shapes(\n            self.canvas.shapes)\n        self.CURRENT_SHAPES_IN_IMG = self.check_sam_instance_in_shapes(\n            self.CURRENT_SHAPES_IN_IMG)\n\n        self.loadLabels(self.CURRENT_SHAPES_IN_IMG)\n\n    def sam_finish_annotation_button_clicked(self):\n        self.canvas.cancelManualDrawing()\n        self.sam_buttons_colors(\"finish\")\n        # return the cursor to normal\n        self.canvas.setCursor(QtGui.QCursor(QtCore.Qt.CursorShape.ArrowCursor))\n        self.canvas.SAM_coordinates = []\n        self.canvas.SAM_rect = []\n        self.canvas.SAM_rects = []\n        self.canvas.SAM_mode = \"finished\"\n        try:\n            self.sam_predictor.clear_logit()\n            if len(self.current_sam_shape) == 0:\n                return\n        except:\n            if self.sam_last_mode == \"point\":\n                self.sam_add_point_button_clicked()\n            elif self.sam_last_mode == \"rectangle\":\n                self.sam_select_rect_button_clicked()\n            return\n\n        self.labelList.clear()\n        sam_qt_shape = mathOps.convert_shapes_to_qt_shapes([self.current_sam_shape])[0]\n        self.canvas.SAM_current = sam_qt_shape\n        self.canvas.finalise(SAM_SHAPE=True)\n        self.CURRENT_SHAPES_IN_IMG = mathOps.convert_qt_shapes_to_shapes(\n            self.canvas.shapes)\n        self.CURRENT_SHAPES_IN_IMG = self.check_sam_instance_in_shapes(\n            self.CURRENT_SHAPES_IN_IMG)\n        try:\n            if self.current_sam_shape[\"group_id\"] != -1:\n                self.CURRENT_SHAPES_IN_IMG.append(self.current_sam_shape)\n\n            self.rec_frame_for_id(\n                self.current_sam_shape[\"group_id\"], self.INDEX_OF_CURRENT_FRAME)\n\n        except:\n            pass\n        self.loadLabels(self.CURRENT_SHAPES_IN_IMG)\n        # self.loadLabels(self.SAM_SHAPES_IN_IMAGE, replace=False)\n        # clear the predictor of the finished shape\n        self.sam_predictor.clear_logit()\n        self.canvas.SAM_coordinates = []\n        # explicitly clear instead of being overriden by the next shape\n        self.current_sam_shape = None\n        self.canvas.SAM_current = None\n        self.canvas.SAM_mode = \"\"\n\n        if self.current_annotation_mode == \"video\":\n            self.update_current_frame_annotation_button_clicked()\n        else:\n            self.canvas.shapes = mathOps.convert_shapes_to_qt_shapes(\n                self.CURRENT_SHAPES_IN_IMG)\n            self.sam_clear_annotation_button_clicked()\n            self.refresh_image_MODE()\n\n    def check_sam_instance_in_shapes(self, shapes):\n        if len(shapes) == 0:\n            return 
[]\n        for shape in shapes:\n            if shape[\"label\"] == \"SAM instance\":\n                # remove the shape from the list\n                shapes.remove(shape)\n        return shapes\n\n    def run_sam_model(self):\n\n        if self.sam_predictor is None or self.sam_model_comboBox.currentText() == \"Select Model (SAM disabled)\":\n            print(\"please select a model\")\n            return\n\n        try:\n            same_image = self.sam_predictor.check_image(\n                self.CURRENT_FRAME_IMAGE)\n        except:\n            self.sam_buttons_colors(\"x\")\n            return\n\n        # prepre the input format for SAM\n\n        input_points, input_labels = mathOps.SAM_points_and_labels_from_coordinates(\n            self.canvas.SAM_coordinates)\n        input_boxes = mathOps.SAM_rects_to_boxes(self.canvas.SAM_rects)\n\n        mask, score = self.sam_predictor.predict(point_coords=input_points,\n                                                 point_labels=input_labels,\n                                                 box=input_boxes,\n                                                 image=self.CURRENT_FRAME_IMAGE)\n\n        points = mathOps.mask_to_polygons(mask)\n        shape = mathOps.polygon_to_shape(points, score)\n        self.current_sam_shape = shape\n        self.labelList.clear()\n\n        self.CURRENT_SHAPES_IN_IMG = mathOps.convert_qt_shapes_to_shapes(\n            self.canvas.shapes)\n        self.CURRENT_SHAPES_IN_IMG = self.check_sam_instance_in_shapes(\n            self.CURRENT_SHAPES_IN_IMG)\n\n        self.CURRENT_SHAPES_IN_IMG.append(self.current_sam_shape)\n        self.loadLabels(self.CURRENT_SHAPES_IN_IMG)\n\n    def turnOFF_SAM(self):\n        if self.sam_model_comboBox.currentText() != \"Select Model (SAM disabled)\":\n            self.sam_clear_annotation_button_clicked()\n        self.sam_buttons_colors('x')\n        self.set_sam_toolbar_enable(False)\n        self.canvas.SAM_mode = \"\"\n        self.canvas.SAM_coordinates = []\n        self.canvas.SAM_rect = []\n        self.canvas.SAM_rects = []\n        self.canvas.SAM_current = None\n\n    def turnON_SAM(self):\n        if self.sam_model_comboBox.currentText() == \"Select Model (SAM disabled)\":\n            return\n        self.sam_buttons_colors(\"X\")\n        self.set_sam_toolbar_enable(True)\n        self.canvas.SAM_mode = \"\"\n        self.canvas.SAM_coordinates = []\n        self.canvas.SAM_rect = []\n        self.canvas.SAM_rects = []\n        self.canvas.SAM_current = None\n\n    def sam_enhanced_bbox_segment(self, frameIMAGE, cur_bbox, thresh, max_itr=5, forSHAPE=False):\n        oldAREA = abs(cur_bbox[2] - cur_bbox[0]) * \\\n            abs(cur_bbox[3] - cur_bbox[1])\n        [x1, y1, x2, y2] = [cur_bbox[0], cur_bbox[1],\n                            cur_bbox[2], cur_bbox[3]]\n        listPOINTS = [min(x1, x2), min(y1, y2),\n                      max(x1, x2), max(y1, y2)]\n        listPOINTS = [int(round(x)) for x in listPOINTS]\n        input_boxes = [listPOINTS]\n        mask, score = self.sam_predictor.predict(point_coords=None,\n                                                 point_labels=None,\n                                                 box=input_boxes,\n                                                 image=frameIMAGE)\n        points = mathOps.mask_to_polygons(mask)\n        SAMshape = mathOps.polygon_to_shape(points, score)\n        cur_segment = SAMshape['points']\n        cur_segment = [[int(cur_segment[i]), int(cur_segment[i + 1])]\n              
         for i in range(0, len(cur_segment), 2)]\n        cur_bbox = [min(np.array(cur_segment)[:, 0]), min(np.array(cur_segment)[:, 1]),\n                    max(np.array(cur_segment)[:, 0]), max(np.array(cur_segment)[:, 1])]\n        cur_bbox = [int(round(x)) for x in cur_bbox]\n        newAREA = abs(cur_bbox[2] - cur_bbox[0]) * \\\n            abs(cur_bbox[3] - cur_bbox[1])\n        bigger, smaller = max(oldAREA, newAREA), min(oldAREA, newAREA)\n        if bigger/smaller < thresh or max_itr == 1:\n            if forSHAPE:\n                return cur_bbox, SAMshape['points']\n            else:\n                return cur_bbox, cur_segment\n        else:\n            return self.sam_enhanced_bbox_segment(frameIMAGE, cur_bbox, thresh, max_itr-1, forSHAPE)\n\n    def load_objects_from_json__json(self):\n        if self.global_listObj != []:\n            return self.global_listObj\n        json_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.json'\n        return mathOps.load_objects_from_json__json(json_file_name, self.TOTAL_VIDEO_FRAMES)\n\n    def load_objects_to_json__json(self, listObj):\n        self.global_listObj = listObj\n        json_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.json'\n        mathOps.load_objects_to_json__json(json_file_name, listObj)\n\n    def load_objects_from_json__orjson(self):\n        if self.global_listObj != []:\n            return self.global_listObj\n        json_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.json'\n        return mathOps.load_objects_from_json__orjson(json_file_name, self.TOTAL_VIDEO_FRAMES)\n\n    def load_objects_to_json__orjson(self, listObj):\n        self.global_listObj = listObj\n        json_file_name = f'{self.CURRENT_VIDEO_PATH}/{self.CURRENT_VIDEO_NAME}_tracking_results.json'\n        mathOps.load_objects_to_json__orjson(json_file_name, listObj)\n\n############################# important parameters across the gui ############################################\n# INDEX_OF_CURRENT_FRAME\n# self.FRAMES_TO_SKIP\n# frames to track\n# self.TOTAL_VIDEO_FRAMES\n# self.CURRENT_VIDEO_FPS   --> to be used to play the video at the correct speed\n# self.CAP\n# self.CLASS_NAMES_DICT\n# self.CURRENT_FRAME_IMAGE\n# self.CURRENT_VIDEO_NAME\n# self.CURRENT_VIDEO_PATH\n# self.CURRENT_SHAPES_IN_IMG\n\n# self.CURRENT_ANNOATAION_FLAGS = {\"traj\" : False  ,\n#                                 \"bbox\" : False  ,\n#                                   \"id\" : False ,\n#                                   \"class\" : True,\n#                                   \"mask\" : True}\n# to do\n# remove the video processing tool bar in the other cases\n##############################################################################################################\n\n\n\n########################################## FIXME #############################################################\n# before line 63\n# - [medium] Set max zoom value to something big enough for FitWidth/Window\n# TODO(unknown):\n# - [high] Add polygon movement with arrow keys\n# - [high] Deselect shape when clicking and already selected(?)\n# - [low,maybe] Preview images on file dialogs.\n# - Zoom is too \"steppy\".\n##############################################################################################################\n\n\n\n######################################## Tracking Thread #####################################################\n# class TrackingThread(QThread):\n#     def 
__init__(self, parent=None):\n#         super(TrackingThread, self).__init__(parent)\n#         self.parent = parent\n\n#     def run(self):\n#         self.parent.track_buttonClicked()\n\n\n# def track_buttonClicked_wrapper(self):\n#     # ...\n\n#     # Disable the track button\n#     # self.actions.track.setEnabled(False)\n\n#     # Create a thread to run the tracking process\n#     self.thread = TrackingThread(parent=self)\n#     self.thread.start()\n##############################################################################################################\n\n\n\n####################################### trackButtonClicked ###################################################\n# import psutil\n# if self.INDEX_OF_CURRENT_FRAME % 10 == 0:\n#     print(\n#         f\"Total Memory: {psutil.virtual_memory().total / 1024 ** 3} GB | Free Memory: {psutil.virtual_memory().free / 1024 ** 3} GB | Percent Used: {psutil.virtual_memory().percent} %\")\n##############################################################################################################\n\n\n\n########################################## remove unnecessery use ############################################\n# update_current_frame_annotation(self)\n##############################################################################################################\n"
  },
  {
    "path": "DLTA_AI_app/labelme/cli/__init__.py",
    "content": "# flake8: noqa\n\nfrom . import draw_json\nfrom . import draw_label_png\nfrom . import json_to_dataset\nfrom . import on_docker\n"
  },
  {
    "path": "DLTA_AI_app/labelme/cli/draw_json.py",
    "content": "#!/usr/bin/env python\n\nimport argparse\nimport sys\n\nimport imgviz\nimport matplotlib.pyplot as plt\n\nfrom labelme.label_file import LabelFile\nfrom labelme import utils\n\n\nPY2 = sys.version_info[0] == 2\n\n\ndef main():\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"json_file\")\n    args = parser.parse_args()\n\n    label_file = LabelFile(args.json_file)\n    img = utils.img_data_to_arr(label_file.imageData)\n\n    label_name_to_value = {\"_background_\": 0}\n    for shape in sorted(label_file.shapes, key=lambda x: x[\"label\"]):\n        label_name = shape[\"label\"]\n        if label_name in label_name_to_value:\n            label_value = label_name_to_value[label_name]\n        else:\n            label_value = len(label_name_to_value)\n            label_name_to_value[label_name] = label_value\n    lbl, _ = utils.shapes_to_label(\n        img.shape, label_file.shapes, label_name_to_value\n    )\n\n    label_names = [None] * (max(label_name_to_value.values()) + 1)\n    for name, value in label_name_to_value.items():\n        label_names[value] = name\n    lbl_viz = imgviz.label2rgb(\n        label=lbl,\n        img=imgviz.asgray(img),\n        label_names=label_names,\n        font_size=30,\n        loc=\"rb\",\n    )\n\n    plt.subplot(121)\n    plt.imshow(img)\n    plt.subplot(122)\n    plt.imshow(lbl_viz)\n    plt.show()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/cli/draw_label_png.py",
    "content": "import argparse\n\nimport imgviz\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport PIL.Image\n\nfrom labelme.logger import logger\n\n\ndef main():\n    parser = argparse.ArgumentParser(\n        formatter_class=argparse.ArgumentDefaultsHelpFormatter\n    )\n    parser.add_argument(\"label_png\", help=\"label PNG file\")\n    args = parser.parse_args()\n\n    lbl = np.asarray(PIL.Image.open(args.label_png))\n\n    logger.info(\"label shape: {}\".format(lbl.shape))\n    logger.info(\"unique label values: {}\".format(np.unique(lbl)))\n\n    lbl_viz = imgviz.label2rgb(lbl)\n    plt.imshow(lbl_viz)\n    plt.show()\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/cli/json_to_dataset.py",
    "content": "import argparse\nimport base64\nimport json\nimport os\nimport os.path as osp\n\nimport imgviz\nimport PIL.Image\n\nfrom labelme.logger import logger\nfrom labelme import utils\n\n\ndef main():\n    logger.warning(\n        \"This script is aimed to demonstrate how to convert the \"\n        \"JSON file to a single image dataset.\"\n    )\n    logger.warning(\n        \"It won't handle multiple JSON files to generate a \"\n        \"real-use dataset.\"\n    )\n\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"json_file\")\n    parser.add_argument(\"-o\", \"--out\", default=None)\n    args = parser.parse_args()\n\n    json_file = args.json_file\n\n    if args.out is None:\n        out_dir = osp.basename(json_file).replace(\".\", \"_\")\n        out_dir = osp.join(osp.dirname(json_file), out_dir)\n    else:\n        out_dir = args.out\n    if not osp.exists(out_dir):\n        os.mkdir(out_dir)\n\n    data = json.load(open(json_file))\n    imageData = data.get(\"imageData\")\n\n    if not imageData:\n        imagePath = os.path.join(os.path.dirname(json_file), data[\"imagePath\"])\n        with open(imagePath, \"rb\") as f:\n            imageData = f.read()\n            imageData = base64.b64encode(imageData).decode(\"utf-8\")\n    img = utils.img_b64_to_arr(imageData)\n\n    label_name_to_value = {\"_background_\": 0}\n    for shape in sorted(data[\"shapes\"], key=lambda x: x[\"label\"]):\n        label_name = shape[\"label\"]\n        if label_name in label_name_to_value:\n            label_value = label_name_to_value[label_name]\n        else:\n            label_value = len(label_name_to_value)\n            label_name_to_value[label_name] = label_value\n    lbl, _ = utils.shapes_to_label(\n        img.shape, data[\"shapes\"], label_name_to_value\n    )\n\n    label_names = [None] * (max(label_name_to_value.values()) + 1)\n    for name, value in label_name_to_value.items():\n        label_names[value] = name\n\n    lbl_viz = imgviz.label2rgb(\n        label=lbl, img=imgviz.asgray(img), label_names=label_names, loc=\"rb\"\n    )\n\n    PIL.Image.fromarray(img).save(osp.join(out_dir, \"img.png\"))\n    utils.lblsave(osp.join(out_dir, \"label.png\"), lbl)\n    PIL.Image.fromarray(lbl_viz).save(osp.join(out_dir, \"label_viz.png\"))\n\n    with open(osp.join(out_dir, \"label_names.txt\"), \"w\") as f:\n        for lbl_name in label_names:\n            f.write(lbl_name + \"\\n\")\n\n    logger.info(\"Saved to: {}\".format(out_dir))\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/cli/on_docker.py",
    "content": "#!/usr/bin/env python\n\nfrom __future__ import print_function\n\nimport argparse\nimport distutils.spawn\nimport json\nimport os\nimport os.path as osp\nimport platform\nimport shlex\nimport subprocess\nimport sys\n\n\ndef get_ip():\n    dist = platform.platform().split(\"-\")[0]\n    if dist == \"Linux\":\n        return \"\"\n    elif dist == \"Darwin\":\n        cmd = \"ifconfig en0\"\n        output = subprocess.check_output(shlex.split(cmd))\n        if str != bytes:  # Python3\n            output = output.decode(\"utf-8\")\n        for row in output.splitlines():\n            cols = row.strip().split(\" \")\n            if cols[0] == \"inet\":\n                ip = cols[1]\n                return ip\n        else:\n            raise RuntimeError(\"No ip is found.\")\n    else:\n        raise RuntimeError(\"Unsupported platform.\")\n\n\ndef labelme_on_docker(in_file, out_file):\n    ip = get_ip()\n    cmd = \"xhost + %s\" % ip\n    subprocess.check_output(shlex.split(cmd))\n\n    if out_file:\n        out_file = osp.abspath(out_file)\n        if osp.exists(out_file):\n            raise RuntimeError(\"File exists: %s\" % out_file)\n        else:\n            open(osp.abspath(out_file), \"w\")\n\n    cmd = (\n        \"docker run -it --rm\"\n        \" -e DISPLAY={0}:0\"\n        \" -e QT_X11_NO_MITSHM=1\"\n        \" -v /tmp/.X11-unix:/tmp/.X11-unix\"\n        \" -v {1}:{2}\"\n        \" -w /home/developer\"\n    )\n    in_file_a = osp.abspath(in_file)\n    in_file_b = osp.join(\"/home/developer\", osp.basename(in_file))\n    cmd = cmd.format(\n        ip,\n        in_file_a,\n        in_file_b,\n    )\n    if out_file:\n        out_file_a = osp.abspath(out_file)\n        out_file_b = osp.join(\"/home/developer\", osp.basename(out_file))\n        cmd += \" -v {0}:{1}\".format(out_file_a, out_file_b)\n    cmd += \" wkentaro/labelme labelme {0}\".format(in_file_b)\n    if out_file:\n        cmd += \" -O {0}\".format(out_file_b)\n    subprocess.call(shlex.split(cmd))\n\n    if out_file:\n        try:\n            json.load(open(out_file))\n            return out_file\n        except Exception:\n            if open(out_file).read() == \"\":\n                os.remove(out_file)\n            raise RuntimeError(\"Annotation is cancelled.\")\n\n\ndef main():\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\"in_file\", help=\"Input file or directory.\")\n    parser.add_argument(\"-O\", \"--output\")\n    args = parser.parse_args()\n\n    if not distutils.spawn.find_executable(\"docker\"):\n        print(\"Please install docker\", file=sys.stderr)\n        sys.exit(1)\n\n    try:\n        out_file = labelme_on_docker(args.in_file, args.output)\n        if out_file:\n            print(\"Saved to: %s\" % out_file)\n    except RuntimeError as e:\n        sys.stderr.write(e.__str__() + \"\\n\")\n        sys.exit(1)\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/config/__init__.py",
    "content": "import os.path as osp\nimport shutil\n\nimport yaml\n\nfrom labelme.logger import logger\n\n\nhere = osp.dirname(osp.abspath(__file__))\n\n\ndef update_dict(target_dict, new_dict, validate_item=None):\n    for key, value in new_dict.items():\n        if validate_item:\n            validate_item(key, value)\n        if key not in target_dict:\n            logger.warn(\"Skipping unexpected key in config: {}\".format(key))\n            continue\n        if isinstance(target_dict[key], dict) and isinstance(value, dict):\n            update_dict(target_dict[key], value, validate_item=validate_item)\n        else:\n            target_dict[key] = value\n\n\n# -----------------------------------------------------------------------------\n\n\ndef get_default_config():\n    config_file = osp.join(here, \"default_config.yaml\")\n    with open(config_file) as f:\n        config = yaml.safe_load(f)\n\n    # save default config to ~/.labelmerc\n    user_config_file = osp.join(osp.expanduser(\"~\"), \".labelmerc\")\n    if not osp.exists(user_config_file):\n        try:\n            shutil.copy(config_file, user_config_file)\n        except Exception:\n            logger.warn(\"Failed to save config: {}\".format(user_config_file))\n\n    return config\n\n\ndef validate_config_item(key, value):\n    if key == \"validate_label\" and value not in [None, \"exact\"]:\n        raise ValueError(\n            \"Unexpected value for config key 'validate_label': {}\".format(\n                value\n            )\n        )\n    if key == \"shape_color\" and value not in [None, \"auto\", \"manual\"]:\n        raise ValueError(\n            \"Unexpected value for config key 'shape_color': {}\".format(value)\n        )\n    if key == \"labels\" and value is not None and len(value) != len(set(value)):\n        raise ValueError(\n            \"Duplicates are detected for config key 'labels': {}\".format(value)\n        )\n\n\ndef get_config(config_file_or_yaml=None, config_from_args=None):\n    # 1. default config\n    config = get_default_config()\n\n    # 2. specified as file or yaml\n    if config_file_or_yaml is not None:\n        config_from_yaml = yaml.safe_load(config_file_or_yaml)\n        if not isinstance(config_from_yaml, dict):\n            with open(config_from_yaml) as f:\n                logger.info(\n                    \"Loading config file from: {}\".format(config_from_yaml)\n                )\n                config_from_yaml = yaml.safe_load(f)\n        update_dict(\n            config, config_from_yaml, validate_item=validate_config_item\n        )\n\n    # 3. command line argument or specified config file\n    if config_from_args is not None:\n        update_dict(\n            config, config_from_args, validate_item=validate_config_item\n        )\n\n    return config\n"
  },
  {
    "path": "DLTA_AI_app/labelme/config/default_config.yaml",
    "content": "auto_save: false\r\ncanvas:\r\n  double_click: close\r\n  num_backups: 10\r\ndefault_classes:\r\n- person\r\n- bicycle\r\n- car\r\n- motorcycle\r\n- airplane\r\n- bus\r\n- train\r\n- truck\r\n- boat\r\n- traffic light\r\n- fire hydrant\r\n- stop sign\r\n- parking meter\r\n- bench\r\n- bird\r\n- cat\r\n- dog\r\n- horse\r\n- sheep\r\n- cow\r\n- elephant\r\n- bear\r\n- zebra\r\n- giraffe\r\n- backpack\r\n- umbrella\r\n- handbag\r\n- tie\r\n- suitcase\r\n- frisbee\r\n- skis\r\n- snowboard\r\n- sports ball\r\n- kite\r\n- baseball bat\r\n- baseball glove\r\n- skateboard\r\n- surfboard\r\n- tennis racket\r\n- bottle\r\n- wine glass\r\n- cup\r\n- fork\r\n- knife\r\n- spoon\r\n- bowl\r\n- banana\r\n- apple\r\n- sandwich\r\n- orange\r\n- broccoli\r\n- carrot\r\n- hot dog\r\n- pizza\r\n- donut\r\n- cake\r\n- chair\r\n- couch\r\n- potted plant\r\n- bed\r\n- dining table\r\n- toilet\r\n- tv\r\n- laptop\r\n- mouse\r\n- remote\r\n- keyboard\r\n- cell phone\r\n- microwave\r\n- oven\r\n- toaster\r\n- sink\r\n- refrigerator\r\n- book\r\n- clock\r\n- vase\r\n- scissors\r\n- teddy bear\r\n- hair drier\r\n- toothbrush\r\ndefault_shape_color:\r\n- 0\r\n- 255\r\n- 0\r\ndisplay_label_popup: true\r\nepsilon: 10.0\r\nfile_dock:\r\n  closable: true\r\n  floatable: true\r\n  movable: true\r\n  show: true\r\nfile_search: null\r\nfit_to_content:\r\n  column: true\r\n  row: false\r\nflag_dock:\r\n  closable: true\r\n  floatable: true\r\n  movable: true\r\n  show: true\r\nflags: null\r\nkeep_prev: false\r\nkeep_prev_brightness: false\r\nkeep_prev_contrast: false\r\nkeep_prev_scale: false\r\nlabel_colors: null\r\nlabel_completion: startswith\r\nlabel_dock:\r\n  closable: true\r\n  floatable: true\r\n  movable: true\r\n  show: true\r\nlabel_flags: null\r\nlabels: null\r\nlogger_level: info\r\nmute: false\r\nshape:\r\n  fill_color:\r\n  - 0\r\n  - 255\r\n  - 0\r\n  - 0\r\n  hvertex_fill_color:\r\n  - 255\r\n  - 255\r\n  - 255\r\n  - 255\r\n  line_color:\r\n  - 0\r\n  - 255\r\n  - 0\r\n  - 128\r\n  select_fill_color:\r\n  - 0\r\n  - 255\r\n  - 0\r\n  - 155\r\n  select_line_color:\r\n  - 255\r\n  - 255\r\n  - 255\r\n  - 255\r\n  vertex_fill_color:\r\n  - 0\r\n  - 255\r\n  - 0\r\n  - 255\r\nshape_color: auto\r\nshape_dock:\r\n  closable: true\r\n  floatable: true\r\n  movable: true\r\n  show: true\r\nshift_auto_shape_color: 0\r\nshortcuts:\r\n  SAM_RESET: X\r\n  SAM_add_point: A\r\n  SAM_clear: C\r\n  SAM_enhance: shift+E\r\n  SAM_finish_annotation: F\r\n  SAM_remove_point: R\r\n  SAM_select_rect: B\r\n  add_point_to_edge: Ctrl+Shift+P\r\n  clear_annotations: shift+D\r\n  close: Ctrl+W\r\n  copy: Ctrl+C\r\n  create_circle: null\r\n  create_line: null\r\n  create_linestrip: null\r\n  create_point: null\r\n  create_polygon: Ctrl+N\r\n  create_rectangle: null\r\n  delete_file: Ctrl+Delete\r\n  delete_polygon: Delete\r\n  duplicate_polygon: Ctrl+D\r\n  edit_label: Ctrl+L\r\n  edit_polygon: Ctrl+J\r\n  export: Ctrl+E\r\n  export_video: shift+V\r\n  fit_width: Ctrl+Shift+F\r\n  fit_window: null\r\n  ignore_updates: shift+Z\r\n  interpolate: shift+I\r\n  mark_as_key: shift+M\r\n  next_1: Right\r\n  next_x: Up\r\n  open: Ctrl+O\r\n  open_dir: Ctrl+U\r\n  open_next: null\r\n  open_prev: null\r\n  open_video: Ctrl+M\r\n  open_video_frames: Ctrl+F\r\n  paste: Ctrl+V\r\n  play: Space\r\n  prev_1: Left\r\n  prev_x: Down\r\n  quit: Ctrl+Q\r\n  save: Ctrl+S\r\n  save_as: Ctrl+Shift+S\r\n  save_to: null\r\n  scale: shift+S\r\n  stop: Escape\r\n  toggle_keep_prev_mode: Ctrl+P\r\n  track: shift+T\r\n  track_assigned: shift+A\r\n 
 track_full: shift+F\r\n  undo: Ctrl+Z\r\n  undo_last_point: Backspace\r\n  update_frame: shift+U\r\n  zoom_in: Ctrl++\r\n  zoom_out: Ctrl+-\r\n  zoom_to_original: Ctrl+0\r\nshow_cross_line: true\r\nshow_label_text_field: true\r\nsort_labels: true\r\nstore_data: true\r\ntheme: auto\r\nvalidate_label: null\r\nvis_dock:\r\n  closable: true\r\n  floatable: true\r\n  movable: true\r\n  show: true\r\n"
  },
  {
    "path": "DLTA_AI_app/labelme/config/default_config_base.yaml",
    "content": "auto_save: false\ncanvas:\n  double_click: close\n  num_backups: 10\ndefault_classes:\n- person\n- bicycle\n- car\n- motorcycle\n- airplane\n- bus\n- train\n- truck\n- boat\n- traffic light\n- fire hydrant\n- stop sign\n- parking meter\n- bench\n- bird\n- cat\n- dog\n- horse\n- sheep\n- cow\n- elephant\n- bear\n- zebra\n- giraffe\n- backpack\n- umbrella\n- handbag\n- tie\n- suitcase\n- frisbee\n- skis\n- snowboard\n- sports ball\n- kite\n- baseball bat\n- baseball glove\n- skateboard\n- surfboard\n- tennis racket\n- bottle\n- wine glass\n- cup\n- fork\n- knife\n- spoon\n- bowl\n- banana\n- apple\n- sandwich\n- orange\n- broccoli\n- carrot\n- hot dog\n- pizza\n- donut\n- cake\n- chair\n- couch\n- potted plant\n- bed\n- dining table\n- toilet\n- tv\n- laptop\n- mouse\n- remote\n- keyboard\n- cell phone\n- microwave\n- oven\n- toaster\n- sink\n- refrigerator\n- book\n- clock\n- vase\n- scissors\n- teddy bear\n- hair drier\n- toothbrush\ndefault_shape_color:\n- 0\n- 255\n- 0\ndisplay_label_popup: true\nepsilon: 10.0\nfile_dock:\n  closable: true\n  floatable: true\n  movable: true\n  show: true\nfile_search: null\nfit_to_content:\n  column: true\n  row: false\nflag_dock:\n  closable: true\n  floatable: true\n  movable: true\n  show: true\nflags: null\nkeep_prev: false\nkeep_prev_brightness: false\nkeep_prev_contrast: false\nkeep_prev_scale: false\nlabel_colors: null\nlabel_completion: startswith\nlabel_dock:\n  closable: true\n  floatable: true\n  movable: true\n  show: true\nlabel_flags: null\nlabels: null\nlogger_level: info\nmute: false\nshape:\n  fill_color:\n  - 0\n  - 255\n  - 0\n  - 0\n  hvertex_fill_color:\n  - 255\n  - 255\n  - 255\n  - 255\n  line_color:\n  - 0\n  - 255\n  - 0\n  - 128\n  select_fill_color:\n  - 0\n  - 255\n  - 0\n  - 155\n  select_line_color:\n  - 255\n  - 255\n  - 255\n  - 255\n  vertex_fill_color:\n  - 0\n  - 255\n  - 0\n  - 255\nshape_color: auto\nshape_dock:\n  closable: true\n  floatable: true\n  movable: true\n  show: true\nshift_auto_shape_color: 0\nshortcuts:\n  SAM_RESET: X\n  SAM_add_point: A\n  SAM_clear: C\n  SAM_enhance: shift+E\n  SAM_finish_annotation: F\n  SAM_remove_point: R\n  SAM_select_rect: B\n  add_point_to_edge: Ctrl+Shift+P\n  clear_annotations: shift+D\n  close: Ctrl+W\n  copy: Ctrl+C\n  create_circle: null\n  create_line: null\n  create_linestrip: null\n  create_point: null\n  create_polygon: Ctrl+N\n  create_rectangle: null\n  delete_file: Ctrl+Delete\n  delete_polygon: Delete\n  duplicate_polygon: Ctrl+D\n  edit_label: Ctrl+L\n  edit_polygon: Ctrl+J\n  export: Ctrl+E\n  export_video: shift+V\n  fit_width: Ctrl+Shift+F\n  fit_window: null\n  ignore_updates: shift+Z\n  interpolate: shift+I\n  mark_as_key: shift+M\n  next_1: Right\n  next_x: Up\n  open: Ctrl+O\n  open_dir: Ctrl+U\n  open_next: null\n  open_prev: null\n  open_video: Ctrl+M\n  open_video_frames : Ctrl+F\n  paste: Ctrl+V\n  play: Space\n  prev_1: Left\n  prev_x: Down\n  quit: Ctrl+Q\n  save: Ctrl+S\n  save_as: Ctrl+Shift+S\n  save_to: null\n  scale: shift+S\n  stop: Escape\n  toggle_keep_prev_mode: Ctrl+P\n  track: shift+T\n  track_assigned: shift+A\n  track_full: shift+F\n  undo: Ctrl+Z\n  undo_last_point: Backspace\n  update_frame: shift+U\n  zoom_in: Ctrl++\n  zoom_out: Ctrl+-\n  zoom_to_original: Ctrl+0\nshow_cross_line: true\nshow_label_text_field: true\nsort_labels: true\nstore_data: true\ntheme: auto\nvalidate_label: null\nvis_dock:\n  closable: true\n  floatable: true\n  movable: true\n  show: true\n"
  },
  {
    "path": "DLTA_AI_app/labelme/intelligence.py",
    "content": "from ultralytics import YOLO\nimport json\nimport time\ntry:\n    from inferencing import models_inference\nexcept ModuleNotFoundError:\n    import subprocess\n    print(\"The required package 'mmcv-full' is not currently installed. It will now be installed. This process may take some time. Note that this package will only be installed the first time you use DLTA-AI.\")\n    subprocess.run([\"mim\", \"install\", \"mmcv-full==1.7.0\"])\n    from inferencing import models_inference\nfrom labelme.label_file import LabelFile\nfrom labelme import PY2\nfrom PyQt6.QtCore import QThread\nfrom PyQt6.QtCore import pyqtSignal as pyqtSignal\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\nimport os\nimport os.path as osp\nimport warnings\nimport yaml\nfrom .utils.helpers.mathOps import color_palette\nimport torch\nfrom mmdet.apis import init_detector\nwarnings.filterwarnings(\"ignore\")\n\nfrom .widgets.MsgBox import OKmsgBox\nfrom .utils.helpers import mathOps\n\n\ncoco_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',\n                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n                'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',\n                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',\n                'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',\n                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',\n                'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',\n                'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n# make a list of 12 unique colors as we will use them to draw bounding boxes of different classes in different colors\n# so the calor palette will be used to draw bounding boxes of different classes in different colors\n# the color pallette should have the famous 12 colors as red, green, blue, yellow, cyan, magenta, white, black, gray, brown, pink, and orange in bgr format\n\n\nclass IntelligenceWorker(QThread):\n    sinOut = pyqtSignal(int, int)\n\n    def __init__(self, parent, images, source,multi_model_flag=False):\n        super(IntelligenceWorker, self).__init__(parent)\n        self.parent = parent\n        self.source = source\n        self.images = images\n        self.multi_model_flag = multi_model_flag\n        self.notif = []\n\n    def run(self):\n        index = 0\n        total = len(self.images)\n        for filename in self.images:\n\n            if self.parent.isVisible == False:\n                return\n            if self.source.operationCanceled == True:\n                return\n            index = index + 1\n            json_name = osp.splitext(filename)[0] + \".json\"\n            # if os.path.exists(json_name)==False:\n\n            if os.path.isdir(json_name):\n                os.remove(json_name)\n\n            try:\n                print(\"Decoding \"+filename)\n                if self.multi_model_flag:\n                    s = self.source.get_shapes_of_one(filename, multi_model_flag=True)\n                else:\n                    s = self.source.get_shapes_of_one(filename)\n                s = 
mathOps.convert_shapes_to_qt_shapes(s)\n                self.source.saveLabelFile(filename, s)\n            except Exception as e:\n                print(e)\n            self.sinOut.emit(index, total)\n\n\n\nclass Intelligence():\n    def __init__(self, parent):\n        self.reader = models_inference()\n        self.parent = parent\n        self.conf_threshold = 0.3\n        self.iou_threshold = 0.5\n        with open (\"labelme/config/default_config.yaml\") as f:\n            self.config = yaml.load(f, Loader=yaml.FullLoader)\n        self.default_classes = self.config[\"default_classes\"]\n        try:\n            self.selectedclasses = {}\n            for class_ in self.default_classes:\n                if class_ in coco_classes:\n                    index = coco_classes.index(class_)\n                    self.selectedclasses[index] = class_\n        except:\n            self.selectedclasses = {i:class_ for i,class_ in enumerate(coco_classes)}\n            print(\"error in loading the default classes from the config file, so we will use all the coco classes\")\n        self.selectedmodels = []\n        self.current_model_name, self.current_mm_model = self.make_mm_model(\"\")\n\n    @torch.no_grad()\n    def make_mm_model(self, selected_model_name):\n        try:\n            with open(\"saved_models.json\") as json_file:\n                data = json.load(json_file)\n                if selected_model_name == \"\":\n                    # read the saved_models.json file and import the config and checkpoint files from the first model\n                    selected_model_name = list(data.keys())[0]\n                    config = data[selected_model_name][\"config\"]\n                    checkpoint = data[selected_model_name][\"checkpoint\"]\n                else:\n                    config = data[selected_model_name][\"config\"]\n                    checkpoint = data[selected_model_name][\"checkpoint\"]\n                print(\n                    f'selected model : {selected_model_name} \\nconfig : {config}\\ncheckpoint : {checkpoint} \\n')\n        except Exception as e:\n            OKmsgBox(\"Error\", f\"Error in loading the model\\n{e}\", \"critical\")\n            return\n\n\n        torch.cuda.empty_cache()\n        if \"YOLOv8\" in selected_model_name:\n            model = YOLO(checkpoint)\n            model.fuse()\n            return selected_model_name, model\n\n        try:\n            print(f\"From the working one: {config}\")\n            model = init_detector(config,\n                                  checkpoint,\n                                  device=torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\"))\n        except:\n            print(\n                \"Error in loading the model, please check if the config and checkpoint files do exist\")\n\n            #    cfg_options= dict(iou_threshold=0.2))\n\n        # \"C:\\Users\\Shehab\\Desktop\\l001\\ANNOTATION_TOOL\\mmdetection\\mmdetection\\configs\\yolact\\yolact_r50_1x8_coco.py\"\n        # model = init_detector(\"C:/Users/Shehab/Desktop/mmdetection/mmdetection/configs/detectors/htc_r50_sac_1x_coco.py\",\n            # \"C:/Users/Shehab/Desktop/mmdetection/mmdetection/checkpoints/htc_r50_sac_1x_coco-bfa60c54.pth\", device = torch.device(\"cuda\"))\n        return selected_model_name, model\n\n    @ torch.no_grad()\n    def make_mm_model_more(self, selected_model_name, config, checkpoint):\n        torch.cuda.empty_cache()\n        print(\n            f\"Selected model is {selected_model_name}\\n and config 
is {config}\\n and checkpoint is {checkpoint}\")\n\n        # if YOLOv8\n        if \"YOLOv8\" in selected_model_name:\n            try:\n                model = YOLO(checkpoint)\n                model.fuse()\n                return selected_model_name, model\n            except Exception as e:\n                OKmsgBox(\"Error\", f\"Error in loading the model\\n{e}\", \"critical\")\n                return\n\n        # It's a MMDetection model\n        else:\n            try:\n                print(f\"From the new one: {config}\")\n                model = init_detector(config, checkpoint, device=torch.device(\n                    \"cuda\" if torch.cuda.is_available() else \"cpu\"))\n            except Exception as e:\n                OKmsgBox(\"Error\", f\"Error in loading the model\\n{e}\", \"critical\")\n                return\n            return selected_model_name, model\n\n    def get_shapes_of_one(self, image, img_array_flag=False, multi_model_flag=False):\n        # print(f\"Threshold is {self.conf_threshold}\")\n        # results = self.reader.decode_file(img_path = filename, threshold = self.conf_threshold , selected_model_name = self.current_model_name)[\"results\"]\n        start_time = time.time()\n        # if img_array_flag is true then the image is a numpy array and not a path\n        if multi_model_flag:\n            # to handle the case of the user selecting no models\n            if len(self.selectedmodels) == 0:\n                return []\n            self.reader.annotating_models.clear()\n            for model_name in self.selectedmodels:\n                self.current_model_name, self.current_mm_model = self.make_mm_model(\n                    model_name)\n                if img_array_flag:\n                    results0, results1 = self.reader.decode_file(\n                        img=image, model=self.current_mm_model, classdict=self.selectedclasses, threshold=self.conf_threshold, img_array_flag=True)\n                else:\n                    results0, results1 = self.reader.decode_file(\n                        img=image, model=self.current_mm_model, classdict=self.selectedclasses, threshold=self.conf_threshold)\n                self.reader.annotating_models[model_name] = [\n                    results0, results1]\n                end_time = time.time()\n                print(\n                    f\"Time taken to annotate img on {self.current_model_name}: {int((end_time - start_time)*1000)} ms\" + \"\\n\")\n            print('merging masks')\n            results0, results1 = self.reader.merge_masks()\n            results = self.reader.polegonise(\n                results0, results1, classdict=self.selectedclasses, threshold=self.conf_threshold)['results']\n\n        else:\n            if img_array_flag:\n                results = self.reader.decode_file(\n                    img=image, model=self.current_mm_model, classdict=self.selectedclasses, threshold=self.conf_threshold, img_array_flag=True)\n                # print(type(results))\n                if isinstance(results, tuple):\n                    results = self.reader.polegonise(\n                        results[0], results[1], classdict=self.selectedclasses, threshold=self.conf_threshold)['results']\n                else:\n                    results = results['results']\n            else:\n                results = self.reader.decode_file(\n                    img=image, model=self.current_mm_model, classdict=self.selectedclasses, threshold=self.conf_threshold)\n                
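# decode_file may return either a raw (results0, results1) tuple that still has to go through polegonise, or a dict that already holds a 'results' key; both cases are handled below\n                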
if isinstance(results, tuple):\n                    results = self.reader.polegonise(\n                        results[0], results[1], classdict=self.selectedclasses, threshold=self.conf_threshold)['results']\n                else:\n                    results = results['results']\n            end_time = time.time()\n            print(\n                f\"Time taken to annoatate img on {self.current_model_name}: {int((end_time - start_time)*1000)} ms\")\n\n        shapes = []\n        for result in results:\n            shape = {}\n            shape[\"label\"] = result[\"class\"]\n            shape[\"content\"] = result[\"confidence\"]\n            shape[\"group_id\"] = None\n            shape[\"shape_type\"] = \"polygon\"\n            shape[\"bbox\"] = mathOps.get_bbox_xyxy(result[\"seg\"])\n\n            shape[\"flags\"] = {}\n            shape[\"other_data\"] = {}\n\n            # shape_points is result[\"seg\"] flattened\n            shape[\"points\"] = [item for sublist in result[\"seg\"]\n                               for item in sublist]\n\n            shapes.append(shape)\n            shapes, boxes, confidences, class_ids, segments = mathOps.OURnms_confidenceBased(\n                shapes, self.iou_threshold)\n            # self.addLabel(shape)\n        return shapes\n\n    # print the labels of the selected classes in the dialog\n    # def updatlabellist(self):\n    #     for selectedclass in self.selectedclasses.values():\n    #         shape = Shape()\n    #         shape.label = selectedclass\n    #         shape.content = \"\"\n    #         shape.shape_type=\"polygon\"\n    #         shape.flags = {}\n    #         shape.other_data = {}\n    #         mainwindow = self.parent\n    #         mainwindow.addLabel(shape)\n\n    def get_shapes_of_batch(self, images, multi_model_flag=False, notif = []):\n        self.pd = self.startOperationDialog()\n        self.thread = IntelligenceWorker(self.parent, images, self, multi_model_flag)\n        self.thread.sinOut.connect(self.updateDialog)\n        self.thread.start()\n        self.notif = notif\n\n    def updateDialog(self, completed, total):\n        progress = int(completed/total*100)\n        self.pd.setLabelText(str(completed) + \"/\" + str(total))\n        self.pd.setValue(progress)\n        if completed == total:\n            self.onProgressDialogCanceledOrCompleted()\n\n\n    def startOperationDialog(self):\n        self.operationCanceled = False\n        pd1 = QtWidgets.QProgressDialog(\n            'Progress', 'Cancel', 0, 100, self.parent)\n        pd1.setLabelText('Progress')\n        pd1.setCancelButtonText('Cancel')\n        pd1.setRange(0, 100)\n        pd1.setValue(0)\n        pd1.setMinimumDuration(0)\n        pd1.show()\n        pd1.canceled.connect(self.onProgressDialogCanceledOrCompleted)\n        return pd1\n\n    def onProgressDialogCanceledOrCompleted(self):\n        try:\n            if not self.notif[0] and not self.notif[1].isActiveWindow():\n                self.notif[2](\"Batch Annotation Completed\")\n        except:\n            print(\"Error in batch mode notification\")\n        self.operationCanceled = True\n        if self.parent.lastOpenDir and osp.exists(self.parent.lastOpenDir):\n            self.parent.importDirImages(self.parent.lastOpenDir)\n        else:\n            self.parent.loadFile(self.parent.filename)\n\n\n    def clear_annotating_models(self):\n        self.reader.annotating_models.clear()\n\n    def saveLabelFile(self, filename, detectedShapes):\n        lf = LabelFile()\n\n        
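# helper: convert a detected qt Shape into the plain dict of fields (label, points, bbox, group_id, content, shape_type, flags) that gets written into the label JSON\n        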
def format_shape(s):\n            data = s.other_data.copy()\n            data.update(\n                dict(\n                    label=s.label.encode(\"utf-8\") if PY2 else s.label,\n                    points=mathOps.flattener(s.points),\n                    bbox=s.bbox,\n                    group_id=s.group_id,\n                    content=s.content,\n                    shape_type=s.shape_type,\n                    flags=s.flags,\n                )\n            )\n            return data\n\n        shapes = [format_shape(item) for item in detectedShapes]\n\n        imageData = LabelFile.load_image_file(filename)\n        image = QtGui.QImage.fromData(imageData)\n        if osp.dirname(filename) and not osp.exists(osp.dirname(filename)):\n            os.makedirs(osp.dirname(filename))\n        json_name = osp.splitext(filename)[0] + \".json\"\n        imagePath = osp.relpath(filename, osp.dirname(json_name))\n        lf.save(\n            filename=json_name,\n            shapes=shapes,\n            imagePath=imagePath,\n            imageData=imageData,\n            imageHeight=image.height(),\n            imageWidth=image.width(),\n            otherData={},\n            flags={},\n        )\n"
  },
  {
    "path": "DLTA_AI_app/labelme/label_file.py",
    "content": "import base64\nimport contextlib\nimport io\nimport json\nimport os.path as osp\n\nimport PIL.Image\n\nfrom labelme import __version__\nfrom labelme.logger import logger\nfrom labelme import PY2\nfrom labelme import QT4\nfrom labelme import utils\n\n\nPIL.Image.MAX_IMAGE_PIXELS = None\n\n\n@contextlib.contextmanager\ndef open(name, mode):\n    assert mode in [\"r\", \"w\"]\n    if PY2:\n        mode += \"b\"\n        encoding = None\n    else:\n        encoding = \"utf-8\"\n    yield io.open(name, mode, encoding=encoding)\n    return\n\n\nclass LabelFileError(Exception):\n    pass\n\n\nclass LabelFile(object):\n\n    suffix = \".json\"\n\n    def __init__(self, filename=None):\n        self.shapes = []\n        self.imagePath = None\n        self.imageData = None\n        if filename is not None:\n            self.load(filename)\n        self.filename = filename\n\n    @staticmethod\n    def load_image_file(filename):\n        try:\n            image_pil = PIL.Image.open(filename)\n        except IOError:\n            logger.error(\"Failed opening image file: {}\".format(filename))\n            return\n\n        # apply orientation to image according to exif\n        image_pil = utils.apply_exif_orientation(image_pil)\n\n        with io.BytesIO() as f:\n            ext = osp.splitext(filename)[1].lower()\n            if PY2 and QT4:\n                format = \"PNG\"\n            elif ext in [\".jpg\", \".jpeg\"]:\n                format = \"JPEG\"\n            else:\n                format = \"PNG\"\n            image_pil.save(f, format=format)\n            f.seek(0)\n            return f.read()\n\n    def load(self, filename):\n        keys = [\n            \"version\",\n            \"imageData\",\n            \"imagePath\",\n            \"shapes\",  # polygonal annotations\n            \"flags\",  # image level flags\n            \"imageHeight\",\n            \"imageWidth\",\n        ]\n        shape_keys = [\n            \"label\",\n            \"points\",\n            \"bbox\",\n            \"group_id\",\n            \"shape_type\",\n            \"flags\",\n            \"content\"\n        ]\n        try:\n            with open(filename, \"r\") as f:\n                data = json.load(f)\n            version = data.get(\"version\")\n            if version is None:\n                logger.warn(\n                    \"Loading JSON file ({}) of unknown version\".format(\n                        filename\n                    )\n                )\n            elif version.split(\".\")[0] != __version__.split(\".\")[0]:\n                logger.warn(\n                    \"This JSON file ({}) may be incompatible with \"\n                    \"current labelme. 
version in file: {}, \"\n                    \"current version: {}\".format(\n                        filename, version, __version__\n                    )\n                )\n\n            if data[\"imageData\"] is not None:\n                imageData = base64.b64decode(data[\"imageData\"])\n                if PY2 and QT4:\n                    imageData = utils.img_data_to_png_data(imageData)\n            else:\n                # relative path from label file to relative path from cwd\n                imagePath = osp.join(osp.dirname(filename), data[\"imagePath\"])\n                imageData = self.load_image_file(imagePath)\n            flags = data.get(\"flags\") or {}\n            imagePath = data[\"imagePath\"]\n            self._check_image_height_and_width(\n                base64.b64encode(imageData).decode(\"utf-8\"),\n                data.get(\"imageHeight\"),\n                data.get(\"imageWidth\"),\n            )\n            shapes = [\n                dict(\n                    label=s[\"label\"],\n                    points=s[\"points\"],\n                    bbox = s[\"bbox\"],\n                    shape_type=s.get(\"shape_type\", \"polygon\"),\n                    flags=s.get(\"flags\", {}),\n                    content=s.get(\"content\"),\n                    group_id=s.get(\"group_id\"),\n                    other_data={\n                        k: v for k, v in s.items() if k not in shape_keys\n                    },\n                )\n                for s in data[\"shapes\"]\n            ]\n        except Exception as e:\n            raise LabelFileError(e)\n\n        otherData = {}\n        for key, value in data.items():\n            if key not in keys:\n                otherData[key] = value\n\n        # Only replace data after everything is loaded.\n        self.flags = flags\n        self.shapes = shapes\n        self.imagePath = imagePath\n        self.imageData = imageData\n        self.filename = filename\n        self.otherData = otherData\n\n    @staticmethod\n    def _check_image_height_and_width(imageData, imageHeight, imageWidth):\n        img_arr = utils.img_b64_to_arr(imageData)\n        if imageHeight is not None and img_arr.shape[0] != imageHeight:\n            logger.error(\n                \"imageHeight does not match with imageData or imagePath, \"\n                \"so getting imageHeight from actual image.\"\n            )\n            imageHeight = img_arr.shape[0]\n        if imageWidth is not None and img_arr.shape[1] != imageWidth:\n            logger.error(\n                \"imageWidth does not match with imageData or imagePath, \"\n                \"so getting imageWidth from actual image.\"\n            )\n            imageWidth = img_arr.shape[1]\n        return imageHeight, imageWidth\n\n    def save(\n        self,\n        filename,\n        shapes,\n        imagePath,\n        imageHeight,\n        imageWidth,\n        imageData=None,\n        otherData=None,\n        flags=None,\n    ):\n        if imageData is not None:\n            imageData = base64.b64encode(imageData).decode(\"utf-8\")\n            imageHeight, imageWidth = self._check_image_height_and_width(\n                imageData, imageHeight, imageWidth\n            )\n        if otherData is None:\n            otherData = {}\n        if flags is None:\n            flags = {}\n        data = dict(\n            version=__version__,\n            flags=flags,\n            shapes=shapes,\n            imagePath=imagePath,\n            imageData=imageData,\n            
imageHeight=imageHeight,\n            imageWidth=imageWidth,\n        )\n        for key, value in otherData.items():\n            assert key not in data\n            data[key] = value\n        try:\n            with open(filename, \"w\") as f:\n                json.dump(data, f, ensure_ascii=False, indent=2)\n            self.filename = filename\n        except Exception as e:\n            raise LabelFileError(e)\n\n    @staticmethod\n    def is_label_file(filename):\n        return osp.splitext(filename)[1].lower() == LabelFile.suffix\n"
  },
  {
    "path": "DLTA_AI_app/labelme/logger.py",
    "content": "import datetime\nimport logging\nimport os\n\nimport termcolor\n\nif os.name == \"nt\":  # Windows\n    import colorama\n\n    colorama.init()\n\nfrom . import __appname__\n\n\nCOLORS = {\n    \"WARNING\": \"yellow\",\n    \"INFO\": \"white\",\n    \"DEBUG\": \"blue\",\n    \"CRITICAL\": \"red\",\n    \"ERROR\": \"red\",\n}\n\n\nclass ColoredFormatter(logging.Formatter):\n    def __init__(self, fmt, use_color=True):\n        logging.Formatter.__init__(self, fmt)\n        self.use_color = use_color\n\n    def format(self, record):\n        levelname = record.levelname\n        if self.use_color and levelname in COLORS:\n\n            def colored(text):\n                return termcolor.colored(\n                    text,\n                    color=COLORS[levelname],\n                    attrs={\"bold\": True},\n                )\n\n            record.levelname2 = colored(\"{:<7}\".format(record.levelname))\n            record.message2 = colored(record.msg)\n\n            asctime2 = datetime.datetime.fromtimestamp(record.created)\n            record.asctime2 = termcolor.colored(asctime2, color=\"green\")\n\n            record.module2 = termcolor.colored(record.module, color=\"cyan\")\n            record.funcName2 = termcolor.colored(record.funcName, color=\"cyan\")\n            record.lineno2 = termcolor.colored(record.lineno, color=\"cyan\")\n        return logging.Formatter.format(self, record)\n\n\nclass ColoredLogger(logging.Logger):\n\n    FORMAT = (\n        \"[%(levelname2)s] %(module2)s:%(funcName2)s:%(lineno2)s - %(message2)s\"\n    )\n\n    def __init__(self, name):\n        logging.Logger.__init__(self, name, logging.INFO)\n\n        color_formatter = ColoredFormatter(self.FORMAT)\n\n        console = logging.StreamHandler()\n        console.setFormatter(color_formatter)\n\n        self.addHandler(console)\n        return\n\n\nlogging.setLoggerClass(ColoredLogger)\nlogger = logging.getLogger(__appname__)\n"
  },
  {
    "path": "DLTA_AI_app/labelme/shape.py",
    "content": "import copy\nimport math\n\nfrom PyQt6 import QtCore\nfrom PyQt6 import QtGui\n\nimport labelme.utils\n\n\n# TODO(unknown):\n# - [opt] Store paths instead of creating new ones at each paint.\n\n\nDEFAULT_LINE_COLOR = QtGui.QColor(0, 255, 0, 128)  # bf hovering\nDEFAULT_FILL_COLOR = QtGui.QColor(0, 255, 0, 128)  # hovering\nDEFAULT_SELECT_LINE_COLOR = QtGui.QColor(255, 255, 255)  # selected\nDEFAULT_SELECT_FILL_COLOR = QtGui.QColor(0, 255, 0, 155)  # selected\nDEFAULT_VERTEX_FILL_COLOR = QtGui.QColor(0, 255, 0, 255)  # hovering\nDEFAULT_HVERTEX_FILL_COLOR = QtGui.QColor(255, 255, 255, 255)  # hovering\n\n\nclass Shape(object):\n\n    # Render handles as squares\n    P_SQUARE = 0\n\n    # Render handles as circles\n    P_ROUND = 1\n\n    # Flag for the handles we would move if dragging\n    MOVE_VERTEX = 0\n\n    # Flag for all other handles on the curent shape\n    NEAR_VERTEX = 1\n\n    # The following class variables influence the drawing of all shape objects.\n    line_color = DEFAULT_LINE_COLOR\n    fill_color = DEFAULT_FILL_COLOR\n    select_line_color = DEFAULT_SELECT_LINE_COLOR\n    select_fill_color = DEFAULT_SELECT_FILL_COLOR\n    vertex_fill_color = DEFAULT_VERTEX_FILL_COLOR\n    hvertex_fill_color = DEFAULT_HVERTEX_FILL_COLOR\n    point_type = P_ROUND\n    point_size = 8\n    scale = 1.0\n\n    def __init__(\n        self,\n        label=None,\n        line_color=None,\n        shape_type=None,\n        flags=None,\n        group_id=None,\n        content=None\n    ):\n        self.label = label\n        self.group_id = group_id\n        self.points = []\n        self.bbox = []\n        self.fill = False\n        self.selected = False\n        self.shape_type = shape_type\n        self.flags = flags\n        self.content = content\n        self.other_data = {}\n\n        self._highlightIndex = None\n        self._highlightMode = self.NEAR_VERTEX\n        self._highlightSettings = {\n            self.NEAR_VERTEX: (4, self.P_ROUND),\n            self.MOVE_VERTEX: (1.5, self.P_SQUARE),\n        }\n\n        self._closed = False\n\n        if line_color is not None:\n            # Override the class line_color attribute\n            # with an object attribute. 
Currently this\n            # is used for drawing the pending line a different color.\n            self.line_color = line_color\n\n        self.shape_type = shape_type\n\n    @property\n    def shape_type(self):\n        return self._shape_type\n\n    @shape_type.setter\n    def shape_type(self, value):\n        if value is None:\n            value = \"polygon\"\n        if value not in [\n            \"polygon\",\n            \"rectangle\",\n            \"point\",\n            \"line\",\n            \"circle\",\n            \"linestrip\",\n        ]:\n            raise ValueError(\"Unexpected shape_type: {}\".format(value))\n        self._shape_type = value\n\n    def close(self):\n        self._closed = True\n\n    def addPoint(self, point):\n        if self.points and point == self.points[0]:\n            self.close()\n        else:\n            self.points.append(point)\n\n    def canAddPoint(self):\n        return self.shape_type in [\"polygon\", \"linestrip\"]\n\n    def popPoint(self):\n        if self.points:\n            return self.points.pop()\n        return None\n\n    def insertPoint(self, i, point):\n        self.points.insert(i, point)\n\n    def removePoint(self, i):\n        self.points.pop(i)\n\n    def isClosed(self):\n        return self._closed\n\n    def setOpen(self):\n        self._closed = False\n\n    def getRectFromLine(self, pt1, pt2):\n        x1, y1 = pt1.x(), pt1.y()\n        x2, y2 = pt2.x(), pt2.y()\n        return QtCore.QRectF(x1, y1, x2 - x1, y2 - y1)\n\n    def paint(self, painter):\n        if self.points:\n            color = (\n                self.select_line_color if self.selected else self.line_color\n            )\n            pen = QtGui.QPen(color)\n            # Try using integer sizes for smoother drawing(?)\n            pen.setWidth(max(1, int(round(2.0 / self.scale))))\n            painter.setPen(pen)\n\n            line_path = QtGui.QPainterPath()\n            vrtx_path = QtGui.QPainterPath()\n\n            if self.shape_type == \"rectangle\":\n                assert len(self.points) in [1, 2]\n                if len(self.points) == 2:\n                    rectangle = self.getRectFromLine(*self.points)\n                    line_path.addRect(rectangle)\n                for i in range(len(self.points)):\n                    self.drawVertex(vrtx_path, i)\n            elif self.shape_type == \"circle\":\n                assert len(self.points) in [1, 2]\n                if len(self.points) == 2:\n                    rectangle = self.getCircleRectFromLine(self.points)\n                    line_path.addEllipse(rectangle)\n                for i in range(len(self.points)):\n                    self.drawVertex(vrtx_path, i)\n            elif self.shape_type == \"linestrip\":\n                line_path.moveTo(self.points[0])\n                for i, p in enumerate(self.points):\n                    line_path.lineTo(p)\n                    self.drawVertex(vrtx_path, i)\n            else:\n                line_path.moveTo(self.points[0])\n                # Uncommenting the following line will draw 2 paths\n                # for the 1st vertex, and make it non-filled, which\n                # may be desirable.\n                # self.drawVertex(vrtx_path, 0)\n\n                for i, p in enumerate(self.points):\n                    line_path.lineTo(p)\n                    self.drawVertex(vrtx_path, i)\n                if self.isClosed():\n                    line_path.lineTo(self.points[0])\n\n            painter.drawPath(line_path)\n            
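# draw the vertex markers on top of the outline, then fill them with the active vertex color\n            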
painter.drawPath(vrtx_path)\n            painter.fillPath(vrtx_path, self._vertex_fill_color)\n            if self.fill:\n                color = (\n                    self.select_fill_color\n                    if self.selected\n                    else self.fill_color\n                )\n                painter.fillPath(line_path, color)\n\n    def drawVertex(self, path, i):\n        d = self.point_size / self.scale\n        shape = self.point_type\n        point = self.points[i]\n        if i == self._highlightIndex:\n            size, shape = self._highlightSettings[self._highlightMode]\n            d *= size\n        if self._highlightIndex is not None:\n            self._vertex_fill_color = self.hvertex_fill_color\n        else:\n            self._vertex_fill_color = self.vertex_fill_color\n        if shape == self.P_SQUARE:\n            path.addRect(point.x() - d / 2, point.y() - d / 2, d, d)\n        elif shape == self.P_ROUND:\n            path.addEllipse(point, d / 2.0, d / 2.0)\n        else:\n            assert False, \"unsupported vertex shape\"\n\n    def nearestVertex(self, point, epsilon):\n        min_distance = float(\"inf\")\n        min_i = None\n        for i, p in enumerate(self.points):\n            dist = labelme.utils.distance(p - point)\n            if dist <= epsilon and dist < min_distance:\n                min_distance = dist\n                min_i = i\n        return min_i\n\n    def nearestEdge(self, point, epsilon):\n        min_distance = float(\"inf\")\n        post_i = None\n        for i in range(len(self.points)):\n            line = [self.points[i - 1], self.points[i]]\n            dist = labelme.utils.distancetoline(point, line)\n            if dist <= epsilon and dist < min_distance:\n                min_distance = dist\n                post_i = i\n        return post_i\n\n    def containsPoint(self, point):\n        return self.makePath().contains(point)\n\n    def getCircleRectFromLine(self, line):\n        \"\"\"Computes parameters to draw with `QPainterPath::addEllipse`\"\"\"\n        if len(line) != 2:\n            return None\n        (c, point) = line\n        r = line[0] - line[1]\n        d = math.sqrt(math.pow(r.x(), 2) + math.pow(r.y(), 2))\n        rectangle = QtCore.QRectF(c.x() - d, c.y() - d, 2 * d, 2 * d)\n        return rectangle\n\n    def makePath(self):\n        if self.shape_type == \"rectangle\":\n            path = QtGui.QPainterPath()\n            if len(self.points) == 2:\n                rectangle = self.getRectFromLine(*self.points)\n                path.addRect(rectangle)\n        elif self.shape_type == \"circle\":\n            path = QtGui.QPainterPath()\n            if len(self.points) == 2:\n                rectangle = self.getCircleRectFromLine(self.points)\n                path.addEllipse(rectangle)\n        else:\n            path = QtGui.QPainterPath(self.points[0])\n            for p in self.points[1:]:\n                path.lineTo(p)\n        return path\n\n    def boundingRect(self):\n        return self.makePath().boundingRect()\n\n    def moveBy(self, offset):\n        self.points = [p + offset for p in self.points]\n\n    def moveVertexBy(self, i, offset):\n        self.points[i] = self.points[i] + offset\n\n    def highlightVertex(self, i, action):\n        \"\"\"Highlight a vertex appropriately based on the current action\n\n        Args:\n            i (int): The vertex index\n            action (int): The action\n            (see Shape.NEAR_VERTEX and Shape.MOVE_VERTEX)\n        \"\"\"\n        
self._highlightIndex = i\n        self._highlightMode = action\n\n    def highlightClear(self):\n        \"\"\"Clear the highlighted point\"\"\"\n        self._highlightIndex = None\n\n    def copy(self):\n        return copy.deepcopy(self)\n\n    def __len__(self):\n        return len(self.points)\n\n    def __getitem__(self, key):\n        return self.points[key]\n\n    def __setitem__(self, key, value):\n        self.points[key] = value\n"
  },
  {
    "path": "DLTA_AI_app/labelme/testing.py",
    "content": "import json\nimport os.path as osp\n\nimport imgviz\nimport labelme.utils\n\n\ndef assert_labelfile_sanity(filename):\n    assert osp.exists(filename)\n\n    data = json.load(open(filename))\n\n    assert \"imagePath\" in data\n    imageData = data.get(\"imageData\", None)\n    if imageData is None:\n        parent_dir = osp.dirname(filename)\n        img_file = osp.join(parent_dir, data[\"imagePath\"])\n        assert osp.exists(img_file)\n        img = imgviz.io.imread(img_file)\n    else:\n        img = labelme.utils.img_b64_to_arr(imageData)\n\n    H, W = img.shape[:2]\n    assert H == data[\"imageHeight\"]\n    assert W == data[\"imageWidth\"]\n\n    assert \"shapes\" in data\n    for shape in data[\"shapes\"]:\n        assert \"label\" in shape\n        assert \"points\" in shape\n        for x, y in shape[\"points\"]:\n            assert 0 <= x <= W\n            assert 0 <= y <= H\n"
  },
  {
    "path": "DLTA_AI_app/labelme/utils/__init__.py",
    "content": "# flake8: noqa\n\nfrom ._io import lblsave\n\nfrom .image import apply_exif_orientation\nfrom .image import img_arr_to_b64\nfrom .image import img_b64_to_arr\nfrom .image import img_data_to_arr\nfrom .image import img_data_to_pil\nfrom .image import img_data_to_png_data\nfrom .image import img_pil_to_data\n\nfrom .shape import labelme_shapes_to_label\nfrom .shape import masks_to_bboxes\nfrom .shape import polygons_to_mask\nfrom .shape import shape_to_mask\nfrom .shape import shapes_to_label\n\nfrom .qt import newIcon\nfrom .qt import newButton\nfrom .qt import newAction\nfrom .qt import addActions\nfrom .qt import labelValidator\nfrom .qt import struct\nfrom .qt import distance\nfrom .qt import distancetoline\nfrom .qt import fmtShortcut\n\nfrom .export import exportCOCO, exportCOCOvid, exportMOT, FolderDialog, parse_img_export\nfrom .model_explorer import ModelExplorerDialog\nfrom labelme.widgets.links import open_git_hub, open_license, open_guide\nfrom labelme.widgets import runtime_data_UI, preferences_UI, shortcut_selector_UI, check_updates_UI, feedback_UI\nfrom .vid_to_frames import VideoFrameExtractor\n"
  },
  {
    "path": "DLTA_AI_app/labelme/utils/_io.py",
    "content": "import os.path as osp\n\nimport numpy as np\nimport PIL.Image\n\n\ndef lblsave(filename, lbl):\n    import imgviz\n\n    if osp.splitext(filename)[1] != \".png\":\n        filename += \".png\"\n    # Assume label ranses [-1, 254] for int32,\n    # and [0, 255] for uint8 as VOC.\n    if lbl.min() >= -1 and lbl.max() < 255:\n        lbl_pil = PIL.Image.fromarray(lbl.astype(np.uint8), mode=\"P\")\n        colormap = imgviz.label_colormap()\n        lbl_pil.putpalette(colormap.flatten())\n        lbl_pil.save(filename)\n    else:\n        raise ValueError(\n            \"[%s] Cannot save the pixel-wise class label as PNG. \"\n            \"Please consider using the .npy format.\" % filename\n        )\n"
  },
  {
    "path": "DLTA_AI_app/labelme/utils/custom_exports.py",
    "content": "# Don't Modify These Lines\n# =========================================\ncustom_exports_list = []\n\n# custom export class blueprint\nclass CustomExport:\n    \"\"\"\n    A blueprint for defining custom exports.\n\n    Attributes:\n        file_name (str): The name of the file to export to.\n        button_name (str): The name of the button that triggers the export.\n        format (str): The format of the exported file.\n        function (callable): The function that generates the export data.\n        mode (str): The mode of the export, either \"video\" or \"image\".\n\n    Methods:\n        __call__(*args): Calls the function with the given arguments and returns the result.\n    \"\"\"\n    def __init__(self, file_name, button_name, format, function, mode = \"video\"):\n        \"\"\"\n        Initializes a new instance of the CustomExport class.\n\n        Args:\n            file_name (str): The name of the file to export to.\n            button_name (str): The name of the button that triggers the export.\n            format (str): The format of the exported file.\n            function (callable): The function that generates the export data.\n            mode (str): The mode of the export, either \"video\" or \"image\".\n        \"\"\"\n        self.file_name = file_name\n        self.button_name = button_name\n        self.format = format\n        self.function = function\n        self.mode = mode\n\n        custom_exports_list.append(self)\n    \n    def __call__(self, *args):\n        \"\"\"\n        Calls the function with the given arguments and returns the result.\n\n        Args:\n            *args: The arguments to pass to the function.\n\n        Returns:\n            The result of calling the function with the given arguments.\n        \"\"\"\n        return self.function(*args)\n    \n# =========================================\n\n\n# Add your functions here ()\n\"\"\" These functions must be divided as following:\n\n1- helper functions: functions that are used by other functions and are not exported, no restrictions on them at all\n\nExample: foo() in the dummy functions below.\n\n2- exported functions (video): functions that are exported (video mode only):\nthey take the following arguments:\n    results_file (str): Path to the JSON file containing the object detection results.\n    vid_width (int): Width of the video frames.\n    vid_height (int): Height of the video frames.\n    annotation_path (str): Path to the output COCO annotation file.\n\nand return annotation_path (str): to check if the function is working properly.\n\nExample: bar() in the dummy functions below.\n\n3- exported functions (image/dir): functions that are exported (image and dir mode * including video as frames *):\nthey take the following arguments:\n    json_paths (list): List of paths to the JSON files containing the object detection results.\n    annotation_path (str): The path to the output file.\n\nand return annotation_path (str): to check if the function is working properly.\n\nExample: baz() in the dummy functions below.    \n\n** WARINING: All EXPORT FUNCTIONS MUST HAVE THE SAME ARGUMENTS OR ELSE THEY WILL NOT WORK. 
**\n\nIt's recommended to check the `exports.py` file to see how the functions are called.\n\n\"\"\"\n# =========================================\n# dummy functions for testing\ndef foo():\n    print(\"foo\")\n\ndef bar(results_file, vid_width, vid_height, annotation_path):\n    foo()\n    print(\"bar\")\n    print(f\"Export Function Check: results_file: {results_file} | vid_width: {vid_width} | vid_height: {vid_height} | annotation_path: {annotation_path}\")\n\n    return annotation_path\n\ndef baz(json_paths, annotation_path):\n    foo()\n    print(\"baz\")\n    print(f\"Export Function Check: json_paths {json_paths} | annotation_path: {annotation_path}\")\n\n    return annotation_path\n\n\ndef count_objects(json_paths, annotation_path):\n    import matplotlib.pyplot as plt\n    import json\n\n    labels = []\n    counts = []\n    # Loop through each JSON file and count its shapes\n    for i in range(len(json_paths)):\n        with open(json_paths[i]) as f:\n            labels.append(json_paths[i].split(\"time_\")[-1].split(\".\")[0].replace(\"_\", \":\")[-5:])\n            # Load the JSON data\n            data = json.load(f)\n            counts.append(len(data[\"shapes\"]))\n\n    # Plot the counts once all files have been processed\n    plt.figure(figsize=(20, 12))\n    plt.plot(counts)\n    plt.title(\"Number of Objects Over Time\")\n    plt.tight_layout(pad=3)\n    plt.grid()\n    plt.xticks(range(len(labels)), labels, rotation=90)\n    plt.yticks(range(max(counts)+1))\n    plt.xlabel(\"Time\")\n    plt.ylabel(\"Number of Objects\")\n    plt.savefig(annotation_path)\n    plt.close()\n\n    return annotation_path\n\n# =========================================\n\n\n# create your custom exports here\n# =========================================\n\n# dummy exports for testing\n# CustomExport(\"file_name\", \"video test1\", \"format\", bar, \"video\")\n# CustomExport(\"file_name\", \"image test1\", \"format\", baz, \"image\")\nCustomExport(\"plot_counts\", \"Plot Counts\", \"png\", count_objects, \"image\")\n\n\n\n\n\n# =========================================\n"
  },
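A hedged sketch of how a user might register their own export with the CustomExport blueprint above. The function name labels_to_csv and its CSV columns are illustrative, but the signature and return value follow the contract documented in the file: image/dir-mode exports receive (json_paths, annotation_path) and return annotation_path. In the repository this code would live inside custom_exports.py itself.

# Sketch only (editor's addition): a custom image/dir-mode export.
import csv
import json

def labels_to_csv(json_paths, annotation_path):
    # One row per annotated shape: source JSON file, class label, number of polygon points.
    with open(annotation_path, "w", newline="") as out:
        writer = csv.writer(out)
        writer.writerow(["file", "label", "n_points"])
        for path in json_paths:
            with open(path) as f:
                data = json.load(f)
            for shape in data["shapes"]:
                # points are stored as a flat [x1, y1, x2, y2, ...] list in this project
                writer.writerow([path, shape["label"], len(shape["points"]) // 2])
    return annotation_path

# Registering it would add a "Labels CSV" button for image/dir exports:
# CustomExport("labels_csv", "Labels CSV", "csv", labels_to_csv, "image")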
  {
    "path": "DLTA_AI_app/labelme/utils/export.py",
    "content": "import datetime\nimport glob\nimport json\nimport os\nimport csv\nimport numpy as np\nfrom PyQt6.QtWidgets import QFileDialog\n\ncoco_classes = [\"person\", \"bicycle\", \"car\", \"motorcycle\", \"airplane\", \"bus\", \"train\", \"truck\", \"boat\", \"traffic light\", \"fire hydrant\", \"stop sign\", \"parking meter\", \"bench\", \"bird\", \"cat\", \"dog\", \"horse\", \"sheep\", \"cow\", \"elephant\", \"bear\", \"zebra\", \"giraffe\", \"backpack\", \"umbrella\", \"handbag\", \"tie\", \"suitcase\", \"frisbee\", \"skis\", \"snowboard\", \"sports ball\", \"kite\", \"baseball bat\", \"baseball glove\", \"skateboard\", \"surfboard\", \"tennis racket\", \"bottle\", \"wine glass\", \"cup\", \"fork\", \"knife\", \"spoon\", \"bowl\", \"banana\", \"apple\", \"sandwich\", \"orange\", \"broccoli\", \"carrot\", \"hot dog\", \"pizza\", \"donut\", \"cake\", \"chair\", \"couch\", \"potted plant\", \"bed\", \"dining table\", \"toilet\", \"tv\", \"laptop\", \"mouse\", \"remote\", \"keyboard\", \"cell phone\", \"microwave\", \"oven\", \"toaster\", \"sink\", \"refrigerator\", \"book\", \"clock\", \"vase\", \"scissors\", \"teddy bear\", \"hair drier\", \"toothbrush\"]\n\ndef center_of_polygon(polygon):\n    \"\"\"\n    Calculates the center of a polygon defined by a list of consecutive pairs of vertices.\n\n    Args:\n        polygon (list): A list of consecutive pairs of vertices.\n\n    Returns:\n        tuple: The center point of the polygon as a tuple of two integers.\n    \"\"\"\n    # Extract x and y coordinates from the list of polygon vertices\n    x_coords = polygon[::2]  # Get every other element starting from the first (x-coordinates)\n    y_coords = polygon[1::2]  # Get every other element starting from the second (y-coordinates)\n\n    # Calculate the center point of the polygon\n    center_x = sum(x_coords) / len(x_coords)  # Calculate the average x-coordinate\n    center_y = sum(y_coords) / len(y_coords)  # Calculate the average y-coordinate\n    center = [center_x, center_y]  # Store the center point as a list\n\n    # Find the center of the bounding box of the polygon\n    xmin = min(x_coords)  # Get the minimum x-coordinate\n    xmax = max(x_coords)  # Get the maximum x-coordinate\n    ymin = min(y_coords)  # Get the minimum y-coordinate\n    ymax = max(y_coords)  # Get the maximum y-coordinate\n    centers_rec = [(xmin + xmax) / 2, (ymin + ymax) / 2]  # Calculate the center of the bounding box\n\n    # Calculate the final center point as a weighted average of the polygon center and the bounding box center\n    (xp, yp) = centers_rec  # Unpack the bounding box center coordinates\n    (xn, yn) = center  # Unpack the polygon center coordinates\n    r = 0.5  # Set the weight for the polygon center\n    x = r * xn + (1 - r) * xp  # Calculate the weighted average x-coordinate\n    y = r * yn + (1 - r) * yp  # Calculate the weighted average y-coordinate\n    center = (int(x), int(y))  # Store the final center point as a tuple of integers\n    return center  # Return the final center point\n\ndef get_bbox(segmentation):\n    \"\"\"\n    Calculates the bounding box of a polygon defined by a list of consecutive pairs of x-y coordinates.\n\n    Args:\n        segmentation (list): A list of consecutive pairs of x-y coordinates that define a polygon.\n\n    Returns:\n        list: A list of four values: the minimum x and y values, and the width and height of the bounding box that encloses the polygon.\n    \"\"\"\n    try:\n        x = []\n        y = []\n        # Extract x and y 
coordinates from the segmentation list\n        for i in range(len(segmentation)):\n            if i % 2 == 0:\n                x.append(segmentation[i])\n            else:\n                y.append(segmentation[i])\n        # Calculate the minimum x and y values, and the width and height of the bounding box\n        return [min(x), min(y), max(x) - min(x), max(y) - min(y)]\n    except:\n        # If an exception occurs (e.g. the points are given as nested [x, y] pairs), flatten them into a 1D list first\n        segmentation = [item for sublist in segmentation for item in sublist]\n        x = []\n        y = []\n        # Extract x and y coordinates from the 1D segmentation array\n        for i in range(len(segmentation)):\n            if i % 2 == 0:\n                x.append(segmentation[i])\n            else:\n                y.append(segmentation[i])\n        # Calculate the minimum x and y values, and the width and height of the bounding box\n        return [min(x), min(y), max(x) - min(x), max(y) - min(y)]\n\n\ndef get_area_from_polygon(polygon, mode=\"segmentation\"):\n    \"\"\"\n    Calculates the area of a polygon defined by a list of consecutive pairs of x-y coordinates.\n\n    Args:\n        polygon (list): A list of consecutive pairs of x-y coordinates that define a polygon.\n        mode (str): The mode to use for calculating the area. Can be \"segmentation\" (default) or \"bbox\".\n\n    Returns:\n        float: The area of the polygon.\n    \"\"\"\n    if mode == \"segmentation\":\n        # Convert the list to a numpy array of shape (n, 2) where n is the number of vertices\n        polygon = np.array(polygon).reshape(-1, 2)\n        # Use the shoelace formula to calculate the area of the polygon\n        area = 0.5 * np.abs(np.dot(polygon[:, 0], np.roll(polygon[:, 1], 1)) -\n                            np.dot(polygon[:, 1], np.roll(polygon[:, 0], 1)))\n        # Return the area\n        return area\n\n    elif mode == \"bbox\":\n        # Unpack the list into variables\n        x_min, y_min, width, height = polygon\n        # Calculate the area by multiplying the width and height\n        area = width * height\n        # Return the area\n        return area\n\n    else:\n        raise ValueError(\"mode must be either 'segmentation' or 'bbox'\")\n\n# the parsing function is called in the main export function (in app.py) before exporting in image or dir mode\ndef parse_img_export(target_directory, save_path):\n    import json\n    import glob\n\n    # If no target directory is given, fall back to single-image mode and use the save path\n    try:\n        if target_directory == \"\":\n            image_mode = True\n        else:\n            image_mode = False\n\n        # Get all the JSON files in the specified directory\n        json_paths = glob.glob(f\"{target_directory}/*.json\")\n        if image_mode:\n            json_paths = [save_path]\n        # Raise an error if no JSON files are found in the directory\n        if len(json_paths) == 0:\n            raise ValueError(\"No json files found in the directory\")\n    except Exception as e:\n        print(f\"Error parsing image export: {e}\")\n        return None\n    \n    return json_paths\n\ndef exportCOCO(json_paths, annotation_path):\n    \"\"\"\n    Export annotations in COCO format from a list of JSON files (image and dir modes)\n\n    Args:\n        json_paths (list): List of paths to the JSON files containing the annotations.\n        annotation_path (str): The path to the 
output file.\n\n    Returns:\n        str: The path to the output file.\n\n    Raises:\n        ValueError: If no JSON files are found in the directory.\n\n    \"\"\"\n    # Create a dictionary to store the file info\n    file = {}\n\n    # Write the info header\n    file[\"info\"] = {\n        \"description\": \"Exported from DLTA-AI\",\n        # \"url\": \"n/a\",\n        # \"version\": \"n/a\",\n        \"year\": datetime.datetime.now().year,\n        # \"contributor\": \"n/a\",\n        \"date_created\": datetime.date.today().strftime(\"%Y/%m/%d\")\n    }\n\n    # Create an empty set to store the used classes\n    used_classes = set()\n\n    # Create empty lists to store annotations and images\n    annotations = []\n    images = []\n\n    # Loop through each JSON file\n    for i in range(len(json_paths)):\n        try:\n            with open(json_paths[i]) as f:\n                # Load the JSON data\n                data = json.load(f)\n\n                # Add image data to the images list\n                images.append({\n                    \"id\": i,\n                    \"width\": data[\"imageWidth\"],\n                    \"height\": data[\"imageHeight\"],\n                    \"file_name\": json_paths[i].split(\"/\")[-1].replace(\".json\", \".jpg\"),\n                })\n\n                # Loop through each shape in the JSON data\n                for j in range(len(data[\"shapes\"])):\n                    # Skip shapes with no points\n                    if len(data[\"shapes\"][j][\"points\"],) == 0:\n                        continue\n\n                    # Add the class to the used_classes set if it hasn't been added yet\n                    if data[\"shapes\"][j][\"label\"].lower() not in coco_classes:\n                        print(f\"{data['shapes'][j]['label']} is not a valid COCO class.. 
Adding it to the list.\")\n                        coco_classes.append((data[\"shapes\"][j][\"label\"].lower()))\n\n                    # Add annotation data to the annotations list\n                    annotations.append({\n                        \"id\": len(annotations),\n                        \"image_id\": i,\n                        \"category_id\": coco_classes.index(data[\"shapes\"][j][\"label\"].lower()) + 1,\n                        \"bbox\": get_bbox(data[\"shapes\"][j][\"points\"]),\n                        \"iscrowd\": 0\n                    })\n\n                    # Try to add segmentation and area data to the annotation\n                    try:\n                        annotations[-1][\"segmentation\"] = [data[\"shapes\"][j][\"points\"]]\n                        annotations[-1][\"area\"] = get_area_from_polygon(\n                            annotations[-1][\"segmentation\"][0], mode=\"segmentation\")\n                    except:\n                        annotations[-1][\"area\"] = get_area_from_polygon(\n                            annotations[-1][\"bbox\"], mode=\"bbox\")\n\n                    # Try to add score data to the annotation\n                    try:\n                        annotations[-1][\"score\"] = float(data[\"shapes\"][j][\"content\"])\n                    except:\n                        pass\n\n                    # Add the class to the used_classes set\n                    used_classes.add(coco_classes.index(data[\"shapes\"][j][\"label\"].lower()) + 1)\n\n        # If there's an error with the JSON file, print the error and continue to the next file\n        except Exception as e:\n            print(f\"Error with {json_paths[i]}\")\n            print(e)\n            continue\n\n    # Sort the used_classes set and add the categories to the file dictionary\n    used_classes = sorted(used_classes)\n    file[\"categories\"] = [{\"id\": i, \"name\": coco_classes[i - 1]} for i in used_classes]\n\n    # Add the images and annotations to the file dictionary\n    file[\"images\"] = images\n    file[\"annotations\"] = annotations\n\n    # Write the file dictionary to the output file in JSON format\n    with open(annotation_path, 'w') as outfile:\n        json.dump(file, outfile, indent=4)\n\n    # Return the path to the output file\n    return annotation_path\n\n\ndef exportCOCOvid(results_file, vid_width, vid_height, annotation_path):\n    \"\"\"\n    Export object detection results in COCO format for a video.\n\n    Args:\n        results_file (str): Path to the JSON file containing the object detection results.\n        vid_width (int): Width of the video frames.\n        vid_height (int): Height of the video frames.\n        annotation_path (str): Path to the output COCO annotation file.\n\n    Returns:\n        str: Path to the output COCO annotation file.\n\n    Raises:\n        ValueError: If no object detection results are found in the JSON file.\n\n    \"\"\"\n    file = {}\n    file[\"info\"] = {\n        \"description\": \"Exported from DLTA-AI\",\n        # \"url\": \"n/a\",\n        # \"version\": \"n/a\",\n        \"year\": datetime.datetime.now().year,\n        # \"contributor\": \"n/a\",\n        \"date_created\": datetime.date.today().strftime(\"%Y/%m/%d\")\n    }\n\n    annotations = []\n    images = []\n\n    # Create an empty set to store the used classes\n    used_classes = set()\n\n    # Open the results file and load the JSON data\n    with open(results_file) as f:\n        data = json.load(f)\n\n        # Loop through each frame in the 
JSON data\n        for frame in data:\n            # Skip frames with no object detection results\n            if len(frame[\"frame_data\"]) == 0:\n                continue\n\n            # Add image data to the images list\n            images.append({\n                \"id\": frame[\"frame_idx\"],\n                \"width\": vid_width,\n                \"height\": vid_height,\n                \"file_name\": f\"frame {frame['frame_idx']}\",\n            })\n\n            # Loop through each object in the frame\n            for object in frame[\"frame_data\"]:\n                # Add annotation data to the annotations list\n                annotations.append({\n                    \"id\": len(annotations),\n                    \"image_id\": frame[\"frame_idx\"],\n                    \"category_id\": object[\"class_id\"] + 1,\n                    \"iscrowd\": 0\n                })\n\n                # If the category ID is 0, add the class to the coco_classes list and update the category ID\n                if annotations[-1][\"category_id\"] == 0:\n                    coco_classes.append(object[\"class_name\"].lower())\n                    annotations[-1][\"category_id\"] = coco_classes.index(object[\"class_name\"].lower()) + 1\n\n                # Try to add the object's segmentation data to the annotation\n                try:\n                    annotations[-1][\"bbox\"] = get_bbox(object[\"segment\"])\n                    annotations[-1][\"segmentation\"] = [\n                        [val for sublist in object[\"segment\"] for val in sublist]]\n                    annotations[-1][\"area\"] = get_area_from_polygon(\n                        annotations[-1][\"segmentation\"][0], mode=\"segmentation\")\n                except:\n                    # If the segmentation data is not available, use the object's bounding box data instead\n                    annotations[-1][\"bbox\"] = object[\"bbox\"]\n                    annotations[-1][\"area\"] = get_area_from_polygon(\n                        annotations[-1][\"bbox\"], mode=\"bbox\")\n\n                # Try to add the object's confidence score to the annotation\n                try:\n                    annotations[-1][\"score\"] = float(object[\"confidence\"])\n                except:\n                    pass\n\n                # Add the category ID to the used_classes set\n                used_classes.add(annotations[-1][\"category_id\"])\n\n    # Sort the used_classes set and add the categories to the file dictionary\n    used_classes = sorted(list(used_classes))\n    file[\"categories\"] = [{\"id\": i, \"name\": coco_classes[i - 1]} for i in used_classes]\n\n    # Add the images and annotations to the file dictionary\n    file[\"images\"] = images\n    file[\"annotations\"] = annotations\n\n    # Write the file dictionary to the output file in JSON format\n    with open(annotation_path, 'w') as outfile:\n        json.dump(file, outfile, indent=4)\n\n    # Return the path to the output file\n    return annotation_path\n\n\ndef exportMOT(results_file, annotation_path):\n    \"\"\"\n    Export object tracking results in MOT format.\n\n    Args:\n        results_file (str): Path to the JSON file containing the object tracking results.\n        annotation_path (str): Path to the output MOT annotation file.\n\n    Returns:\n        str: Path to the output MOT annotation file.\n\n    \"\"\"\n\n    # Open the results file and load the JSON data\n    with open(results_file) as f, open(annotation_path, 'w') as outfile:\n        # Loop through 
each frame in the JSON data\n        for frame in json.load(f):\n            for object in frame[\"frame_data\"]:\n                # Write the object tracking data to the output file\n                outfile.write(f'{frame[\"frame_idx\"]}, {object[\"tracker_id\"]},  {object[\"bbox\"][0]},  {object[\"bbox\"][1]},  {object[\"bbox\"][2]},  {object[\"bbox\"][3]},  {object[\"confidence\"]}, {object[\"class_id\"] + 1}, 1\\n')\n\n    # Return the path to the output file\n    return annotation_path\n\n\nclass FolderDialog(QFileDialog):\n    \"\"\"\n    A custom file dialog that allows the user to save a file with a default file name and format.\n\n    Args:\n        default_file_name (str): The default file name to use.\n        default_format (str): The default file format to use.\n\n    \"\"\"\n\n    def __init__(self, default_file_name, default_format):\n        \"\"\"\n        Initializes the FolderDialog object.\n\n        Args:\n            default_file_name (str): The default file name to use.\n            default_format (str): The default file format to use.\n\n        \"\"\"\n\n        # Call the parent constructor\n        super().__init__()\n\n        # Set the mode to save a file\n        self.setAcceptMode(QFileDialog.AcceptMode.AcceptSave)\n\n        # Set the default file name\n        self.selectFile(default_file_name)\n\n        # Set the default format\n        self.setNameFilters(\n            [f\"{default_format.upper()} (*.{default_format.lower()})\", \"All Files (*)\"])\n        self.selectNameFilter(\n            f\"{default_format.upper()} (*.{default_format.lower()})\")\n\n        # Set dialog title\n        self.setWindowTitle(\"Save Annotations\")\n"
  },
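A small worked example for the bbox/area helpers in export.py above (editor's sketch; the module path labelme.utils.export is assumed from the file location). A 4x3 axis-aligned rectangle is passed as the flat [x1, y1, x2, y2, ...] point list that exportCOCO uses.

from labelme.utils.export import get_bbox, get_area_from_polygon  # assumed module path

polygon = [0, 0, 4, 0, 4, 3, 0, 3]  # 4x3 rectangle as a flat x, y, x, y, ... list

assert get_bbox(polygon) == [0, 0, 4, 3]                            # [x_min, y_min, width, height]
assert get_area_from_polygon(polygon, mode="segmentation") == 12.0  # shoelace formula
assert get_area_from_polygon([0, 0, 4, 3], mode="bbox") == 12       # width * height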
  {
    "path": "DLTA_AI_app/labelme/utils/helpers/mathOps.py",
    "content": "import numpy as np\nimport random\nimport cv2\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtCore\nfrom labelme import PY2\nimport os\nimport json\nimport orjson\nimport copy\nfrom shapely.geometry import Polygon\nimport skimage\nfrom labelme.shape import Shape\n\ncoco_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',\n                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n                'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',\n                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',\n                'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',\n                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',\n                'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',\n                'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n# make a list of 12 unique colors as we will use them to draw bounding boxes of different classes in different colors\n# so the calor palette will be used to draw bounding boxes of different classes in different colors\n# the color pallette should have the famous 12 colors as red, green, blue, yellow, cyan, magenta, white, black, gray, brown, pink, and orange in bgr format\nimport random\nimport colorsys\n\nclass ColorGen:\n    \"\"\"\n    A class for generating colors using the HLS color model.\n\n    Attributes:\n        colors (list): A list to store the generated colors.\n\n    Methods:\n        generateColors(num, lightness, saturation): Generates the specified number of colors based on the given lightness and saturation values.\n    \"\"\"\n\n    def __init__(self):\n        self.colors = []\n\n    def generateColors(self, num, lightness, saturation):\n        \"\"\"\n        Generates the specified number of colors based on the given lightness and saturation values.\n\n        Args:\n            num (int): The number of colors to generate.\n            lightness (float): The lightness value for the generated colors (0.0 to 1.0).\n            saturation (float): The saturation value for the generated colors (0.0 to 1.0).\n\n        Returns:\n            List of generated colors in RGB format.\n        \"\"\"\n        h = 0.314159265359\n        golden_ratio_conjugate = 0.618033988749895\n        for _ in range(num):\n            h += golden_ratio_conjugate \n            h %= 1\n            rgb = colorsys.hls_to_rgb(h, lightness, saturation)\n            self.colors.append([int(i * 255.0) for i in rgb])\n        return self.colors\n\nc = ColorGen()\n\ncolor_palette = c.generateColors(20,0.45,0.6)\n\n\ndef get_bbox_xyxy(segment):\n    \n    \"\"\"\n    Summary:\n        Get the bounding box of a polygon in format of [xmin, ymin, xmax, ymax].\n        \n    Args:\n        segment: a list of points\n        \n    Returns:\n        bbox: [x, y, w, h]\n    \"\"\"\n    \n    segment = np.array(segment)\n    x0 = np.min(segment[:, 0])\n    y0 = np.min(segment[:, 1])\n    x1 = np.max(segment[:, 0])\n    y1 = np.max(segment[:, 1])\n    return [x0, y0, x1, y1]\n\ndef addPoints(shape, n):\n    \n    \"\"\"\n    Summary:\n      
  Add points to a polygon.\n        \n    Args:\n        shape: a list of points\n        n: number of points to add\n        \n    Returns:\n        res: a list of points\n    \"\"\"\n    \n    # calculate number of points to add between each pair of points\n    sub = 1.0 * n / (len(shape) - 1) \n    \n    # if sub == 0, then n == 0, no need to add points    \n    if sub == 0:\n        return shape\n    \n    # if sub < 1, then we a point between every pair of points then we handle the points again\n    if sub < 1:\n        res = []\n        res.append(shape[0])\n        for i in range(len(shape) - 1):\n            newPoint = [(shape[i][0] + shape[i + 1][0]) / 2, (shape[i][1] + shape[i + 1][1]) / 2]\n            res.append(newPoint)\n            res.append(shape[i + 1])\n        return handlePoints(res, n + len(shape))\n    \n    # if sub > 1, then we add 'toBeAdded' points between every pair of points\n    else:\n        toBeAdded = int(sub) + 1\n        res = []\n        res.append(shape[0])\n        for i in range(len(shape) - 1):\n            dif = [shape[i + 1][0] - shape[i][0],\n                    shape[i + 1][1] - shape[i][1]]\n            for j in range(1, toBeAdded):\n                newPoint = [shape[i][0] + dif[0] * j /\n                            toBeAdded, shape[i][1] + dif[1] * j / toBeAdded]\n                res.append(newPoint)\n            res.append(shape[i + 1])\n        # recursive call to check if there are any points to add\n        return addPoints(res, n + len(shape) - len(res))\n\ndef reducePoints(polygon, n):\n    \n    \"\"\"\n    Summary:\n        Remove points from a polygon.\n        \n    Args:\n        polygon: a list of points\n        n: number of points to reduce to\n        \n    Returns:\n        polygon: a list of points\n    \"\"\"\n    # if n >= len(polygon), then no need to reduce\n    if n >= len(polygon):\n        return polygon\n    \n    # calculate the distance between each point and: \n    # 1- its previous point\n    # 2- its next point\n    # 3- the middle point between its previous and next points\n    # taking the minimum of these distances as the distance of the point\n    distances = polygon.copy()\n    for i in range(len(polygon)):\n        x1,y1,x2,y2 = polygon[i-1][0], polygon[i-1][1], polygon[(i+1)%len(polygon)][0], polygon[(i+1)%len(polygon)][1]\n        x,y = polygon[i][0], polygon[i][1]\n        \n        if x1 == x2:\n            dist_perp = abs(x - x1)\n        elif y1 == y2:\n            dist_perp = abs(y - y1)\n        else:\n            m = (y2 - y1) / (x2 - x1)\n            c = y1 - m * x1\n            dist_perp = abs(m * x - y + c) / np.sqrt(m * m + 1)\n        \n        dif_right = np.array(\n            polygon[(i + 1) % len(polygon)]) - np.array(polygon[i])\n        dist_right = np.sqrt(\n            dif_right[0] * dif_right[0] + dif_right[1] * dif_right[1])\n\n        dif_left = np.array(polygon[i - 1]) - np.array(polygon[i])\n        dist_left = np.sqrt(\n            dif_left[0] * dif_left[0] + dif_left[1] * dif_left[1])\n\n        distances[i] = min(dist_perp, dist_right, dist_left)\n    \n    # adding small random values to distances to avoid duplicate minimum distances\n    # it will not affect the result\n    distances = [distances[i] + random.random()\n                    for i in range(len(distances))]\n    ratio = 1.0 * n / len(polygon)\n    threshold = np.percentile(distances, 100 - ratio * 100)\n\n    i = 0\n    while i < len(polygon):\n        if distances[i] < threshold:\n            polygon[i] = None\n   
         i += 1\n        i += 1\n    res = [x for x in polygon if x is not None]\n    \n    # recursive call to check if there are any points to remove\n    return reducePoints(res, n)\n\ndef handlePoints(polygon, n):\n    \n    \"\"\"\n    Summary:\n        Add or remove points from a polygon.\n        \n    Args:\n        polygon: a list of points\n        n: number of points that the polygon should have\n        \n    Returns:\n        polygon: a list of points\n    \"\"\"\n    \n    # if n == len(polygon), then no need to add or remove points\n    if n == len(polygon):\n        return polygon\n    \n    # if n > len(polygon), then we need to add points\n    elif n > len(polygon):\n        return addPoints(polygon, n - len(polygon))\n    \n    # if n < len(polygon), then we need to remove points\n    else:\n        return reducePoints(polygon, n)\n\ndef handleTwoSegments(segment1, segment2):\n    \n    \"\"\"\n    Summary:\n        Add or remove points from two polygons to make them have the same number of points.\n        \n    Args:\n        segment1: a list of points\n        segment2: a list of points\n        \n    Returns:\n        segment1: a list of points\n        segment2: a list of points\n    \"\"\"\n    \n    if len(segment1) != len(segment2):\n        biglen = max(len(segment1), len(segment2))\n        segment1 = handlePoints(segment1, biglen)\n        segment2 = handlePoints(segment2, biglen)\n    (segment1, segment2) = allign(segment1, segment2)\n    return (segment1, segment2)\n\ndef allign(shape1, shape2):\n    \n    \"\"\"\n    Summary:\n        Allign the points of two polygons according to their slopes.\n        \n    Args:\n        shape1: a list of points\n        shape2: a list of points\n        \n    Returns:\n        shape1_alligned: a list of points\n        shape2_alligned: a list of points\n    \"\"\"\n    \n    shape1_center = centerOFmass(shape1)\n    shape1_org = [[shape1[i][0] - shape1_center[0], shape1[i]\n                    [1] - shape1_center[1]] for i in range(len(shape1))]\n    shape2_center = centerOFmass(shape2)\n    shape2_org = [[shape2[i][0] - shape2_center[0], shape2[i]\n                    [1] - shape2_center[1]] for i in range(len(shape2))]\n    \n    # sorting the points according to their slopes\n    sorted_shape1 = sorted(shape1_org, key=lambda x: np.arctan2(x[1], x[0]), reverse=True)\n    sorted_shape2 = sorted(shape2_org, key=lambda x: np.arctan2(x[1], x[0]), reverse=True)\n    shape1_alligned = [[sorted_shape1[i][0] + shape1_center[0], sorted_shape1[i]\n                        [1] + shape1_center[1]] for i in range(len(sorted_shape1))]\n    shape2_alligned = [[sorted_shape2[i][0] + shape2_center[0], sorted_shape2[i]\n                        [1] + shape2_center[1]] for i in range(len(sorted_shape2))]\n    \n    return (shape1_alligned, shape2_alligned)\n\ndef centerOFmass(points):\n    \n    \"\"\"\n    Summary:\n        Calculate the center of mass of a polygon.\n        \n    Args:\n        points: a list of points\n        \n    Returns:\n        center: a list of points\n    \"\"\"\n    nppoints = np.array(points)\n    sumX = np.sum(nppoints[:, 0])\n    sumY = np.sum(nppoints[:, 1])\n    return [int(sumX / len(points)), int(sumY / len(points))]\n\ndef flattener(list_2d):\n    \n    \"\"\"\n    Summary:\n        Flatten a list of QTpoints.\n        \n    Args:\n        list_2d: a list of QTpoints\n        \n    Returns:\n        points: a list of points\n    \"\"\"\n    \n    points = [(p.x(), p.y()) for p in list_2d]\n    points = 
np.array(points, np.int16).flatten().tolist()\n    return points\n\ndef mapFrameToTime(frameNumber, fps):\n    \n    \"\"\"\n    Summary:\n        Map a frame number to its time in the video.\n        \n    Args:\n        frameNumber: the frame number\n        fps: the frame rate of the video\n        \n    Returns:\n        frameHours: the hours of the frame\n        frameMinutes: the minutes of the frame\n        frameSeconds: the seconds of the frame\n        frameMilliseconds: the milliseconds of the frame\n    \"\"\"\n    \n    # get the time of the frame\n    frameTime = frameNumber / fps\n    frameHours = int(frameTime / 3600)\n    frameMinutes = int((frameTime - frameHours * 3600) / 60)\n    frameSeconds = int(frameTime - frameHours * 3600 - frameMinutes * 60)\n    frameMilliseconds = int(\n        (frameTime - frameHours * 3600 - frameMinutes * 60 - frameSeconds) * 1000)\n    \n    # print them in formal time format\n    return frameHours, frameMinutes, frameSeconds, frameMilliseconds\n\ndef class_name_to_id(class_name):\n    \n    \"\"\"\n    Summary:\n        Map a class name to its id in the coco dataset.\n        \n    Args:\n        class_name: the class name\n        \n    Returns:\n        class_id: the id of the class\n    \"\"\"\n    \n    try:\n        # map from coco_classes(a list of coco class names) to class_id\n        return coco_classes.index(class_name)\n    except:\n        # this means that the class name is not in the coco dataset\n        return -1\n\ndef compute_iou(box1, box2):\n    \n    \"\"\"\n    Summary:\n        Computes IOU between two bounding boxes.\n\n    Args:\n        box1 (list): List of 4 coordinates (xmin, ymin, xmax, ymax) of the first box.\n        box2 (list): List of 4 coordinates (xmin, ymin, xmax, ymax) of the second box.\n\n    Returns:\n        iou (float): IOU between the two boxes.\n    \"\"\"\n    \n    # Compute intersection coordinates\n    xmin = max(box1[0], box2[0])\n    ymin = max(box1[1], box2[1])\n    xmax = min(box1[2], box2[2])\n    ymax = min(box1[3], box2[3])\n\n    # Compute intersection area\n    if xmin < xmax and ymin < ymax:\n        intersection_area = (xmax - xmin) * (ymax - ymin)\n    else:\n        intersection_area = 0\n\n    # Compute union area\n    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])\n    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])\n    union_area = box1_area + box2_area - intersection_area\n\n    # Compute IOU\n    iou = intersection_area / union_area if union_area > 0 else 0\n\n    return iou\n\ndef compute_iou_exact(shape1, shape2):\n    \n    \"\"\"\n    Summary:\n        Computes IOU between two polygons.\n    \n    Args:\n        shape1 (list): List of 2D coordinates(also list) of the first polygon.\n        shape2 (list): List of 2D coordinates(also list) of the second polygon.\n        \n    Returns:\n        iou (float): IOU between the two polygons.\n    \"\"\"\n    \n    shape1 = [tuple(x) for x in shape1]\n    shape2 = [tuple(x) for x in shape2]\n    polygon1 = Polygon(shape1)\n    polygon2 = Polygon(shape2)\n    if polygon1.intersects(polygon2) is False:\n        return 0\n    intersection = polygon1.intersection(polygon2).area\n    union = polygon1.union(polygon2).area\n    iou = intersection / union if union > 0 else 0\n    return iou\n\ndef match_detections_with_tracks(detections, tracks, iou_threshold=0.5):\n    \n    \"\"\"\n    Summary:\n        Match detections with tracks based on their bounding boxes using IOU threshold.\n\n    Args:\n        detections 
(list): List of detections, each detection is a dictionary with keys (bbox, confidence, class_id)\n        tracks (list): List of tracks, each track is a tuple of (bboxes, track_id, class, conf)\n        iou_threshold (float): IOU threshold for matching detections with tracks.\n\n    Returns:\n        matched_detections (list): List of detections that are matched with tracks, each detection is a dictionary with keys (bbox, confidence, class_id)\n        unmatched_detections (list): List of detections that are not matched with any tracks, each detection is a dictionary with keys (bbox, confidence, class_id)\n    \"\"\"\n    \n    matched_detections = []\n    unmatched_detections = []\n\n    # Loop through each detection\n    for detection in detections:\n        detection_bbox = detection['bbox']\n        # Loop through each track\n        max_iou = 0\n        matched_track = None\n        for track in tracks:\n            track_bbox = track[0:4]\n\n            # Compute IOU between detection and track\n            iou = compute_iou(detection_bbox, track_bbox)\n\n            # Check if IOU is greater than threshold and better than previous matches\n            if iou > iou_threshold and iou > max_iou:\n                matched_track = track\n                max_iou = iou\n\n        # If a track was matched, add detection to matched_detections list and remove the matched track from tracks list\n        if matched_track is not None:\n            detection['group_id'] = int(matched_track[4])\n            matched_detections.append(detection)\n            tracks.remove(matched_track)\n        else:\n            unmatched_detections.append(detection)\n\n    return matched_detections, unmatched_detections\n\ndef get_boxes_conf_classids_segments(shapes):\n    \n    \"\"\"\n    Summary:\n        Get bounding boxes, confidences, class ids, and segments from shapes (NOT QT).\n        \n    Args:\n        shapes: a list of shapes\n        \n    Returns:\n        boxes: a list of bounding boxes \n        confidences: a list of confidences\n        class_ids: a list of class ids\n        segments: a list of segments \n    \"\"\"\n    \n    boxes = []\n    confidences = []\n    class_ids = []\n    segments = []\n    for s in shapes:\n        label = s[\"label\"]\n        points = s[\"points\"]\n        # points are one dimensional array of x1,y1,x2,y2,x3,y3,x4,y4\n        # we will convert it to a 2 dimensional array of points (segment)\n        segment = []\n        for j in range(0, len(points), 2):\n            segment.append([int(points[j]), int(points[j + 1])])\n        # if points is empty pass\n        # if len(points) == 0:\n        #     continue\n        segments.append(segment)\n\n        boxes.append(get_bbox_xyxy(segment))\n        confidences.append(float(s[\"content\"]))\n        class_ids.append(coco_classes.index(\n            label)if label in coco_classes else -1)\n\n    return boxes, confidences, class_ids, segments\n\ndef convert_qt_shapes_to_shapes(qt_shapes):\n    \n    \"\"\"\n    Summary:\n        Convert QT shapes to shapes.\n        \n    Args:\n        qt_shapes: a list of QT shapes\n        \n    Returns:\n        shapes: a list of shapes\n    \"\"\"\n    \n    shapes = []\n    for s in qt_shapes:\n        shapes.append(dict(\n            label=s.label.encode(\"utf-8\") if PY2 else s.label,\n            # convert points into 1D array\n            points=flattener(s.points),\n            bbox=get_bbox_xyxy([(p.x(), p.y()) for p in s.points]),\n            group_id=s.group_id,\n  
          content=s.content,\n            shape_type=s.shape_type,\n            flags=s.flags,\n        ))\n    return shapes\n\ndef convert_shapes_to_qt_shapes(shapes):\n    qt_shapes = []\n    for shape in shapes:\n        label = shape[\"label\"]\n        points = shape[\"points\"]\n        bbox = shape[\"bbox\"]\n        shape_type = shape[\"shape_type\"]\n        # flags = shape[\"flags\"]\n        content = shape[\"content\"]\n        group_id = shape[\"group_id\"]\n        # other_data = shape[\"other_data\"]\n\n        if not points:\n            # skip point-empty shape\n            continue\n\n        shape = Shape(\n            label=label,\n            shape_type=shape_type,\n            group_id=group_id,\n            content=content,\n        )\n        for i in range(0, len(points), 2):\n            shape.addPoint(QtCore.QPointF(points[i], points[i + 1]))\n        shape.close()\n        qt_shapes.append(shape)\n    return qt_shapes\n\ndef convert_QT_to_cv(incomingImage):\n    \n    \"\"\"\n    Summary:\n        Convert QT image to cv image MAT format.\n        \n    Args:\n        incomingImage: a QT image\n        \n    Returns:\n        arr: a cv image MAT format\n    \"\"\"\n    incomingImage = incomingImage.convertToFormat(QtGui.QImage.Format.Format_ARGB32)\n\n    width = incomingImage.width()\n    height = incomingImage.height()\n\n    ptr = incomingImage.bits()\n    ptr.setsize(incomingImage.sizeInBytes())\n    arr = np.array(ptr).reshape(height, width, 4)  # Copies the data\n    return arr\n\ndef convert_cv_to_qt(cv_img):\n    \n    \"\"\"\n    Summary:\n        Convert cv image to QT image format.\n        \n    Args:\n        cv_img: a cv image\n        \n    Returns:\n        convert_to_Qt_format: a QT image format\n    \"\"\"\n    \n    rgb_image = cv2.cvtColor(cv_img, cv2.COLOR_BGR2RGB)\n    h, w, ch = rgb_image.shape\n    bytes_per_line = ch * w\n    convert_to_Qt_format = QtGui.QImage(\n        rgb_image.data, w, h, bytes_per_line, QtGui.QImage.Format.Format_RGB888)\n    return convert_to_Qt_format\n\ndef SAM_rects_to_boxes(rects):\n    \n    \"\"\"\n    Summary:\n        Convert a list of QT rectangles to a list of bounding boxes.\n        \n    Args:\n        rects: a list of QT rectangles\n        \n    Returns:\n        res: a list of bounding boxes\n    \"\"\"\n    \n    res = []\n    for rect in rects:\n        listPOINTS = [min(rect[0].x(), rect[1].x()),\n                        min(rect[0].y(), rect[1].y()),\n                        max(rect[0].x(), rect[1].x()),\n                        max(rect[0].y(), rect[1].y())]\n        listPOINTS = [int(round(x)) for x in listPOINTS]\n        res.append(listPOINTS)\n    if len(res) == 0:\n        res = None\n    return res\n\ndef SAM_points_and_labels_from_coordinates(coordinates):\n    \n    \"\"\"\n    Summary:\n        Convert a list of coordinates to a list of points and a list of labels.\n        \n    Args:\n        coordinates: a list of coordinates\n        \n    Returns:\n        input_points: a list of points\n        input_labels: a list of labels\n    \"\"\"\n    \n    input_points = []\n    input_labels = []\n    for coordinate in coordinates:\n        input_points.append(\n            [int(round(coordinate[0])), int(round(coordinate[1]))])\n        input_labels.append(coordinate[2])\n    if len(input_points) == 0:\n        input_points = None\n        input_labels = None\n    else:\n        input_points = np.array(input_points)\n        input_labels = np.array(input_labels)\n\n    return 
input_points, input_labels\n\ndef load_objects_from_json__json(json_file_name, nTotalFrames):\n    \n    \"\"\"\n    Summary:\n        Load objects from a json file using json library.\n        \n    Args:\n        json_file_name: the name of the json file\n        nTotalFrames: the total number of frames\n        \n    Returns:\n        listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n    \"\"\"\n    \n    listObj = [{'frame_idx': i + 1, 'frame_data': []}\n                for i in range(nTotalFrames)]\n    if not os.path.exists(json_file_name):\n        with open(json_file_name, 'w') as jf:\n            json.dump(listObj, jf,\n                        indent=4,\n                        separators=(',', ': '))\n        jf.close()\n    with open(json_file_name, 'r') as jf:\n        listObj = json.load(jf)\n    jf.close()\n    return listObj\n\ndef load_objects_to_json__json(json_file_name, listObj):\n    \n    \"\"\"\n    Summary:\n        Load objects to a json file using json library.\n        \n    Args:\n        json_file_name: the name of the json file\n        listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n        \n    Returns:\n        None\n    \"\"\"\n    \n    with open(json_file_name, 'w') as json_file:\n        json.dump(listObj, json_file,\n                    indent=4,\n                    separators=(',', ': '))\n    json_file.close()\n\ndef load_objects_from_json__orjson(json_file_name, nTotalFrames):\n    \n    \"\"\"\n    Summary:\n        Load objects from a json file using orjson library.\n        \n    Args:\n        json_file_name: the name of the json file\n        nTotalFrames: the total number of frames\n        \n    Returns:\n        listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n    \"\"\"\n    \n    listObj = [{'frame_idx': i + 1, 'frame_data': []}\n                for i in range(nTotalFrames)]\n    if not os.path.exists(json_file_name):\n        with open(json_file_name, \"wb\") as jf:\n            jf.write(orjson.dumps(listObj))\n        jf.close()\n    with open(json_file_name, \"rb\") as jf:\n        listObj = orjson.loads(jf.read())\n    jf.close()\n    return listObj\n\ndef load_objects_to_json__orjson(json_file_name, listObj):\n    \n    \"\"\"\n    Summary:\n        Load objects to a json file using orjson library.\n        \n    Args:\n        json_file_name: the name of the json file\n        listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n        \n    Returns:\n        None\n    \"\"\"\n    \n    with open(json_file_name, \"wb\") as jf:\n        jf.write(orjson.dumps(listObj, option=orjson.OPT_INDENT_2))\n    jf.close()\n\ndef scaleQTshape(self, originalshape, center, ratioX, ratioY):\n    \n    \"\"\"\n    Summary:\n        Scale a QT shape live in the canvas. 
\n        according to a center point and two ratios.\n        \n    Args:\n        self: the main window object to access the canvas\n        originalshape: the original shape\n        center: the center point\n        ratioX: the ratio of the x axis\n        ratioY: the ratio of the y axis\n        \n    Returns:\n        None\n    \"\"\"\n    \n    ratioX = ratioX / 100\n    ratioY = ratioY / 100\n\n    shape = self.canvas.selectedShapes[0]\n    self.canvas.shapes.remove(shape)\n    self.canvas.selectedShapes.remove(shape)\n    self.remLabels([shape])\n    for i in range(len(shape.points)):\n        shape.points[i].setX(\n            (originalshape.points[i].x() - center[0]) * ratioX + center[0])\n        shape.points[i].setY(\n            (originalshape.points[i].y() - center[1]) * ratioY + center[1])\n    self.canvas.shapes.append(shape)\n    self.canvas.selectedShapes.append(shape)\n    self.addLabel(shape)\n\ndef is_id_repeated(self, group_id, frameIdex=-1):\n    \n    \"\"\"\n    Summary:\n        Check if a group id is repeated in the current frame or in all frames.\n        \n    Args:\n        self: the main window object to access the canvas\n        group_id: the group id\n        frameIdex: the frame index (-1 means the current frame)\n        \n    Returns:\n        True if the group id is repeated, False otherwise\n    \"\"\"\n    \n    if frameIdex == -1:\n        frameIdex = self.INDEX_OF_CURRENT_FRAME\n        \n    listObj = self.load_objects_from_json__orjson()\n    \n    for object_ in listObj[frameIdex - 1]['frame_data']:\n        if object_['tracker_id'] == group_id:\n            return True\n    \n    return False\n\ndef checkKeyFrames(ids, keyFrames):\n    \n    \"\"\"\n    Summary:\n        Check if all the ids have at least two key frames.\n        \n    Args:\n        ids: a list of ids\n        keyFrames: a dictionary of key frames\n        \n    Returns:\n        allAccepted: True if all the ids have at least two key frames, False otherwise\n        idsToTrack: a list of ids that have at least two key frames\n    \"\"\"\n    \n    idsToTrack = []\n    allAccepted = True\n    for id in ids:\n        try:\n            if len(keyFrames['id_' + str(id)]) == 1:\n                allAccepted = False\n            else:\n                idsToTrack.append(id)\n        except:\n            allAccepted = False\n    \n    allRejected = len(idsToTrack) == 0\n    \n    return allAccepted, allRejected, idsToTrack\n\ndef getInterpolated(baseObject, baseObjectFrame, nextObject, nextObjectFrame, curFrame):\n    \n    \"\"\"\n    Summary:\n        Interpolate a shape between two frames using linear interpolation.\n        \n    Args:\n        baseObject: the base object\n        baseObjectFrame: the base object frame\n        nextObject: the next object\n        nextObjectFrame: the next object frame\n        curFrame: the frame to interpolate\n        \n    Returns:\n        cur: the interpolated shape\n    \"\"\"\n    \n    prvR = (nextObjectFrame - curFrame) / (nextObjectFrame - baseObjectFrame)\n    nxtR = (curFrame - baseObjectFrame) / (nextObjectFrame - baseObjectFrame)\n    \n    cur_bbox = prvR * np.array(baseObject['bbox']) + nxtR * np.array(nextObject['bbox'])\n    cur_bbox = [int(cur_bbox[i]) for i in range(len(cur_bbox))]\n\n    (baseObject['segment'], nextObject['segment']) = handleTwoSegments(\n                                                        baseObject['segment'], nextObject['segment'])\n    \n    cur_segment = prvR * np.array(baseObject['segment']) + nxtR 
* np.array(nextObject['segment'])\n    cur_segment = [[int(sublist[0]), int(sublist[1])] for sublist in cur_segment]\n\n    cur = copy.deepcopy(baseObject)\n    cur['bbox'] = cur_bbox\n    cur['segment'] = cur_segment\n    \n    return cur\n\ndef update_saved_models_json(cwd):\n    \n    \"\"\"\n    Summary:\n        Update the saved models json file.\n    \"\"\"\n    \n    checkpoints_dir = cwd + \"/mmdetection/checkpoints/\"\n    # list all the files in the checkpoints directory\n    try:\n        files = os.listdir(checkpoints_dir)\n    except:\n        # if checkpoints directory does not exist, create it\n        os.mkdir(checkpoints_dir)\n    with open(cwd + '/models_menu/models_json.json') as f:\n        models_json = json.load(f)\n    saved_models = {}\n    # saved_models[\"YOLOv8x\"] = {\"checkpoint\": \"yolov8x-seg.pt\", \"config\": \"none\"}\n    for model in models_json:\n        if model[\"Model\"] != \"SAM\":\n            if model[\"Checkpoint\"].split(\"/\")[-1] in os.listdir(checkpoints_dir):\n                saved_models[model[\"Model Name\"]] = {\n                    \"id\": model[\"id\"], \"checkpoint\": model[\"Checkpoint\"], \"config\": model[\"Config\"]}\n\n    with open(cwd + \"/saved_models.json\", \"w\") as f:\n        json.dump(saved_models, f, indent=4) \n \ndef delete_id_from_rec_and_traj(id, id_frames_rec, trajectories, frames):\n    \n    \"\"\"\n    Summary:\n        Delete an id from id_frames_rec and trajectories.\n        \n    Args:\n        id: the id to delete\n        id_frames_rec: a dictionary of id frames records\n        \n    Returns:\n    \"\"\"\n    \n    # remove frames from id_frames_rec for this id\n    id_frames_rec['id_' + str(id)] = id_frames_rec['id_' + str(id)] - set(frames)\n    \n    # remove frames from trajectories for this id\n    for frame in frames:\n        trajectories['id_' + str(id)][frame - 1] = (-1, -1)\n        \n    return id_frames_rec, trajectories\n    \ndef adjust_shapes_to_original_image(shapes, x1, y1, area_points):\n    \n    shape1 = [tuple([int(x[0]), int(x[1])]) for x in area_points]\n    polygon1 = Polygon(shape1)\n    final = []\n    \n    for shape in shapes:\n        shape['points'] = [shape['points'][i] + x1 if i % 2 == 0 else shape['points'][i] + y1 for i in range(len(shape['points']))]\n        shape['bbox'] = [shape['bbox'][0] + x1, shape['bbox'][1] + y1, shape['bbox'][2] + x1, shape['bbox'][3] + y1]\n        \n        points = shape[\"points\"]\n        shape2 = [tuple([int(points[z]), int(points[z + 1])])\n                for z in range(0, len(points), 2)]\n        polygon2 = Polygon(shape2)\n        if polygon1.intersects(polygon2):\n            final.append(shape)\n    \n    return final\n\ndef track_area_adjustedBboex(area_points, dims, ratio = 0.1):\n    \n    [x1, y1, x2, y2] = get_bbox_xyxy(area_points)\n    [w, h] = [x2 - x1, y2 - y1]\n    x1 = int(max(0, x1 - w * ratio))\n    y1 = int(max(0, y1 - h * ratio))\n    x2 = int(min(dims[1], x2 + w * ratio))\n    y2 = int(min(dims[0], y2 + h * ratio))\n    \n    return [x1, y1, x2, y2]\n\ndef get_contour_length(contour):\n    contour_start = contour\n    contour_end = np.r_[contour[1:], contour[0:1]]\n    return np.linalg.norm(contour_end - contour_start, axis=1).sum()\n\ndef mask_to_polygons(mask, n_points=25, resize_factors=[1.0, 1.0]):\n    mask = mask > 0.0\n    contours = skimage.measure.find_contours(mask)\n    if len(contours) == 0:\n        return []\n    contour = max(contours, key=get_contour_length)\n    coords = 
skimage.measure.approximate_polygon(\n        coords=contour,\n        tolerance=np.ptp(contour, axis=0).max() / 100,\n    )\n\n    coords = coords * resize_factors\n    # convert coords from x y to y x\n    coords = np.fliplr(coords)\n\n    # segment_points are a list of coords\n    segment_points = coords.astype(int)\n    polygon = segment_points\n    return polygon\n\ndef polygon_to_shape(polygon, score, className=\"SAM instance\"):\n    shape = {}\n    shape[\"label\"] = className\n    shape[\"content\"] = str(round(score, 2))\n    shape[\"group_id\"] = None\n    shape[\"shape_type\"] = \"polygon\"\n    shape[\"bbox\"] = get_bbox_xyxy(polygon)\n\n    shape[\"flags\"] = {}\n    shape[\"other_data\"] = {}\n\n    # shape_points is result[\"seg\"] flattened\n    shape[\"points\"] = [item for sublist in polygon\n                            for item in sublist]\n    # print(shape)\n    return shape\n\ndef OURnms_confidenceBased(shapes, iou_threshold=0.5):\n    \"\"\"\n    Perform non-maximum suppression on a list of shapes based on their bounding boxes using IOU threshold.\n\n    Args:\n        shapes (list): List of shapes, each shape is a dictionary with keys (bbox, confidence, class_id)\n        iou_threshold (float): IOU threshold for non-maximum suppression.\n\n    Returns:\n        list: List of shapes after performing non-maximum suppression, each shape is a dictionary with keys (bbox, confidence, class_id)\n    \"\"\"\n    iou_threshold = float(iou_threshold)\n    for shape in shapes:\n        if shape['content'] is None:\n            shape['content'] = 1.0\n\n    # Sort shapes by their confidence\n    shapes.sort(key=lambda x: x['content'], reverse=True)\n\n    boxes, confidences, class_ids, segments = get_boxes_conf_classids_segments(\n        shapes)\n\n    toBeRemoved = []\n\n    # Loop through each shape\n    for i in range(len(shapes)):\n        shape_bbox = boxes[i]\n        # Loop through each remaining shape\n        for j in range(i + 1, len(shapes)):\n            remaining_shape_bbox = boxes[j]\n\n            # Compute IOU between shape and remaining_shape\n            iou = compute_iou(shape_bbox, remaining_shape_bbox)\n\n            # If IOU is greater than threshold, remove remaining_shape from shapes list\n            if iou > iou_threshold:\n                toBeRemoved.append(j)\n\n    shapesFinal = []\n    boxesFinal = []\n    confidencesFinal = []\n    class_idsFinal = []\n    segmentsFinal = []\n    for i in range(len(shapes)):\n        if i in toBeRemoved:\n            continue\n        shapesFinal.append(shapes[i])\n    boxesFinal, confidencesFinal, class_idsFinal, segmentsFinal = get_boxes_conf_classids_segments(\n        shapesFinal)\n\n    return shapesFinal, boxesFinal, confidencesFinal, class_idsFinal, segmentsFinal\n\n\ndef OURnms_areaBased_fromSAM(self, sam_result, iou_threshold=0.5):\n        \n    iou_threshold = float(iou_threshold)\n\n    # Sort shapes by their areas\n    sortedResult = sorted(sam_result, key=lambda x: x['area'], reverse=True)\n    masks = [ mask['segmentation'] for mask in sortedResult]\n    scores = [mask['stability_score'] for mask in sortedResult]\n    polygons = [mask_to_polygons(mask) for mask in masks]\n    \n    toBeRemoved = []\n\n    # Loop through each shape\n    if iou_threshold > 0.99:\n        for i in range(len(polygons)):\n            shape1 = polygons[i]\n            # Loop through each remaining shape\n            for j in range(i + 1, len(sortedResult)):\n                shape2 = polygons[j]\n                # 
Compute IOU between shape and remaining_shape\n                iou = compute_iou_exact(shape1, shape2)\n                # If IOU is greater than threshold, remove remaining_shape from shapes list\n                if iou > iou_threshold:\n                    toBeRemoved.append(j)\n\n    shapes = []\n    for i in range(len(polygons)):\n        if i in toBeRemoved:\n            continue\n        shapes.append(self.polygon_to_shape(polygons[i], scores[i], f'X{i}'))\n\n    return shapes\n\n\n"
  },
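Two worked examples for helpers in mathOps.py above (editor's sketch; the module path labelme.utils.helpers.mathOps is assumed from the file location): compute_iou on a pair of partially overlapping boxes, and mapFrameToTime on frame 90 of a 30 fps video.

from labelme.utils.helpers.mathOps import compute_iou, mapFrameToTime  # assumed module path

# IoU: intersection 5 * 5 = 25, union 100 + 100 - 25 = 175, so IoU = 25 / 175 ~= 0.143
box_a = [0, 0, 10, 10]  # (xmin, ymin, xmax, ymax)
box_b = [5, 5, 15, 15]
assert abs(compute_iou(box_a, box_b) - 25 / 175) < 1e-9

# Frame 90 at 30 fps is exactly 3 seconds in: (hours, minutes, seconds, milliseconds)
assert mapFrameToTime(90, 30) == (0, 0, 3, 0)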
  {
    "path": "DLTA_AI_app/labelme/utils/helpers/visualizations.py",
    "content": "import numpy as np\nimport cv2\nfrom .mathOps import *\n\n\ncoco_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',\n                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n                'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',\n                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',\n                'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',\n                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',\n                'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',\n                'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n# make a list of 12 unique colors as we will use them to draw bounding boxes of different classes in different colors\n# so the calor palette will be used to draw bounding boxes of different classes in different colors\n# the color pallette should have the famous 12 colors as red, green, blue, yellow, cyan, magenta, white, black, gray, brown, pink, and orange in bgr format\n\n\n\ndef draw_bb_id(flags, image, x, y, w, h, id, conf, label, color=(0, 0, 255), thickness=1):\n    if image is None:\n        print(\"Image is None\")\n        return\n    \n    \"\"\"\n    Summary:\n        Draw bounding box and id on an image (Single id).\n        \n    Args:\n        flags: a dictionary of flags (bbox, id, class)\n        image: a cv2 image\n        x: x coordinate of the bounding box\n        y: y coordinate of the bounding box\n        w: width of the bounding box\n        h: height of the bounding box\n        id: id of the shape\n        label: label of the shape (class name)\n        color: color of the bounding box\n        thickness: thickness of the bounding box\n        \n    Returns:\n        image: a cv2 image\n    \"\"\"\n    \n    if flags['bbox']:\n        image = cv2.rectangle(\n            image, (x, y), (x + w, y + h), color, thickness + 1)\n\n    if flags['id'] or flags['class'] or flags['conf']:\n        text = ''\n        if flags['id'] and flags['class']:\n            text = f'#{id} [{label}]'\n        if flags['id'] and not flags['class']:\n            text = f'#{id}'\n        if not flags['id'] and flags['class']:\n            text = f'[{label}]'\n        if flags['conf']:\n            text = f'{text} {conf}' if len(text) > 0 else f'{conf}'\n\n        fontscale = image.shape[0] / 2000\n        if fontscale < 0.3:\n            fontscale = 0.3\n        elif fontscale > 5:\n            fontscale = 5\n\n        text_width, text_height = cv2.getTextSize(\n            text, cv2.FONT_HERSHEY_SIMPLEX, fontscale, thickness)[0]\n        text_x = x + 10\n        text_y = y - 10\n\n        text_background_x1 = x\n        text_background_y1 = y - 2 * 10 - text_height\n\n        text_background_x2 = x + 2 * 10 + text_width\n        text_background_y2 = y\n\n        # fontscale is proportional to the image size\n        cv2.rectangle(\n            img=image,\n            pt1=(text_background_x1, text_background_y1),\n            pt2=(text_background_x2, text_background_y2),\n            color=color,\n            
thickness=cv2.FILLED,\n        )\n        cv2.putText(\n            img=image,\n            text=text,\n            org=(text_x, text_y),\n            fontFace=cv2.FONT_HERSHEY_SIMPLEX,\n            fontScale=fontscale,\n            color=(0, 0, 0),\n            thickness=thickness,\n            lineType=cv2.LINE_AA,\n        )\n\n    # there is no bbox but there is id or class\n    if (not flags['bbox']) and (flags['id'] or flags['class'] or flags['conf']):\n        image = cv2.line(image, (x + int(w / 2), y + int(h / 2)),\n                            (x + 50, y - 5), color, thickness + 1)\n\n    return image\n\ndef draw_trajectories(trajectories, CurrentFrameIndex, flags, img, shapes):\n    \n    \"\"\"\n    Summary:\n        Draw trajectories on an image.\n        \n    Args:\n        trajectories: a dictionary of trajectories\n        CurrentFrameIndex: the current frame index\n        flags: a dictionary of flags (traj, mask)\n        img: a cv2 image\n        shapes: a list of shapes\n        \n    Returns:\n        img: a cv2 image\n    \"\"\"\n    \n    x = trajectories['length']\n    for shape in shapes:\n        id = shape[\"group_id\"]\n        pts_traj = trajectories['id_' + str(id)][max(\n            CurrentFrameIndex - x, 0): CurrentFrameIndex]\n        pts_poly = np.array([[x, y] for x, y in zip(\n            shape[\"points\"][0::2], shape[\"points\"][1::2])])\n        color_poly = trajectories['id_color_' + str(\n            id)]\n\n        if flags['mask']:\n            original_img = img.copy()\n            if pts_poly is not None:\n                cv2.fillPoly(img, pts=[pts_poly], color=color_poly)\n            alpha = trajectories['alpha']\n            img = cv2.addWeighted(original_img, alpha, img, 1 - alpha, 0)\n        for i in range(len(pts_traj) - 1, 0, - 1):\n\n            thickness = (len(pts_traj) - i <= 10) * 1 + (len(pts_traj) -\n                                                            i <= 20) * 1 + (len(pts_traj) - i <= 30) * 1 + 3\n            # max_thickness = 6\n            # thickness = max(1, round(i / len(pts_traj) * max_thickness))\n\n            if pts_traj[i - 1] is None or pts_traj[i] is None:\n                continue\n            if pts_traj[i] == (-1, - 1) or pts_traj[i - 1] == (-1, - 1):\n                break\n\n            # color_traj = tuple(int(0.95 * x) for x in color_poly)\n            color_traj = color_poly\n\n            if flags['traj']:\n                cv2.line(img, pts_traj[i - 1],\n                            pts_traj[i], color_traj, thickness)\n                if ((len(pts_traj) - 1 - i) % 10 == 0):\n                    cv2.circle(img, pts_traj[i], 3, (0, 0, 0), -1)\n\n    return img\n\ndef draw_bb_on_image(trajectories, CurrentFrameIndex, flags, nTotalFrames, image, shapes, image_qt_flag=True):\n    \n    \"\"\"\n    Summary:\n        Draw bounding boxes and trajectories on an image (multiple ids).\n        \n    Args:\n        trajectories: a dictionary of trajectories.\n        CurrentFrameIndex: the current frame index.\n        nTotalFrames: the total number of frames.\n        image: a QT image or a cv2 image.\n        shapes: a list of shapes.\n        image_qt_flag: a flag to indicate if the image is a QT image or a cv2 image.\n        \n    Returns:\n        img: a QT image or a cv2 image.\n    \"\"\"\n    \n    img = image\n    if image_qt_flag:\n        img = convert_QT_to_cv(image)\n\n    for shape in shapes:\n        id = shape[\"group_id\"]\n        label = shape[\"label\"]\n        conf = shape[\"content\"]\n\n 
       # color calculation\n        # idx = coco_classes.index(label) if label in coco_classes else -1\n        # idx = idx % len(color_palette)\n        # color = color_palette[idx] if idx != -1 else (0, 0, 255)\n        # label_hash = hash(label)\n        # idx = abs(label_hash) % len(color_palette)\n        label_ascii = sum([ord(c) for c in label])\n        idx = label_ascii % len(color_palette)\n        color = color_palette[idx]\n\n        (x1, y1, x2, y2) = shape[\"bbox\"]\n        x, y, w, h = int(x1), int(y1), int(x2 - x1), int(y2 - y1)\n        img = draw_bb_id(flags, img, x, y, w, h, id, conf,\n                                label, color, thickness=1)\n        center = (int((x1 + x2) / 2), int((y1 + y2) / 2))\n        try:\n            centers_rec = trajectories['id_' + str(id)]\n            try:\n                (xp, yp) = centers_rec[CurrentFrameIndex - 2]\n                (xn, yn) = center\n                if (xp == -1 or xn == -1):\n                    c = 5 / 0\n                r = 0.5\n                x = r * xn + (1 - r) * xp\n                y = r * yn + (1 - r) * yp\n                center = (int(x), int(y))\n            except:\n                pass\n            centers_rec[CurrentFrameIndex - 1] = center\n            trajectories['id_' +\n                                                    str(id)] = centers_rec\n            trajectories['id_color_' +\n                                                    str(id)] = color\n        except:\n            centers_rec = [(-1, - 1)] * int(nTotalFrames)\n            centers_rec[CurrentFrameIndex - 1] = center\n            trajectories['id_' +\n                                                    str(id)] = centers_rec\n            trajectories['id_color_' +\n                                                    str(id)] = color\n\n    # print(sys.getsizeof(trajectories))\n\n    img = draw_trajectories(trajectories, CurrentFrameIndex, flags, img, shapes)\n\n    if image_qt_flag:\n        img = convert_cv_to_qt(img, )\n\n    return img\n\ndef draw_bb_on_image_MODE(flags, image, shapes):\n    \n    \"\"\"\n    Summary:\n        Draw bounding boxes on an QT image (multiple ids) in MODE image.\n        \n    Args:\n        flags: a dictionary of flags.\n        image: a QT image.\n        shapes: a list of shapes.\n        \n    Returns:\n        img: a QT image.\n    \"\"\"\n    \n    img = convert_QT_to_cv(image)\n\n    for shape in shapes:\n        \n        label = shape[\"label\"]\n        if label == \"SAM instance\":\n            continue\n        conf = shape[\"content\"]\n        pts_poly = np.array([[x, y] for x, y in zip(\n            shape[\"points\"][0::2], shape[\"points\"][1::2])])\n\n        # color calculation\n        # idx = coco_classes.index(label) if label in coco_classes else -1\n        # idx = idx % len(color_palette)\n        # color = color_palette[idx] if idx != -1 else (0, 0, 255)\n        # label_hash = hash(label)\n        # idx = abs(label_hash) % len(color_palette)\n        label_ascii = sum([ord(c) for c in label])\n        idx = label_ascii % len(color_palette)\n        color = color_palette[idx]\n\n        (x1, y1, x2, y2) = shape[\"bbox\"]\n        x, y, w, h = int(x1), int(y1), int(x2 - x1), int(y2 - y1)\n        \n        img = draw_bb_label_on_image_MODE(flags, img, x, y, w, h,\n                                label, conf, color, thickness=1)\n        \n        if flags['mask']:\n            original_img = img.copy()\n            if pts_poly is not None:\n                cv2.fillPoly(img, 
pts=[pts_poly], color=color)\n            alpha = 0.70\n            img = cv2.addWeighted(original_img, alpha, img, 1 - alpha, 0)\n    \n    img = convert_cv_to_qt(img, )\n\n    return img\n\ndef draw_bb_label_on_image_MODE(flags, image, x, y, w, h, label, conf, color=(0, 0, 255), thickness=1):\n    if image is None:\n        print(\"Image is None\")\n        return\n    \n    \"\"\"\n    Summary:\n        Draw bounding box and id on an image (Single id).\n        \n    Args:\n        flags: a dictionary of flags (bbox, id, class)\n        image: a cv2 image\n        x: x coordinate of the bounding box\n        y: y coordinate of the bounding box\n        w: width of the bounding box\n        h: height of the bounding box\n        label: label of the shape (class name)\n        color: color of the bounding box\n        thickness: thickness of the bounding box\n        \n    Returns:\n        image: a cv2 image\n    \"\"\"\n    \n    if flags['bbox']:\n        image = cv2.rectangle(\n            image, (x, y), (x + w, y + h), color, thickness + 1)\n\n    if flags['conf'] or flags['class']:\n        \n        if flags['conf'] and flags['class']:\n            text = f'[{label}] {conf}'\n        if flags['conf'] and not flags['class']:\n            text = f'{conf}'\n        if not flags['conf'] and flags['class']:\n            text = f'[{label}]'\n\n        fontscale = image.shape[0] / 2000\n        if fontscale < 0.3:\n            fontscale = 0.3\n        elif fontscale > 5:\n            fontscale = 5\n        text_width, text_height = cv2.getTextSize(\n            text, cv2.FONT_HERSHEY_SIMPLEX, fontscale, thickness)[0]\n        text_x = x + 10\n        text_y = y - 10\n\n        text_background_x1 = x\n        text_background_y1 = y - 2 * 10 - text_height\n\n        text_background_x2 = x + 2 * 10 + text_width\n        text_background_y2 = y\n\n        # fontscale is proportional to the image size\n        cv2.rectangle(\n            img=image,\n            pt1=(text_background_x1, text_background_y1),\n            pt2=(text_background_x2, text_background_y2),\n            color=color,\n            thickness=cv2.FILLED,\n        )\n        cv2.putText(\n            img=image,\n            text=text,\n            org=(text_x, text_y),\n            fontFace=cv2.FONT_HERSHEY_SIMPLEX,\n            fontScale=fontscale,\n            color=(0, 0, 0),\n            thickness=thickness,\n            lineType=cv2.LINE_AA,\n        )\n\n    # there is no bbox but there is id or class\n    if (not flags['bbox']) and (flags['conf'] or flags['class']):\n        image = cv2.line(image, (x + int(w / 2), y + int(h / 2)),\n                            (x + 50, y - 5), color, thickness + 1)\n\n    return image\n\n\n\n\n\n\n\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/utils/image.py",
    "content": "import base64\nimport io\n\nimport numpy as np\nimport PIL.ExifTags\nimport PIL.Image\nimport PIL.ImageOps\n\n\ndef img_data_to_pil(img_data):\n    f = io.BytesIO()\n    f.write(img_data)\n    img_pil = PIL.Image.open(f)\n    return img_pil\n\n\ndef img_data_to_arr(img_data):\n    img_pil = img_data_to_pil(img_data)\n    img_arr = np.array(img_pil)\n    return img_arr\n\n\ndef img_b64_to_arr(img_b64):\n    img_data = base64.b64decode(img_b64)\n    img_arr = img_data_to_arr(img_data)\n    return img_arr\n\n\ndef img_pil_to_data(img_pil):\n    f = io.BytesIO()\n    img_pil.save(f, format=\"PNG\")\n    img_data = f.getvalue()\n    return img_data\n\n\ndef img_arr_to_b64(img_arr):\n    img_pil = PIL.Image.fromarray(img_arr)\n    f = io.BytesIO()\n    img_pil.save(f, format=\"PNG\")\n    img_bin = f.getvalue()\n    if hasattr(base64, \"encodebytes\"):\n        img_b64 = base64.encodebytes(img_bin)\n    else:\n        img_b64 = base64.encodestring(img_bin)\n    return img_b64\n\n\ndef img_data_to_png_data(img_data):\n    with io.BytesIO() as f:\n        f.write(img_data)\n        img = PIL.Image.open(f)\n\n        with io.BytesIO() as f:\n            img.save(f, \"PNG\")\n            f.seek(0)\n            return f.read()\n\n\ndef apply_exif_orientation(image):\n    try:\n        exif = image._getexif()\n    except AttributeError:\n        exif = None\n\n    if exif is None:\n        return image\n\n    exif = {\n        PIL.ExifTags.TAGS[k]: v\n        for k, v in exif.items()\n        if k in PIL.ExifTags.TAGS\n    }\n\n    orientation = exif.get(\"Orientation\", None)\n\n    if orientation == 1:\n        # do nothing\n        return image\n    elif orientation == 2:\n        # left-to-right mirror\n        return PIL.ImageOps.mirror(image)\n    elif orientation == 3:\n        # rotate 180\n        return image.transpose(PIL.Image.ROTATE_180)\n    elif orientation == 4:\n        # top-to-bottom mirror\n        return PIL.ImageOps.flip(image)\n    elif orientation == 5:\n        # top-to-left mirror\n        return PIL.ImageOps.mirror(image.transpose(PIL.Image.ROTATE_270))\n    elif orientation == 6:\n        # rotate 270\n        return image.transpose(PIL.Image.ROTATE_270)\n    elif orientation == 7:\n        # top-to-right mirror\n        return PIL.ImageOps.mirror(image.transpose(PIL.Image.ROTATE_90))\n    elif orientation == 8:\n        # rotate 90\n        return image.transpose(PIL.Image.ROTATE_90)\n    else:\n        return image\n"
  },
  {
    "path": "DLTA_AI_app/labelme/utils/model_explorer.py",
    "content": "from PyQt6 import QtWidgets, QtCore, QtGui\nfrom PyQt6.QtWidgets import QDialog, QToolBar, QTableWidget, QTableWidgetItem, QVBoxLayout, QHBoxLayout, QComboBox, QCheckBox, QPushButton, QProgressDialog, QApplication, QWidget\nimport json\nimport urllib.request\nimport requests\nimport os\nimport time\n\n# store json file into list of dictionaries\ncwd = os.getcwd()\nwith open(cwd + '/models_menu/models_json.json') as f:\n    models_json = json.load(f)\n\n\nclass ModelExplorerDialog(QDialog):\n    \"\"\"\n    A dialog window for exploring available models and downloading them.\n\n    Attributes:\n        main_window (QMainWindow): The main window of the application.\n        mute (bool): Whether to mute notifications or not.\n        notification (function): A function for displaying notifications.\n    \"\"\"\n\n    def __init__(self, main_window=None, mute=None, notification=None):\n        \"\"\"\n        Initializes the ModelExplorerDialog.\n\n        Args:\n            main_window (QMainWindow): The main window of the application.\n            mute (bool): Whether to mute notifications.\n            notification (function): A function for displaying notifications.\n        \"\"\"\n        super().__init__()\n        self.main_window = main_window\n        self.mute = mute\n        self.notification = notification\n        self.setWindowTitle(\"Model Explorer\")\n        self.setWindowFlags(self.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n\n\n        # Define the columns of the table\n        self.cols_labels = [\"id\", \"Model Name\", \"Backbone\", \"Lr schd\",\n                            \"Memory (GB)\", \"Inference Time (fps)\", \"box AP\", \"mask AP\", \"Checkpoint Size (MB)\"]\n\n        # Get the unique model names\n        self.model_keys = sorted(\n            list(set([model['Model'] for model in models_json])))\n\n        # Set up the layout\n        layout = QVBoxLayout()\n        self.setLayout(layout)\n\n        # Set up the toolbar\n        toolbar = QToolBar()\n        layout.addWidget(toolbar)\n\n        # Set up the model type dropdown menu\n        self.model_type_dropdown = QComboBox()\n        self.model_type_dropdown.addItems([\"All\"] + self.model_keys)\n        self.model_type_dropdown.currentIndexChanged.connect(self.search)\n        toolbar.addWidget(self.model_type_dropdown)\n\n        # Set up the checkboxes\n        self.available_checkbox = QCheckBox(\"Downloaded\")\n        self.available_checkbox.clicked.connect(self.search)\n        toolbar.addWidget(self.available_checkbox)\n        self.not_available_checkbox = QCheckBox(\"Not Downloaded\")\n        self.not_available_checkbox.clicked.connect(self.search)\n        toolbar.addWidget(self.not_available_checkbox)\n\n        # Set up the search button\n        # search_button = QPushButton(\"Search\")\n        # search_button.clicked.connect(self.search)\n        # toolbar.addWidget(search_button)\n\n        # Set up the button for opening the checkpoints directory\n        open_checkpoints_dir_button = QPushButton(\"Open Checkpoints Dir\")\n        # add icon to the button\n        open_checkpoints_dir_button.setIcon(\n            QtGui.QIcon(cwd + '/labelme/icons/downloads.png'))\n        open_checkpoints_dir_button.setIconSize(QtCore.QSize(20, 20))\n        open_checkpoints_dir_button.clicked.connect(\n            self.open_checkpoints_dir)\n        toolbar.addWidget(open_checkpoints_dir_button)\n\n        # Set spacing\n        layout.setSpacing(10)\n\n        # 
Set up the table\n        self.table = QTableWidget()\n        layout.addWidget(self.table)\n\n        # Set up the number of rows and columns\n        self.num_rows = len(models_json)\n        self.num_cols = 9\n\n        # Make availability list\n        self.check_availability()\n\n        # Populate the table with default data\n        self.populate_table()\n\n        # Set up the submit and cancel buttons\n        button_layout = QHBoxLayout()\n        layout.addLayout(button_layout)\n\n        close_button = QPushButton(\"Ok\")\n        close_button.clicked.connect(self.close)\n        # add side padding to the button\n        close_button.setFixedWidth(100)\n\n    \n        # make the button in the middle of the layout, don't stretch\n        button_layout.addStretch()\n        button_layout.addWidget(close_button)\n        button_layout.addStretch()\n\n        # layout spacing\n        layout.setSpacing(10)\n\n    def populate_table(self):\n        \"\"\"\n        Populates the table with data from models_json.\n\n        Returns:\n            None\n        \"\"\"\n        # Clear the table (keep the header labels)\n        self.table.clearContents()\n        self.table.setRowCount(self.num_rows)\n        # +2 for the available cell and select row button\n        self.table.setColumnCount(self.num_cols + 2)\n\n        # Set the header labels\n        header = self.table.horizontalHeader()\n        self.table.setHorizontalHeaderLabels(\n            self.cols_labels + [\"Status\", \"Select Model\"])\n        header.setSectionResizeMode(QtWidgets.QHeaderView.ResizeMode.ResizeToContents)\n\n        # remove vertical header\n        self.table.verticalHeader().setVisible(False)\n        self.table.horizontalHeader().setSectionResizeMode(\n            QtWidgets.QHeaderView.ResizeMode.ResizeToContents)\n\n        # Populate the table with data\n        row_count = 0\n        for model in models_json:\n\n            col_count = 0\n            for key in self.cols_labels:\n                item = QTableWidgetItem(f\"{model[key]}\")\n                item.setTextAlignment(QtCore.Qt.AlignmentFlag.AlignCenter)\n                self.table.setItem(row_count, col_count, item)\n                col_count += 1\n\n            # Select Model column\n            self.selected_model = (-1, -1, -1)\n            select_row_button = QPushButton(\"Select Model\")\n\n\n            select_row_button.clicked.connect(self.select_model)\n\n\n            self.table.setContentsMargins(10, 10, 10, 10)\n            self.table.setCellWidget(row_count, 10, select_row_button)\n\n            # Downloaded column\n            if model[\"Downloaded\"]:\n                available_item = QTableWidgetItem(\"Downloaded\")\n                # make the text color dark green\n                available_item.setForeground(QtCore.Qt.GlobalColor.darkGreen)\n                self.table.setItem(row_count, 9, available_item)\n                available_item.setTextAlignment(QtCore.Qt.AlignmentFlag.AlignCenter)\n            else:\n                available_item = QPushButton(\"Requires Download\")\n                available_item.clicked.connect(\n                    self.create_download_callback(model[\"id\"]))\n                # add padding to button\n                available_item.setContentsMargins(10, 10, 10, 10)\n                # maek the button text color red\n                available_item.setStyleSheet(\"color: red\")\n                self.table.setCellWidget(row_count, 9, available_item)\n                # make select_row_button 
disabled\n                select_row_button.setEnabled(False)\n\n            # Disable SAM Selection\n            if model[\"Model\"] == \"SAM\":\n                select_row_button.setEnabled(False)\n                # change text\n                select_row_button.setText(\"Select from SAM Toolbar\")\n\n            row_count += 1\n\n    def search(self):\n        \"\"\"\n        Filters the table based on the selected model type and availability.\n\n        Returns:\n            None\n        \"\"\"\n        # Get the selected model type and availability\n        model_type = self.model_type_dropdown.currentText()\n        available = self.available_checkbox.isChecked()\n        not_available = self.not_available_checkbox.isChecked()\n\n        # Iterate over each row in the table\n        for row in range(self.num_rows):\n            show_row = True\n            # Filter by model type\n            if model_type != \"All\":\n                id = int(self.table.item(row, 0).text())\n                if models_json[id][\"Model\"] != model_type:\n                    show_row = False\n\n            # Filter by availability\n            if available or not_available:\n                available_text = self.table.item(row, 9)\n                try:\n                    available_text = available_text.text()\n                except AttributeError:\n                    pass\n                if available and available_text != \"Downloaded\":\n                    show_row = False\n                if not_available and available_text == \"Downloaded\":\n                    show_row = False\n\n            # Hide or show the row based on the filters\n            self.table.setRowHidden(row, not show_row)\n\n\n    def select_model(self):\n        \"\"\"\n        Gets the selected model from the table and sets it as the selected model.\n\n        Returns:\n            None\n        \"\"\"\n        # Get the button that was clicked\n        sender = self.sender()\n        # Get the row index of the button in the table\n        index = self.table.indexAt(sender.pos())\n        # Get the model id from the row index\n        row = index.row()\n        model_id = int(self.table.item(row, 0).text())\n        # Set the selected model as the model with this id\n        self.selected_model = models_json[model_id][\"Model Name\"], models_json[model_id][\"Config\"], models_json[model_id][\"Checkpoint\"],\n        self.accept()\n\n    def download_model(self, id):\n        \"\"\"\n        Downloads the model with the given id and updates the progress dialog.\n\n        Args:\n            id (int): The id of the model to download.\n\n        Returns:\n            None\n        \"\"\"\n        # Get the checkpoint link and model name for the model with this id\n        checkpoint_link = models_json[id][\"Checkpoint_link\"]\n        model_name = models_json[id][\"Model Name\"]\n\n        # Create a progress dialog\n        self.progress_dialog = QProgressDialog(\n            f\"Downloading {model_name}...\", \"Cancel\", 0, 100, self)\n        # Set the window title\n        self.progress_dialog.setWindowTitle(\"Downloading Model\")\n        self.progress_dialog.setWindowModality(QtCore.Qt.WindowModality.WindowModal)\n        self.progress_dialog.canceled.connect(self.cancel_download)\n        self.progress_dialog.show()\n\n        # Initialize variables for tracking download progress\n        self.start_time = time.time()\n        self.last_time = self.start_time\n        self.last_downloaded = 0\n        
self.download_canceled = False\n\n        def handle_progress(block_num, block_size, total_size):\n            \"\"\"\n            Updates the progress dialog with the current download progress.\n\n            Args:\n                block_num (int): The number of blocks downloaded.\n                block_size (int): The size of each block.\n                total_size (int): The total size of the file being downloaded.\n\n            Returns:\n                None\n            \"\"\"\n            # Calculate the download progress\n            read_data = block_num * block_size\n            if total_size > 0:\n                download_percentage = read_data * 100 / total_size\n                # setValue expects an int, so round the percentage down\n                self.progress_dialog.setValue(int(download_percentage))\n                self.progress_dialog.setLabelText(f\"Downloading {model_name}... \")\n                QApplication.processEvents()\n\n        failed = False\n        file_path = None\n        try:\n            # Download the file using requests\n            response = requests.get(checkpoint_link, stream=True)\n            total_size = int(response.headers.get('content-length', 0))\n            block_size = 1024\n            block_num = 0\n\n            # Save the downloaded file to disk\n            file_path = f\"{cwd}/mmdetection/checkpoints/{checkpoint_link.split('/')[-1]}\"\n            with open(file_path, 'wb') as f:\n                for data in response.iter_content(block_size):\n                    if self.download_canceled:\n                        break\n                    f.write(data)\n                    block_num += 1\n                    handle_progress(block_num, block_size, total_size)\n            if self.download_canceled:\n                # Delete the file if the download was canceled\n                os.remove(file_path)\n                print(\"Download canceled by user\")\n                failed = True\n        except Exception as e:\n            # Delete the partially downloaded file (if any) and report the error\n            if file_path is not None and os.path.exists(file_path):\n                os.remove(file_path)\n            print(f\"Download error: {e}\")\n            failed = True\n\n        # Close the progress dialog and update the table\n        self.progress_dialog.close()\n        self.check_availability()\n        self.populate_table()\n        print(\"Download finished\")\n\n        # Show a notification if the main window is not active\n        try:\n            if not self.mute:\n                if not self.isActiveWindow():\n                    if not failed:\n                        self.notification(f\"{model_name} has been downloaded successfully\")\n                    else:\n                        self.notification(f\"Failed to download {model_name}\")\n        except:\n            pass\n\n\n    def cancel_download(self):\n        \"\"\"\n        Sets the download_canceled flag to True to cancel the download.\n\n        Returns:\n            None\n        \"\"\"\n        self.download_canceled = True\n\n\n    def create_download_callback(self, model_id):\n        \"\"\"\n        Returns a lambda function that downloads the model with the given id.\n\n        Args:\n            model_id (int): The id of the model to download.\n\n        Returns:\n            function: A lambda function that downloads the model with the given id.\n        \"\"\"\n        return lambda: self.download_model(model_id)\n\n\n    def check_availability(self):\n        \"\"\"\n        Checks the availability of each model in the table and updates the \"Downloaded\" column.\n\n        Returns:\n            None\n        \"\"\"\n        checkpoints_dir = cwd + 
\"/mmdetection/checkpoints/\"\n        for model in models_json:\n            if model[\"Checkpoint\"].split(\"/\")[-1] in os.listdir(checkpoints_dir):\n                model[\"Downloaded\"] = True\n            else:\n                model[\"Downloaded\"] = False\n\n\n    def open_checkpoints_dir(self):\n        \"\"\"\n        Opens the directory containing the downloaded checkpoints in the file explorer.\n\n        Returns:\n            None\n        \"\"\"\n        url = QtCore.QUrl.fromLocalFile(cwd + \"/mmdetection/checkpoints/\")\n        if not QtGui.QDesktopServices.openUrl(url):\n            # Print an error message if opening failed\n            print(\"Failed to open checkpoints directory\")\n"
  },
  {
    "path": "DLTA_AI_app/labelme/utils/qt.py",
    "content": "from math import sqrt\nimport os.path as osp\n\nimport numpy as np\n\nfrom PyQt6 import QtCore\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\n\n\nhere = osp.dirname(osp.abspath(__file__))\n\n\ndef newIcon(icon):\n    icons_dir = osp.join(here, \"../icons\")\n    return QtGui.QIcon(osp.join(\":/\", icons_dir, \"%s.png\" % icon))\n\n\ndef newButton(text, icon=None, slot=None):\n    b = QtWidgets.QPushButton(text)\n    if icon is not None:\n        b.setIcon(newIcon(icon))\n    if slot is not None:\n        b.clicked.connect(slot)\n    return b\n\n\ndef newAction(\n    parent,\n    text,\n    slot=None,\n    shortcut=None,\n    icon=None,\n    tip=None,\n    checkable=False,\n    enabled=True,\n    checked=False,\n):\n    \"\"\"Create a new action and assign callbacks, shortcuts, etc.\"\"\"\n    a = QtGui.QAction(text, parent)\n    if icon is not None:\n        a.setIconText(text.replace(\" \", \"\\n\"))\n        a.setIcon(newIcon(icon))\n    if shortcut is not None:\n        if isinstance(shortcut, (list, tuple)):\n            a.setShortcuts(shortcut)\n        else:\n            a.setShortcut(shortcut)\n    if tip is not None:\n        a.setToolTip(tip)\n        a.setStatusTip(tip)\n    if slot is not None:\n        a.triggered.connect(slot)\n    if checkable:\n        a.setCheckable(True)\n    a.setEnabled(enabled)\n    a.setChecked(checked)\n    return a\n\n\ndef addActions(widget, actions):\n    for action in actions:\n        if action is None:\n            widget.addSeparator()\n        elif isinstance(action, QtWidgets.QMenu):\n            widget.addMenu(action)\n        else:\n            widget.addAction(action)\n\n\ndef labelValidator():\n    return QtGui.QRegularExpressionValidator(QtCore.QRegularExpression(r\"^[^ \\t].+\"))\n\n\nclass struct(object):\n    def __init__(self, **kwargs):\n        self.__dict__.update(kwargs)\n\n\ndef distance(p):\n    return sqrt(p.x() * p.x() + p.y() * p.y())\n\n\ndef distancetoline(point, line):\n    p1, p2 = line\n    p1 = np.array([p1.x(), p1.y()])\n    p2 = np.array([p2.x(), p2.y()])\n    p3 = np.array([point.x(), point.y()])\n    if np.dot((p3 - p1), (p2 - p1)) < 0:\n        return np.linalg.norm(p3 - p1)\n    if np.dot((p3 - p2), (p1 - p2)) < 0:\n        return np.linalg.norm(p3 - p2)\n    if np.linalg.norm(p2 - p1) == 0:\n        return 0\n    return np.linalg.norm(np.cross(p2 - p1, p1 - p3)) / np.linalg.norm(p2 - p1)\n\n\ndef fmtShortcut(text):\n    mod, key = text.split(\"+\", 1)\n    return \"<b>%s</b>+<b>%s</b>\" % (mod, key)\n"
  },
  {
    "path": "DLTA_AI_app/labelme/utils/sam.py",
    "content": "from segment_anything import sam_model_registry, SamPredictor, SamAutomaticMaskGenerator\nimport numpy as np\nimport torch\nfrom .helpers import mathOps\n\n\n# create a sam predictor class with funcions to predict and visualize and results\n\n\nclass Sam_Predictor():\n    def __init__(self, model_type, checkpoint_path, device):\n        self.model_type = model_type\n        self.checkpoint_path = checkpoint_path\n        self.device = device\n        self.model = sam_model_registry[model_type](checkpoint=checkpoint_path)\n        self.model.to(device = self.device)\n        self.predictor = SamPredictor(self.model)\n        self.image = None\n        self.mask_logit = None\n        \n\n    def set_new_image(self, image):\n        self.image = image\n        self.predictor.set_image(image)\n    \n    def clear_logit(self):\n        self.mask_logit = None\n\n\n    def predict(self, point_coords=None, point_labels=None, box=None, multimask_output=True, image=None):\n        # print(point_coords , point_labels)\n        # print(f'----------------------- into SAM predict')\n        # print(f'point_coords: {point_coords}, point_labels: {point_labels}, box: {box}')\n        if box is None:\n            # print(f'----------------------- no boxes')\n            if self.mask_logit is None:\n                masks, scores, logits = self.predictor.predict(point_coords=point_coords, \n                                                               point_labels=point_labels, \n                                                               multimask_output=multimask_output)\n            else:\n                masks, scores, logits = self.predictor.predict(point_coords=point_coords, \n                                                               point_labels=point_labels,\n                                                               mask_input=self.mask_logit[None, :, :],\n                                                               multimask_output=multimask_output)\n        else:\n            # print(f'----------------------- boxes')\n            if len(box) == 1:\n                # print(f'----------------------- only one box')\n                input_box = np.array(box[0])\n                masks, scores, logits = self.predictor.predict(point_coords=point_coords, \n                                                            point_labels=point_labels,\n                                                            box=input_box[None, :],\n                                                            multimask_output=multimask_output)\n                \n            else:\n                # print(f'----------------------- multiple boxes')\n                input_box = np.array(box[0])\n                box_tensor = torch.tensor(box, device=self.predictor.device)\n                box_transformed = self.predictor.transform.apply_boxes_torch(box_tensor, image.shape[:2])\n                masks, scores, logits = self.predictor.predict_torch(point_coords=None, \n                                                            point_labels=None,\n                                                            boxes=box_transformed,\n                                                            multimask_output=False)\n        \n        if multimask_output:\n            if box is not None and len(box) != 1:\n                logits = torch.Tensor.cpu(logits).numpy().reshape(-1, logits.shape[-2], logits.shape[-1])\n                masks = torch.Tensor.cpu(masks).numpy().reshape(-1, masks.shape[-2], 
masks.shape[-1])\n                scores = torch.Tensor.cpu(scores).numpy().reshape(-1)\n            self.mask_logit = logits[np.argmax(scores), :, :]  # Choose the model's best mask logit\n            mask = masks[np.argmax(scores), :, :]  # Choose the model's best mask\n            score = np.max(scores)  # Choose the model's best score\n        \n        return mask, score\n    \n    \n    def predict_batch(self,  boxes=None, image=None):\n        boxes = np.array(boxes)\n        input_boxes = torch.tensor(boxes, device=self.predictor.device)\n        transformed_boxes = self.predictor.transform.apply_boxes_torch(input_boxes, image.shape[:2])\n        masks, scores, logits = self.predictor.predict_torch(\n        point_coords=None,\n        point_labels=None,\n        boxes=transformed_boxes,\n        multimask_output=False,\n    )\n        return masks, scores\n\n        \n    def check_image(self , new_image):\n        if not np.array_equal(self.image, new_image):\n            # print(\"image changed_1\")\n            self.mask_logit = None\n            self.image = new_image\n            self.predictor.set_image(new_image)\n            # print(\"image changed_2\")\n            return False\n        return True\n\n    def get_all_shapes(self, image, iou_threshold):\n        \n        # self.mask_generator = SamAutomaticMaskGenerator(\n        #     model: Sam,\n        #     points_per_side: Optional[int] = 32,\n        #     points_per_batch: int = 64,\n        #     pred_iou_thresh: float = 0.88,\n        #     stability_score_thresh: float = 0.95,\n        #     stability_score_offset: float = 1.0,\n        #     box_nms_thresh: float = 0.7,\n        #     crop_n_layers: int = 0,\n        #     crop_nms_thresh: float = 0.7,\n        #     crop_overlap_ratio: float = 512 / 1500,\n        #     crop_n_points_downscale_factor: int = 1,\n        #     point_grids: Optional[List[np.ndarray]] = None,\n        #     min_mask_region_area: int = 0,\n        #     output_mode: str = \"binary_mask\",\n        # )\n        \n        \n        self.mask_generator = SamAutomaticMaskGenerator(\n            model = self.model,\n            # points_per_side = 32,\n            # points_per_batch = 64,\n            # pred_iou_thresh = 0.88,\n            # stability_score_thresh = 0.95,\n            # stability_score_offset = 1.0,\n            # box_nms_thresh = 0.3,\n            # crop_n_layers = 0,\n            # crop_nms_thresh = 0.7,\n            # crop_overlap_ratio = 512 / 1500,\n            # crop_n_points_downscale_factor = 1,\n            # point_grids = None,\n            # min_mask_region_area = image.shape[0] * image.shape[1] * 0.0005,\n            # output_mode = \"binary_mask\",\n        )\n        \n        # Arguments(\n        #   model (Sam): The SAM model to use for mask prediction.\n        \n        #   points_per_side (int or None): The number of points to be sampled\n        #     along one side of the image. The total number of points is\n        #     points_per_side**2. If None, 'point_grids' must provide explicit\n        #     point sampling.\n        \n        #   points_per_batch (int): Sets the number of points run simultaneously\n        #     by the model. 
Higher numbers may be faster but use more GPU memory.\n        \n        #   pred_iou_thresh (float): A filtering threshold in [0,1], using the\n        #     model's predicted mask quality.\n          \n        #   stability_score_thresh (float): A filtering threshold in [0,1], using\n        #     the stability of the mask under changes to the cutoff used to binarize\n        #     the model's mask predictions.\n          \n        #   stability_score_offset (float): The amount to shift the cutoff when\n        #     calculated the stability score.\n          \n        #   box_nms_thresh (float): The box IoU cutoff used by non-maximal\n        #     suppression to filter duplicate masks.\n          \n        #   crop_n_layers (int): If >0, mask prediction will be run again on\n        #     crops of the image. Sets the number of layers to run, where each\n        #     layer has 2**i_layer number of image crops.\n          \n        #   crop_nms_thresh (float): The box IoU cutoff used by non-maximal\n        #     suppression to filter duplicate masks between different crops.\n          \n        #   crop_overlap_ratio (float): Sets the degree to which crops overlap.\n        #     In the first crop layer, crops will overlap by this fraction of\n        #     the image length. Later layers with more crops scale down this overlap.\n          \n        #   crop_n_points_downscale_factor (int): The number of points-per-side\n        #     sampled in layer n is scaled down by crop_n_points_downscale_factor**n.\n          \n        #   point_grids (list(np.ndarray) or None): A list over explicit grids\n        #     of points used for sampling, normalized to [0,1]. The nth grid in the\n        #     list is used in the nth crop layer. Exclusive with points_per_side.\n          \n        #   min_mask_region_area (int): If >0, postprocessing will be applied\n        #     to remove disconnected regions and holes in masks with area smaller\n        #     than min_mask_region_area. Requires opencv.\n          \n        #   output_mode (str): The form masks are returned in. Can be 'binary_mask',\n        #     'uncompressed_rle', or 'coco_rle'. 'coco_rle' requires pycocotools.\n        #     For large resolutions, 'binary_mask' may consume large amounts of\n        #     memory.\n        # )\n        \n        \n        \n        # sam_result is a list of dictionaries\n        # each dictionary (mask) has the following keys:\n            # segmentation - [np.ndarray] - the mask with (W, H) shape, and bool type\n            # area - [int] - the area of the mask in pixels\n            # bbox - [List[int]] - the boundary box of the mask in xywh format\n            # predicted_iou - [float] - the model's own prediction for the quality of the mask\n            # point_coords - [List[List[float]]] - the sampled input point that generated this mask\n            # stability_score - [float] - an additional measure of mask quality\n            # crop_box - List[int] - the crop of the image used to generate this mask in xywh format\n            \n        sam_result = self.mask_generator.generate(image)\n        shapes = mathOps.OURnms_areaBased_fromSAM(sam_result, iou_threshold=iou_threshold) # with AREA not score\n        \n        return shapes\n    "
  },
  {
    "path": "DLTA_AI_app/labelme/utils/shape.py",
    "content": "import math\nimport uuid\n\nimport numpy as np\nimport PIL.Image\nimport PIL.ImageDraw\n\nfrom labelme.logger import logger\n\n\ndef polygons_to_mask(img_shape, polygons, shape_type=None):\n    logger.warning(\n        \"The 'polygons_to_mask' function is deprecated, \"\n        \"use 'shape_to_mask' instead.\"\n    )\n    return shape_to_mask(img_shape, points=polygons, shape_type=shape_type)\n\n\ndef shape_to_mask(\n    img_shape, points, shape_type=None, line_width=10, point_size=5\n):\n    mask = np.zeros(img_shape[:2], dtype=np.uint8)\n    mask = PIL.Image.fromarray(mask)\n    draw = PIL.ImageDraw.Draw(mask)\n    xy = [tuple(point) for point in points]\n    if shape_type == \"circle\":\n        assert len(xy) == 2, \"Shape of shape_type=circle must have 2 points\"\n        (cx, cy), (px, py) = xy\n        d = math.sqrt((cx - px) ** 2 + (cy - py) ** 2)\n        draw.ellipse([cx - d, cy - d, cx + d, cy + d], outline=1, fill=1)\n    elif shape_type == \"rectangle\":\n        assert len(xy) == 2, \"Shape of shape_type=rectangle must have 2 points\"\n        draw.rectangle(xy, outline=1, fill=1)\n    elif shape_type == \"line\":\n        assert len(xy) == 2, \"Shape of shape_type=line must have 2 points\"\n        draw.line(xy=xy, fill=1, width=line_width)\n    elif shape_type == \"linestrip\":\n        draw.line(xy=xy, fill=1, width=line_width)\n    elif shape_type == \"point\":\n        assert len(xy) == 1, \"Shape of shape_type=point must have 1 points\"\n        cx, cy = xy[0]\n        r = point_size\n        draw.ellipse([cx - r, cy - r, cx + r, cy + r], outline=1, fill=1)\n    else:\n        assert len(xy) > 2, \"Polygon must have points more than 2\"\n        draw.polygon(xy=xy, outline=1, fill=1)\n    mask = np.array(mask, dtype=bool)\n    return mask\n\n\ndef shapes_to_label(img_shape, shapes, label_name_to_value):\n    cls = np.zeros(img_shape[:2], dtype=np.int32)\n    ins = np.zeros_like(cls)\n    instances = []\n    for shape in shapes:\n        points = shape[\"points\"]\n        label = shape[\"label\"]\n        group_id = shape.get(\"group_id\")\n        if group_id is None:\n            group_id = uuid.uuid1()\n        shape_type = shape.get(\"shape_type\", None)\n\n        cls_name = label\n        instance = (cls_name, group_id)\n\n        if instance not in instances:\n            instances.append(instance)\n        ins_id = instances.index(instance) + 1\n        cls_id = label_name_to_value[cls_name]\n\n        mask = shape_to_mask(img_shape[:2], points, shape_type)\n        cls[mask] = cls_id\n        ins[mask] = ins_id\n\n    return cls, ins\n\n\ndef labelme_shapes_to_label(img_shape, shapes):\n    logger.warn(\n        \"labelme_shapes_to_label is deprecated, so please use \"\n        \"shapes_to_label.\"\n    )\n\n    label_name_to_value = {\"_background_\": 0}\n    for shape in shapes:\n        label_name = shape[\"label\"]\n        if label_name in label_name_to_value:\n            label_value = label_name_to_value[label_name]\n        else:\n            label_value = len(label_name_to_value)\n            label_name_to_value[label_name] = label_value\n\n    lbl, _ = shapes_to_label(img_shape, shapes, label_name_to_value)\n    return lbl, label_name_to_value\n\n\ndef masks_to_bboxes(masks):\n    if masks.ndim != 3:\n        raise ValueError(\n            \"masks.ndim must be 3, but it is {}\".format(masks.ndim)\n        )\n    if masks.dtype != bool:\n        raise ValueError(\n            \"masks.dtype must be bool type, but it is 
{}\".format(masks.dtype)\n        )\n    bboxes = []\n    for mask in masks:\n        where = np.argwhere(mask)\n        (y1, x1), (y2, x2) = where.min(0), where.max(0) + 1\n        bboxes.append((y1, x1, y2, x2))\n    bboxes = np.asarray(bboxes, dtype=np.float32)\n    return bboxes\n"
  },
  {
    "path": "DLTA_AI_app/labelme/utils/vid_to_frames.py",
    "content": "import os\nimport sys\nimport cv2\nfrom PyQt6.QtWidgets import QApplication, QWidget, QLabel, QPushButton, QFileDialog, QSlider, QLineEdit, QVBoxLayout, QHBoxLayout, QDialog, QProgressBar\nfrom PyQt6.QtCore import Qt\nfrom PyQt6.QtGui import QFont\nfrom PyQt6 import QtWidgets\nimport qdarktheme\n\n\nclass VideoFrameExtractor(QDialog):\n    def __init__(self, mute = None, notification = None):\n        super().__init__()\n        self.mute = mute\n        self.notification = notification\n        # set minimum window size\n        self.setMinimumSize(500, 300)\n\n        self.setWindowTitle(\"Open Video as Frames\")\n        self.setWindowFlags(self.windowFlags() & ~Qt.WindowType.WindowContextHelpButtonHint)\n\n\n        self.sampling_max = 100\n        # Initialize variables\n        self.vid_path = None\n        self.sampling_rate = 1\n        self.start_frame = 1\n        self.end_frame = None\n        self.fps = None\n        self.stop = False\n        self.path_name = None\n\n        font = QFont()\n        font.setBold(True)\n\n        # Create widgets\n        self.file_label = QLabel(\"Select a video file:\")\n        self.file_button = QPushButton(\"Open Video\")\n        self.file_button.clicked.connect(self.select_file)\n\n        self.sampling_label = QLabel(\"Sampling rate:\")\n        self.sampling_slider = QSlider()\n        self.sampling_slider.setOrientation(Qt.Orientation.Horizontal)\n        self.sampling_slider.setRange(1, self.sampling_max)\n        self.sampling_slider.setValue(1)\n        self.sampling_slider.setEnabled(False)\n        self.sampling_slider.valueChanged.connect(self.update_sampling_rate)\n        self.sampling_edit = QLineEdit(str(self.sampling_slider.value()))\n        self.sampling_edit.setFont(QFont('Arial', 10))\n        self.sampling_edit.setAlignment(Qt.AlignmentFlag.AlignCenter)\n        self.sampling_edit.setEnabled(False)\n        self.sampling_edit.textChanged.connect(self.update_sampling_slider)\n        self.sampling_time_label = QLabel(\"hh:mm:ss\")\n        self.sampling_time_label.setFont(font)\n        self.sampling_time_label.setAlignment(Qt.AlignmentFlag.AlignRight)\n\n        self.start_label = QLabel(\"Start frame:\")\n        self.start_slider = QSlider()\n        self.start_slider.setOrientation(Qt.Orientation.Horizontal)\n        self.start_slider.setRange(0, 1000)\n        self.start_slider.setValue(0)\n        self.start_slider.setEnabled(False)\n        self.start_slider.valueChanged.connect(self.update_start_frame)\n        self.start_edit = QLineEdit(str(self.start_slider.value()))\n        self.start_edit.setFont(QFont('Arial', 10))\n        self.start_edit.setAlignment(Qt.AlignmentFlag.AlignCenter)\n        self.start_edit.setEnabled(False)\n        self.start_edit.textChanged.connect(self.update_start_slider)\n        self.start_time_label = QLabel(\"hh:mm:ss\")\n        self.start_time_label.setFont(font)\n        self.start_time_label.setAlignment(Qt.AlignmentFlag.AlignRight)\n\n        self.end_label = QLabel(\"End frame:\")\n        self.end_slider = QSlider()\n        self.end_slider.setOrientation(Qt.Orientation.Horizontal)\n        self.end_slider.setRange(0, 1)\n        self.end_slider.setValue(1)\n        self.end_slider.setEnabled(False)\n        self.end_slider.valueChanged.connect(self.update_end_frame)\n        self.end_edit = QLineEdit(str(self.end_slider.value()))\n        self.end_edit.setFont(QFont('Arial', 10))\n        self.end_edit.setAlignment(Qt.AlignmentFlag.AlignCenter)\n        
self.end_edit.setEnabled(False)\n        self.end_edit.textChanged.connect(self.update_end_slider)\n        self.end_time_label = QLabel(\"hh:mm:ss\")\n        self.end_time_label.setFont(font)\n        self.end_time_label.setAlignment(Qt.AlignmentFlag.AlignRight)\n\n        self.extract_button = QPushButton(\"Extract Frames\")\n        self.extract_button.clicked.connect(self.extract_frames)\n        self.extract_button.setEnabled(False)\n\n        self.stop_button = QPushButton(\"Stop\")\n        self.stop_button.pressed.connect(self.stop_extraction)\n        self.stop_button.setEnabled(False)\n\n        self.progress_bar = QProgressBar(self)\n        self.progress_bar.setGeometry(50, 150, 300, 20)\n        self.progress_bar.setFormat(\"Waiting for extraction...\")\n        self.progress_bar.setValue(0)\n\n\n        # Create layouts\n        file_layout = QHBoxLayout()\n        file_layout.addWidget(self.file_label)\n        file_layout.addWidget(self.file_button)\n\n        sampling_layout = QHBoxLayout()\n        inner_sampling_layout = QVBoxLayout()\n        inner_sampling_layout.addWidget(self.sampling_label)\n        inner_sampling_layout.addWidget(self.sampling_time_label)\n        sampling_layout.addLayout(inner_sampling_layout)\n        inner_sampling_layout = QVBoxLayout()\n        inner_sampling_layout.addWidget(self.sampling_edit)\n        inner_sampling_layout.addWidget(self.sampling_slider)\n        sampling_layout.addLayout(inner_sampling_layout)\n\n\n        range_layout = QHBoxLayout()\n        \n\n        start_layout = QHBoxLayout()\n        inner_start_layout = QVBoxLayout()\n        inner_start_layout.addWidget(self.start_label, alignment=Qt.AlignmentFlag.AlignLeft)\n        inner_start_layout.addWidget(self.start_time_label, alignment=Qt.AlignmentFlag.AlignLeft)\n        start_layout.addLayout(inner_start_layout)\n        inner_start_layout = QVBoxLayout()\n        inner_start_layout.addWidget(self.start_edit)\n        inner_start_layout.addWidget(self.start_slider)\n        start_layout.addLayout(inner_start_layout)\n\n\n        end_layout = QHBoxLayout()\n        inner_end_layout = QVBoxLayout()\n        inner_end_layout.addWidget(self.end_label)\n        inner_end_layout.addWidget(self.end_time_label)\n        end_layout.addLayout(inner_end_layout)\n        inner_end_layout = QVBoxLayout()\n        inner_end_layout.addWidget(self.end_edit)\n        inner_end_layout.addWidget(self.end_slider)\n        end_layout.addLayout(inner_end_layout)\n\n        range_layout.addLayout(start_layout)\n        end_layout.setContentsMargins(20, 0, 0, 0)\n        range_layout.addLayout(end_layout)\n\n        button_layout = QHBoxLayout()\n        button_layout.addWidget(self.extract_button)\n        button_layout.addWidget(self.stop_button)\n\n        main_layout = QVBoxLayout()\n        main_layout.addLayout(file_layout)\n\n        range_layout.setContentsMargins(0, 20, 0, 0)\n        main_layout.addLayout(range_layout)\n\n        main_layout.addLayout(sampling_layout)\n\n        main_layout.addLayout(button_layout)\n\n        main_layout.addWidget(self.progress_bar)\n\n        # Set the main layout\n        self.setLayout(main_layout)\n\n    def select_file(self):\n        # Open a file dialog to select a video file\n        file_path, _ = QFileDialog.getOpenFileName(self, \"Video to Frames\", \"\", \"Video Files (*.mp4 *.avi *.mov)\")\n        if file_path:\n            self.vid_path = file_path\n            self.file_label.setText(f\"Selected video file: {self.vid_path}\")\n    
        self.sampling_slider.setEnabled(True)\n            self.sampling_edit.setEnabled(True)\n            self.start_slider.setEnabled(True)\n            self.start_edit.setEnabled(True)\n            self.end_slider.setEnabled(True)\n            self.end_edit.setEnabled(True)\n            self.extract_button.setEnabled(True)\n            self.stop_button.setEnabled(True)\n\n            # Set the stop button to red\n            self.stop_button.setStyleSheet(\"background-color: red; color: white;\")\n            \n            # Open the video file\n            vidcap = cv2.VideoCapture(self.vid_path)\n            self.fps = vidcap.get(cv2.CAP_PROP_FPS)\n\n            # Set the maximum value of the start and end sliders to the total number of frames in the video\n            self.max_frame = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))\n            # Set the start and end sliders to the maximum value\n            self.start_slider.setMaximum(self.max_frame)\n            self.start_time_label.setText(self.get_time_string(0))\n            # update startedit and start time\n            self.end_slider.setMaximum(self.max_frame)\n            self.end_slider.setValue(self.max_frame)\n            # update endedit and end time\n            self.end_edit.setText(str(self.end_slider.value()))\n            self.end_time_label.setText(self.get_time_string(self.max_frame / self.fps))\n            # update sampling \n            self.sampling_time_label.setText(self.get_time_string(1 / self.fps))\n            self.sampling_slider.setMaximum(self.max_frame // 10)\n            self.sampling_slider.setValue(self.max_frame // 100)\n            self.sampling_max = self.max_frame // 10\n            \n        else:\n            self.file_label.setText(\"No video is selected\")\n            self.sampling_slider.setEnabled(False)\n            self.sampling_edit.setEnabled(False)\n            self.start_slider.setEnabled(False)\n            self.start_edit.setEnabled(False)\n            self.end_slider.setEnabled(False)\n            self.end_edit.setEnabled(False)\n\n    def update_sampling_rate(self, value):\n        # Update the sampling rate when the slider is moved\n        self.sampling_rate = value\n        self.sampling_edit.setText(str(value))\n\n    def update_sampling_slider(self, text):\n        # Update the sampling rate when the edit box is changed\n        try:\n            value = int(text)\n            if value < 1:\n                value = 1\n            elif value > self.sampling_max:\n                value = self.sampling_max\n            self.sampling_rate = value\n            self.sampling_slider.setValue(value)\n            if self.fps:\n                self.sampling_time_label.setText(self.get_time_string(value / self.fps))\n                if self.end_frame is not None:\n                    self.progress_bar.setFormat(f\"Will Extract {(self.end_frame - self.start_frame) // self.sampling_rate} Frames\")\n        except ValueError:\n            pass\n\n    def update_start_frame(self, value):\n        # Update the start frame when the slider is moved\n        self.start_frame = value\n        self.start_edit.setText(str(value))\n\n    def update_start_slider(self, text):\n        # Update the start frame when the edit box is changed\n        try:\n            value = int(text)\n            if value < 0:\n                value = 0\n            elif self.end_frame is not None and value > self.end_frame:\n                self.start_slider.setValue(self.end_frame)\n                value = 
self.end_frame\n            self.start_frame = value\n            self.start_slider.setValue(value)\n            if self.fps:\n                self.start_time_label.setText(self.get_time_string(value / self.fps))\n                if self.end_frame is not None:\n                    self.progress_bar.setFormat(f\"Will Extract {(self.end_frame - self.start_frame) // self.sampling_rate} Frames\")\n        except ValueError:\n            pass\n\n    def update_end_frame(self, value):\n        # Update the end frame when the slider is moved\n        self.end_frame = value\n        self.end_edit.setText(str(value))\n\n    def update_end_slider(self, text):\n        # Update the end frame when the edit box is changed\n        try:\n            value = int(text)\n            if self.start_frame is not None and value < self.start_frame:\n                value = self.start_frame\n            self.end_frame = value\n            self.end_slider.setValue(value)\n            if self.fps:\n                self.end_time_label.setText(self.get_time_string(value / self.fps))\n                if self.end_frame is not None:\n                    self.progress_bar.setFormat(f\"Will Extract {(self.end_frame - self.start_frame) // self.sampling_rate} Frames\")\n        except ValueError:\n            pass\n\n    def extract_frames(self):\n        # Call the vid_to_frames function with the selected parameters\n        try:\n            self.path_name = self.vid_to_frames(self.vid_path, self.sampling_rate, self.start_frame, self.end_frame)\n        except ValueError as e:\n            self.progress_bar.setFormat(str(e))\n            return\n        self.close()\n        return self.path_name\n        \n    \n    def stop_extraction(self):\n        # stop the extraction process\n        self.stop = True\n\n    def get_time_string(self, seconds, separator=\":\"):\n        # Convert seconds to hh:mm:ss format\n        m, s = divmod(seconds, 60)\n        h, m = divmod(m, 60)\n        return f\"{int(h):02d}{separator}{int(m):02d}{separator}{int(s):02d}\"\n\n\n    def vid_to_frames(self, vid_path, sampling_rate, start_frame, end_frame):\n        \"\"\"\n        Extracts frames from a video file and saves them as JPEG images.\n\n        Args:\n            vid_path (str): Path to the video file.\n            sampling_rate (int): How often to save a frame. 
For example, if sampling_rate = 2, every other frame will be saved.\n            start_frame (int): Starting frame number.\n            end_frame (int): Ending frame number.\n        \"\"\"\n        # Check if the path exists\n        if not os.path.exists(vid_path):\n            raise ValueError(\"Video path does not exist\")\n\n        # Create a directory to store the frames\n        frames_path = \"\".join([vid_path.split(\".\")[0], \"_frames\"])\n\n        # if the directory does not exist, create it\n        if not os.path.exists(frames_path):\n            os.mkdir(frames_path)\n        # if the directory exists, delete all the files it contains\n        else:\n            for file in os.listdir(frames_path):\n                os.remove(os.path.join(frames_path, file))\n\n        # Open the video file\n        vidcap = cv2.VideoCapture(vid_path)\n        # if the video file could not be opened, raise an error\n        if not vidcap.isOpened():\n            raise ValueError(\"Could not open the video file\")\n\n        # Set the starting frame\n        vidcap.set(cv2.CAP_PROP_POS_FRAMES, start_frame)\n\n        # Get the total number of frames in the video\n        n_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))\n        print(f\"Total number of frames: {n_frames}\")\n\n        # Initialize counters\n        count = start_frame\n        success = True\n\n        while success:\n            success, image = vidcap.read()\n            # stop if the frame could not be read (e.g. end of the video)\n            if not success:\n                break\n            if count % sampling_rate == 0:\n                # Get the time in the video corresponding to the current frame\n                time_in_sec = count / self.fps\n                time_str = self.get_time_string(time_in_sec, separator=\"_\")\n\n                # Save the image with the zero-padded frame number and the time in the file name\n                padded_count = str(count).zfill(len(str(n_frames)))\n                cv2.imwrite(f\"{frames_path}/frame_{padded_count}_time_{time_str}.jpg\", image)\n\n            # Update the progress bar (guard against a zero-length range)\n            progress = int(((count - start_frame) / max(end_frame - start_frame, 1)) * 100)\n            self.progress_bar.setValue(progress)\n            self.progress_bar.setFormat(f\"{progress}%\")\n\n            count += 1\n            if count >= end_frame:\n                self.progress_bar.setValue(100)\n                break\n\n            QtWidgets.QApplication.processEvents()\n            if self.stop:\n                self.stop = False\n                self.progress_bar.setFormat(\"Extraction stopped\")\n                self.progress_bar.setValue(0)\n                break\n\n        # Show a notification if the extractor window is not the active window\n        try:\n            if not self.mute:\n                if not self.isActiveWindow():\n                    self.notification(\"Video Extraction Completed\")\n        except Exception:\n            pass\n        return frames_path\n\n\n# if __name__ == \"__main__\":\n#     app = QApplication(sys.argv)\n#     qdarktheme.setup_theme()\n#     window = VideoFrameExtractor()\n#     window.show()\n#     sys.exit(app.exec())"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/ClassesWidget.py",
    "content": "\nfrom PyQt6 import QtCore\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\n\n# add ClassWidget and allow the user to select among coco classes using a combobox\nclass Classeswidget(QtWidgets.QDialog):\n    def __init__(self):\n        super(Classeswidget, self).__init__()\n        self.setModal(True)\n        self.setWindowTitle(\"Select Class\")\n        self.class_name = \"person\"\n        self.class_name = self._createQComboBox()\n    \n    def _createQComboBox(self):\n        class_name = QtWidgets.QComboBox()\n        class_name.addItems([\"person\", \"bicycle\", \"car\", \"motorcycle\", \"airplane\", \"bus\", \"train\", \"truck\", \"boat\", \"traffic light\", \"fire hydrant\", \"stop sign\", \"parking meter\", \"bench\", \"bird\", \"cat\", \"dog\", \"horse\", \"sheep\", \"cow\", \"elephant\", \"bear\", \"zebra\", \"giraffe\", \"backpack\", \"umbrella\", \"handbag\", \"tie\", \"suitcase\", \"frisbee\", \"skis\", \"snowboard\", \"sports ball\", \"kite\", \"baseball bat\", \"baseball glove\", \"skateboard\", \"surfboard\", \"tennis racket\", \"bottle\", \"wine glass\", \"cup\", \"fork\", \"knife\", \"spoon\", \"bowl\", \"banana\", \"apple\", \"sandwich\", \"orange\", \"broccoli\", \"carrot\", \"hot dog\", \"pizza\", \"donut\", \"cake\", \"chair\", \"couch\", \"potted plant\", \"bed\", \"dining table\", \"toilet\", \"tv\", \"laptop\", \"mouse\", \"remote\", \"keyboard\", \"cell phone\", \"microwave\", \"oven\", \"toaster\", \"sink\", \"refrigerator\", \"book\", \"clock\", \"vase\", \"scissors\", \"teddy bear\", \"hair drier\", \"toothbrush\"])\n        class_name.currentIndexChanged.connect(self.onNewValue)\n        return class_name\n\n    def onNewValue(self, value):\n        self.class_name = value\n\n    def getValue(self):\n        return self.class_name\n\n    def setValue(self, value):\n        self.class_name = value\n\n    def exec(self):\n        super(Classeswidget, self).exec()\n        return self.class_name\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/MsgBox.py",
    "content": "from PyQt6 import QtWidgets\n\n\ndef OKmsgBox(title, text, type = \"info\", turnResult = False):\n    \n    \"\"\"\n    Show a message box.\n\n    Args:\n        title (str): The title of the message box.\n        text (str): The text of the message box.\n        type (str, optional): The type of the message box. Can be \"info\", \"warning\", or \"critical\". Defaults to \"info\".\n\n    Returns:\n        int: The result of the message box. This will be the value of the button clicked by the user.\n    \"\"\"\n    \n    msgBox = QtWidgets.QMessageBox()\n    if type == \"info\":\n        msgBox.setIcon(QtWidgets.QMessageBox.Icon.Information)\n    elif type == \"warning\":\n        msgBox.setIcon(QtWidgets.QMessageBox.Warning)\n    elif type == \"critical\":\n        msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)\n    msgBox.setText(text)\n    msgBox.setWindowTitle(title)\n    if turnResult:\n        msgBox.setStandardButtons(QtWidgets.QMessageBox.StandardButton.Ok | QtWidgets.QMessageBox.Cancel)\n        msgBox.setDefaultButton(QtWidgets.QMessageBox.StandardButton.Ok)\n    else:\n        msgBox.setStandardButtons(QtWidgets.QMessageBox.StandardButton.Ok)\n    msgBox.exec()\n    return msgBox.result()\n\n\n\n\n\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/ThresholdWidget.py",
    "content": "from PyQt6 import QtCore\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\n\n\nclass ThresholdWidget(QtWidgets.QDialog):\n    def __init__(self):\n        super(ThresholdWidget, self).__init__()\n        self.setModal(True)\n        self.setWindowTitle(\"Enter Threshold\")\n        self.threshold = 0.5\n        self.threshold = self._createQLineEdit()\n    \n    def _createQLineEdit(self):\n        threshold = QtWidgets.QLineEdit()\n        threshold.setRange(0, 1)\n        threshold.setValue(0.5)\n        threshold.valueChanged.connect(self.onNewValue)\n        return threshold\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/__init__.py",
    "content": "# flake8: noqa\n\nfrom .brightness_contrast_dialog import BrightnessContrastDialog\n\nfrom .canvas import Canvas\n\nfrom .color_dialog import ColorDialog\n\nfrom .label_dialog import LabelDialog\nfrom .label_dialog import LabelQLineEdit\n\nfrom .label_list_widget import LabelListWidget\nfrom .label_list_widget import LabelListWidgetItem\n\nfrom .tool_bar import ToolBar\n\nfrom .unique_label_qlist_widget import UniqueLabelQListWidget\n\nfrom .zoom_widget import ZoomWidget\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/brightness_contrast_dialog.py",
    "content": "import PIL.Image\nimport PIL.ImageEnhance\nfrom PyQt6.QtCore import Qt\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\n\nfrom .. import utils\n\n\nclass BrightnessContrastDialog(QtWidgets.QDialog):\n    def __init__(self, img, callback, parent=None):\n        super(BrightnessContrastDialog, self).__init__(parent)\n        self.setModal(True)\n        self.setWindowTitle(\"Brightness/Contrast\")\n\n        self.slider_brightness = self._create_slider()\n        self.slider_contrast = self._create_slider()\n\n        formLayout = QtWidgets.QFormLayout()\n        formLayout.addRow(self.tr(\"Brightness\"), self.slider_brightness)\n        formLayout.addRow(self.tr(\"Contrast\"), self.slider_contrast)\n        self.setLayout(formLayout)\n\n        assert isinstance(img, PIL.Image.Image)\n        self.img = img\n        self.callback = callback\n\n    def onNewValue(self, value):\n        brightness = self.slider_brightness.value() / 50.0\n        contrast = self.slider_contrast.value() / 50.0\n\n        img = self.img\n        img = PIL.ImageEnhance.Brightness(img).enhance(brightness)\n        img = PIL.ImageEnhance.Contrast(img).enhance(contrast)\n\n        img_data = utils.img_pil_to_data(img)\n        qimage = QtGui.QImage.fromData(img_data)\n        self.callback(qimage)\n\n    def _create_slider(self):\n        slider = QtWidgets.QSlider(Qt.Orientation.Horizontal)\n        slider.setRange(0, 150)\n        slider.setValue(50)\n        slider.valueChanged.connect(self.onNewValue)\n        return slider\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/canvas.py",
    "content": "from PyQt6 import QtCore\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\n\nfrom labelme import QT5\nfrom labelme.shape import Shape\nimport labelme.utils\nimport copy\n\n\n# TODO(unknown):\n# - [maybe] Find optimal epsilon value.\n\n\nCURSOR_DEFAULT = QtCore.Qt.CursorShape.ArrowCursor\nCURSOR_POINT = QtCore.Qt.CursorShape.PointingHandCursor\nCURSOR_DRAW = QtCore.Qt.CursorShape.CrossCursor\nCURSOR_MOVE = QtCore.Qt.CursorShape.ClosedHandCursor\nCURSOR_GRAB = QtCore.Qt.CursorShape.OpenHandCursor\n\n\nclass Canvas(QtWidgets.QWidget):\n\n    zoomRequest = QtCore.pyqtSignal(int, QtCore.QPoint)\n    scrollRequest = QtCore.pyqtSignal(int, int)\n    newShape = QtCore.pyqtSignal()\n    selectionChanged = QtCore.pyqtSignal(list)\n    shapeMoved = QtCore.pyqtSignal()\n    drawingPolygon = QtCore.pyqtSignal(bool)\n    edgeSelected = QtCore.pyqtSignal(bool, object)\n    vertexSelected = QtCore.pyqtSignal(bool)\n    \n    # SAM signals\n    pointAdded = QtCore.pyqtSignal()\n    samFinish = QtCore.pyqtSignal()\n    \n    # refresh visualization\n    APPrefresh = QtCore.pyqtSignal(bool)\n\n    CREATE, EDIT = 0, 1\n    CREATE, EDIT = 0, 1\n\n    # polygon only\n    _createMode = \"polygon\"\n\n    _fill_drawing = False\n\n    def __init__(self, *args, **kwargs):\n        self.epsilon = kwargs.pop(\"epsilon\", 10.0)\n        self.double_click = kwargs.pop(\"double_click\", \"close\")\n        if self.double_click not in [None, \"close\"]:\n            raise ValueError(\n                \"Unexpected value for double_click event: {}\".format(\n                    self.double_click\n                )\n            )\n        self.num_backups = kwargs.pop(\"num_backups\", 10)\n        super(Canvas, self).__init__(*args, **kwargs)\n        \n        # Initialise local state.\n        self.mode = self.EDIT\n        self.shapes = []\n        \n        # Segment anything (SAM) attributes\n        self.SAM_mode = \"\"\n        self.SAM_coordinates = []\n        self.SAM_rect = []\n        self.SAM_rects = []\n        self.SAM_painter = QtGui.QPainter()\n        self.SAM_current = None\n        \n        # mouse tracking\n        self.show_cross_line = True\n        \n        # Waiting window\n        self.is_loading = False\n        self.loading_angle = 0\n        self.loading_text = \"Loading...\"\n        \n        # tracking area\n        self.tracking_area = \"\"\n        self.tracking_area_polygon = []\n        \n        self.current_annotation_mode = \"\"\n\n        self.shapesBackups = []\n        self.current = None\n        self.selectedShapes = []  # save the selected shapes here\n        self.selectedShapesCopy = []\n        # self.line represents:\n        #   - createMode == 'polygon': edge from last point to current\n        self.line = Shape()\n        self.prevPoint = QtCore.QPoint()\n        self.prevMovePoint = QtCore.QPoint()\n        self.offsets = QtCore.QPoint(), QtCore.QPoint()\n        self.scale = 1.0\n        self.pixmap = QtGui.QPixmap()\n        self.visible = {}\n        self._hideBackround = False\n        self.hideBackround = False\n        self.hShape = None\n        self.prevhShape = None\n        self.hVertex = None\n        self.prevhVertex = None\n        self.hEdge = None\n        self.prevhEdge = None\n        self.movingShape = False\n        self._painter = QtGui.QPainter()\n        self._cursor = CURSOR_DEFAULT\n\n        # Menus:\n        # 0: right-click without selection and dragging of shapes\n        # 1: right-click with selection and dragging of 
shapes\n        self.menus = (QtWidgets.QMenu(), QtWidgets.QMenu())\n        \n        # Set widget options.\n        self.setMouseTracking(True)\n        self.setFocusPolicy(QtCore.Qt.FocusPolicy.WheelFocus)\n\n    def fillDrawing(self):\n        return self._fill_drawing\n\n    def setFillDrawing(self, value):\n        self._fill_drawing = value\n\n    @property\n    def createMode(self):\n        return self._createMode\n\n    @createMode.setter\n    def createMode(self, value):\n        if value not in [\n            \"polygon\",\n        ]:\n            raise ValueError(\"Unsupported createMode: %s\" % value)\n        self._createMode = value\n\n    def storeShapes(self):\n        shapesBackup = []\n        for shape in self.shapes:\n            shapesBackup.append(shape.copy())\n        if len(self.shapesBackups) > self.num_backups:\n            self.shapesBackups = self.shapesBackups[-self.num_backups - 1:]\n        self.shapesBackups.append(shapesBackup)\n\n    @property\n    def isShapeRestorable(self):\n        # We save the state AFTER each edit (not before) so for an\n        # edit to be undoable, we expect the CURRENT and the PREVIOUS state\n        # to be in the undo stack.\n        if len(self.shapesBackups) < 2:\n            return False\n        return True\n\n    def restoreShape(self):\n        # This does _part_ of the job of restoring shapes.\n        # The complete process is also done in app.py::undoShapeEdit\n        # and app.py::loadShapes and our own Canvas::loadShapes function.\n        if not self.isShapeRestorable:\n            return\n        self.shapesBackups.pop()  # latest\n\n        # The application will eventually call Canvas.loadShapes which will\n        # push this right back onto the stack.\n        shapesBackup = self.shapesBackups.pop()\n        self.shapes = shapesBackup\n        self.selectedShapes = []\n        for shape in self.shapes:\n            shape.selected = False\n        self.update()\n\n    def enterEvent(self, ev):\n        self.overrideCursor(self._cursor)\n\n    def leaveEvent(self, ev):\n        self.unHighlight()\n        self.restoreCursor()\n\n    def focusOutEvent(self, ev):\n        self.restoreCursor()\n\n    def isVisible(self, shape):\n        return self.visible.get(shape, True)\n\n    def drawing(self):\n        return self.mode == self.CREATE\n\n    def editing(self):\n        return self.mode == self.EDIT\n\n    def setEditing(self, value=True):\n        self.mode = self.EDIT if value else self.CREATE\n        if not value:  # Create\n            self.unHighlight()\n            self.deSelectShape()\n\n    def unHighlight(self):\n        if self.hShape:\n            self.hShape.highlightClear()\n            self.update()\n        self.prevhShape = self.hShape\n        self.prevhVertex = self.hVertex\n        self.prevhEdge = self.hEdge\n        self.hShape = self.hVertex = self.hEdge = None\n\n    def selectedVertex(self):\n        return self.hVertex is not None\n\n    def set_show_cross_line(self, enabled):\n        \"\"\"Set cross line visibility\"\"\"\n        self.show_cross_line = enabled\n        self.update()\n\n    def mouseMoveEvent(self, ev):\n        \"\"\"Update line with last point and current coordinates.\"\"\"\n        try:\n            pos = self.transformPos(ev.position())\n        except AttributeError:\n            return\n\n        self.prevMovePoint = pos\n        self.repaint()\n        self.restoreCursor()\n\n        # Polygon drawing.\n        if self.drawing():\n            
self.line.shape_type = self.createMode\n\n            self.overrideCursor(CURSOR_DRAW)\n            if not self.current:\n                return\n\n            if self.outOfPixmap(pos):\n                # Don't allow the user to draw outside the pixmap.\n                # Project the point to the pixmap's edges.\n                pos = self.intersectionPoint(self.current[-1], pos)\n            elif (\n                len(self.current) > 1\n                and self.createMode == \"polygon\"\n                and self.closeEnough(pos, self.current[0])\n            ):\n                # Attract line to starting point and\n                # colorise to alert the user.\n                pos = self.current[0]\n                self.overrideCursor(CURSOR_POINT)\n                self.current.highlightVertex(0, Shape.NEAR_VERTEX)\n            if self.createMode in [\"polygon\"]:\n                self.line[0] = self.current[-1]\n                self.line[1] = pos\n            self.repaint()\n            self.current.highlightClear()\n            return\n\n        # Polygon copy moving.\n        if QtCore.Qt.MouseButton.RightButton & ev.buttons():\n            if self.selectedShapesCopy and self.prevPoint:\n                self.overrideCursor(CURSOR_MOVE)\n                self.boundedMoveShapes(self.selectedShapesCopy, pos)\n                self.repaint()\n            elif self.selectedShapes:\n                self.selectedShapesCopy = [\n                    s.copy() for s in self.selectedShapes\n                ]\n                self.repaint()\n            return\n\n        # Polygon/Vertex moving.\n        if QtCore.Qt.MouseButton.LeftButton & ev.buttons():\n            if self.selectedVertex():\n                self.boundedMoveVertex(pos)\n                self.repaint()\n                self.movingShape = True\n            elif self.selectedShapes and self.prevPoint:\n                self.overrideCursor(CURSOR_MOVE)\n                self.boundedMoveShapes(self.selectedShapes, pos)\n                self.repaint()\n                self.movingShape = True\n            return\n\n        # Just hovering over the canvas, 2 possibilities:\n        # - Highlight shapes\n        # - Highlight vertex\n        # Update shape/vertex fill and tooltip value accordingly.\n        self.setToolTip(self.tr(\"Image\"))\n        for shape in reversed([s for s in self.shapes if self.isVisible(s)]):\n            # Look for a nearby vertex to highlight. 
If that fails,\n            # check if we happen to be inside a shape.\n            index = shape.nearestVertex(pos, self.epsilon / self.scale)\n            index_edge = shape.nearestEdge(pos, self.epsilon / self.scale)\n            if index is not None:\n                if self.selectedVertex():\n                    self.hShape.highlightClear()\n                self.prevhVertex = self.hVertex = index\n                self.prevhShape = self.hShape = shape\n                self.prevhEdge = self.hEdge = index_edge\n                shape.highlightVertex(index, shape.MOVE_VERTEX)\n                self.overrideCursor(CURSOR_POINT)\n                self.setToolTip(self.tr(\"Click & drag to move point\"))\n                self.setStatusTip(self.toolTip())\n                self.update()\n                break\n            elif shape.containsPoint(pos):\n                if self.selectedVertex():\n                    self.hShape.highlightClear()\n                self.prevhVertex = self.hVertex\n                self.hVertex = None\n                self.prevhShape = self.hShape = shape\n                self.prevhEdge = self.hEdge = index_edge\n                self.setToolTip(\n                    self.tr(\"Click & drag to move shape '%s'\") % shape.label\n                )\n                # conf = shape.content (to two decimal places)\n\n                if shape.group_id != None and self.current_annotation_mode == 'video':\n                    self.setToolTip(\n                        self.tr(f'ID {str(shape.group_id)} {shape.label} {shape.content}'))\n                else:\n                    self.setToolTip(self.tr(f'{shape.label} {shape.content}'))\n\n                self.setStatusTip(self.toolTip())\n                self.overrideCursor(CURSOR_GRAB)\n                self.update()\n                break\n        else:  # Nothing found, clear highlights, reset state.\n            self.unHighlight()\n        self.edgeSelected.emit(self.hEdge is not None, self.hShape)\n        self.vertexSelected.emit(self.hVertex is not None)\n\n    def addPointToEdge(self):\n        shape = self.prevhShape\n        index = self.prevhEdge\n        point = self.prevMovePoint\n        if shape is None or index is None or point is None:\n            return\n        shape.insertPoint(index, point)\n        shape.highlightVertex(index, shape.MOVE_VERTEX)\n        self.hShape = shape\n        self.hVertex = index\n        self.hEdge = None\n        self.movingShape = True\n\n    def removeSelectedPoint(self):\n        shape = self.prevhShape\n        point = self.prevMovePoint\n        if shape is None or point is None:\n            return\n        index = shape.nearestVertex(point, self.epsilon)\n        shape.removePoint(index)\n        self.hShape = shape\n        self.hVertex = None\n        self.hEdge = None\n        self.movingShape = True  # Save changes\n\n    def corrected_pos_into_pixmap(self, pos):\n        x = pos.x()\n        y = pos.y()\n        x = min(self.pixmap.width() , max(0, x))\n        y = min(self.pixmap.height(), max(0, y))\n        res = QtCore.QPointF(x, y)\n        return res\n\n    def mousePressEvent(self, ev):\n        \n        pos = self.transformPos(ev.position())\n\n        \n        if ev.button() == QtCore.Qt.MouseButton.LeftButton:\n            if self.drawing() and self.SAM_mode == \"\":\n                if self.current:\n                    # Add point to existing shape.\n                    if self.createMode == \"polygon\":\n                        
self.current.addPoint(self.line[1])\n                        self.line[0] = self.current[-1]\n                        if self.current.isClosed():\n                            self.finalise()\n                elif not self.outOfPixmap(pos):\n                    # Create new shape.\n                    self.current = Shape(shape_type=self.createMode)\n                    self.current.addPoint(pos)\n                    self.line.points = [pos, pos]\n                    self.setHiding()\n                    self.drawingPolygon.emit(True)\n                    self.update()\n            elif self.SAM_mode == \"add point\":\n                if not self.outOfPixmap(pos):\n                    # add the coordinates and the label (1 forground 0 background)\n                    self.SAM_coordinates.append([pos.x(), pos.y(), 1])\n                    self.pointAdded.emit()\n\n            elif self.SAM_mode == 'remove point':\n                if not self.outOfPixmap(pos):\n                    # add the coordinates and the label (1 forground 0 background)\n                    self.SAM_coordinates.append([pos.x(), pos.y(), 0])\n                    self.pointAdded.emit()\n            elif self.SAM_mode == 'select rect':\n                self.SAM_rect.append(self.corrected_pos_into_pixmap(pos))\n                if len(self.SAM_rect) == 2:\n                    self.SAM_rects = [self.SAM_rect]\n                    self.pointAdded.emit()\n                    self.SAM_rect = []\n            \n            elif self.tracking_area == \"drawing\":\n                corrected_pos = self.corrected_pos_into_pixmap(pos)\n                self.tracking_area_polygon.append([corrected_pos.x(), corrected_pos.y()])\n            \n            # the other is editing mode\n            else:\n                group_mode = ev.modifiers() == QtCore.Qt.KeyboardModifier.ControlModifier\n                self.selectShapePoint(pos, multiple_selection_mode=group_mode)\n                self.prevPoint = pos\n                self.repaint()\n        elif ev.button() == QtCore.Qt.MouseButton.RightButton and self.editing():\n            group_mode = ev.modifiers() == QtCore.Qt.KeyboardModifier.ControlModifier\n            self.selectShapePoint(pos, multiple_selection_mode=group_mode)\n            self.prevPoint = pos\n            self.repaint()\n\n    def handle_right_click(self, menu):\n        try:\n            setEnabledd = menu.actions()[7].text() == \"Edit &Label\" and menu.actions()[7].isEnabled()\n            if menu.actions()[10].text() == \"&Mark as key\":\n                menu.actions()[10].setEnabled(setEnabledd)\n            if menu.actions()[11].text() == \"&Scale\":\n                menu.actions()[11].setEnabled(setEnabledd)\n        except:\n            pass\n        return menu\n\n    def mouseReleaseEvent(self, ev):\n        \n        pos = self.transformPos(ev.position())\n        \n        if ev.button() == QtCore.Qt.MouseButton.RightButton:\n            menu = self.menus[len(self.selectedShapesCopy) > 0]\n            menu = self.handle_right_click(menu)\n            self.restoreCursor()\n            if (\n                not menu.exec(self.mapToGlobal(ev.pos()))\n                and self.selectedShapesCopy\n            ):\n                # Cancel the move by deleting the shadow copy.\n                self.selectedShapesCopy = []\n                self.repaint()\n        elif ev.button() == QtCore.Qt.MouseButton.LeftButton and self.selectedShapes:\n            self.overrideCursor(CURSOR_GRAB)\n            if (\n                
self.editing()\n                and ev.modifiers() == QtCore.Qt.KeyboardModifier.ShiftModifier\n            ):\n                # Add point to line if: left-click + SHIFT on a line segment\n                self.addPointToEdge()\n        elif ev.button() == QtCore.Qt.MouseButton.LeftButton and self.selectedVertex():\n            if (\n                self.editing()\n                and ev.modifiers() == QtCore.Qt.KeyboardModifier.ShiftModifier\n            ):\n                # Delete point if: left-click + SHIFT on a point\n                self.removeSelectedPoint()\n        elif ev.button() == QtCore.Qt.MouseButton.LeftButton and len(self.SAM_rect) == 1:\n            if abs(pos.x() - self.SAM_rect[0].x()) + abs(pos.y() - self.SAM_rect[0].y()) > 50:\n                self.SAM_rect.append(self.corrected_pos_into_pixmap(pos))\n                self.SAM_rects = [self.SAM_rect]\n                self.pointAdded.emit()\n                self.SAM_rect = []\n\n        if self.movingShape and self.hShape:\n            index = self.shapes.index(self.hShape)\n            if (\n                self.shapesBackups[-1][index].points\n                != self.shapes[index].points\n            ):\n                self.storeShapes()\n                self.shapeMoved.emit()\n\n            self.movingShape = False\n            self.APPrefresh.emit(True)\n\n    def endMove(self, copy):\n        assert self.selectedShapes and self.selectedShapesCopy\n        assert len(self.selectedShapesCopy) == len(self.selectedShapes)\n        if copy:\n            for i, shape in enumerate(self.selectedShapesCopy):\n                self.shapes.append(shape)\n                self.selectedShapes[i].selected = False\n                self.selectedShapes[i] = shape\n        else:\n            for i, shape in enumerate(self.selectedShapesCopy):\n                self.selectedShapes[i].points = shape.points\n        self.selectedShapesCopy = []\n        self.repaint()\n        self.storeShapes()\n        return True\n\n    def hideBackroundShapes(self, value):\n        self.hideBackround = value\n        if self.selectedShapes:\n            # Only hide other shapes if there is a current selection.\n            # Otherwise the user will not be able to select a shape.\n            self.setHiding(True)\n            self.update()\n\n    def setHiding(self, enable=True):\n        self._hideBackround = self.hideBackround if enable else False\n\n    def canCloseShape(self):\n        return self.drawing() and self.current and len(self.current) > 2\n\n    def mouseDoubleClickEvent(self, ev):\n        # We need at least 4 points here, since the mousePress handler\n        # adds an extra one before this handler is called.\n        if (\n            self.double_click == \"close\"\n            and self.canCloseShape()\n            and len(self.current) > 3\n        ):\n            self.current.popPoint()\n            self.finalise()\n            \n        if self.tracking_area == \"drawing\":\n            self.tracking_area = \"drawn\"\n            self.update()\n\n    def selectShapes(self, shapes):\n        self.setHiding()\n        self.selectionChanged.emit(shapes)\n        self.update()\n\n    def selectShapePoint(self, point, multiple_selection_mode):\n        \"\"\"Select the first shape created which contains this point.\"\"\"\n        if self.selectedVertex():  # A vertex is marked for selection.\n            index, shape = self.hVertex, self.hShape\n            shape.highlightVertex(index, shape.MOVE_VERTEX)\n        else:\n            
for shape in reversed(self.shapes):\n                if self.isVisible(shape) and shape.containsPoint(point):\n                    self.calculateOffsets(shape, point)\n                    self.setHiding()\n                    if multiple_selection_mode:\n                        if shape not in self.selectedShapes:\n                            self.selectionChanged.emit(\n                                self.selectedShapes + [shape]\n                            )\n                    else:\n                        self.selectionChanged.emit([shape])\n                    return\n        self.deSelectShape()\n\n    def calculateOffsets(self, shape, point):\n        rect = shape.boundingRect()\n        x1 = rect.x() - point.x()\n        y1 = rect.y() - point.y()\n        x2 = (rect.x() + rect.width() - 1) - point.x()\n        y2 = (rect.y() + rect.height() - 1) - point.y()\n        self.offsets = QtCore.QPoint(x1, y1), QtCore.QPoint(x2, y2)\n\n    def boundedMoveVertex(self, pos):\n        index, shape = self.hVertex, self.hShape\n        point = shape[index]\n        if self.outOfPixmap(pos):\n            pos = self.intersectionPoint(point, pos)\n        # convert pos to QPointF\n        pos = QtCore.QPointF(pos)\n        shape.moveVertexBy(index, pos - point)\n\n    def boundedMoveShapes(self, shapes, pos):\n        if self.outOfPixmap(pos):\n            return False  # No need to move\n        o1 = pos + QtCore.QPointF(self.offsets[0])\n        if self.outOfPixmap(o1):\n            pos -= QtCore.QPoint(min(0, o1.x()), min(0, o1.y()))\n        o2 = pos + QtCore.QPointF(self.offsets[1])\n        if self.outOfPixmap(o2):\n            pos += QtCore.QPoint(\n                min(0, self.pixmap.width() - o2.x()),\n                min(0, self.pixmap.height() - o2.y()),\n            )\n        # XXX: The next line tracks the new position of the cursor\n        # relative to the shape, but also results in making it\n        # a bit \"shaky\" when nearing the border and allows it to\n        # go outside of the shape's area for some reason.\n        # self.calculateOffsets(self.selectedShapes, pos)\n        dp = pos - self.prevPoint\n        if dp:\n            for shape in shapes:\n                shape.moveBy(dp)\n            self.prevPoint = pos\n            return True\n        return False\n\n    def deSelectShape(self):\n        if self.selectedShapes:\n            self.setHiding(False)\n            self.selectionChanged.emit([])\n            self.update()\n\n    def deleteSelected(self):\n        deleted_shapes = []\n        if self.selectedShapes:\n            for shape in self.selectedShapes:\n                self.shapes.remove(shape)\n                deleted_shapes.append(shape)\n            self.storeShapes()\n            self.selectedShapes = []\n            self.update()\n        return deleted_shapes\n\n    def deleteShape(self, shape):\n        if shape in self.selectedShapes:\n            self.selectedShapes.remove(shape)\n        if shape in self.shapes:\n            self.shapes.remove(shape)\n        self.storeShapes()\n        self.update()\n\n    def copySelectedShapes(self):\n        if self.selectedShapes:\n            self.selectedShapesCopy = [s.copy() for s in self.selectedShapes]\n            self.boundedShiftShapes(self.selectedShapesCopy)\n            self.endMove(copy=True)\n        return self.selectedShapes\n\n    def boundedShiftShapes(self, shapes):\n        # Try to move in one direction, and if it fails in another.\n        # Give up if both fail.\n        point = 
shapes[0][0]\n        offset = QtCore.QPoint(2.0, 2.0)\n        self.offsets = QtCore.QPoint(), QtCore.QPoint()\n        self.prevPoint = point\n        if not self.boundedMoveShapes(shapes, point - offset):\n            self.boundedMoveShapes(shapes, point + offset)\n\n    def paintEvent(self, event):\n        if not self.pixmap and not self.is_loading:\n            return super(Canvas, self).paintEvent(event)\n\n        p = self._painter\n        p.begin(self)\n        p.setRenderHint(QtGui.QPainter.RenderHint.Antialiasing)\n        p.setRenderHint(QtGui.QPainter.RenderHint.SmoothPixmapTransform)\n        # p.setRenderHint(QtGui.QPainter.HighQualityAntialiasing)\n\n        p.scale(self.scale, self.scale)\n        p.translate(self.offsetToCenter())\n\n        p.drawPixmap(0, 0, self.pixmap)\n        Shape.scale = self.scale\n\n        # Draw loading/waiting screen\n        if self.is_loading:\n            # Draw a semi-transparent rectangle\n            p.setPen(QtCore.Qt.PenStyle.NoPen)\n            p.setBrush(QtGui.QColor(0, 0, 0, 100))\n            p.drawRect(self.pixmap.rect())\n\n            # Draw a spinning wheel\n            p.setPen(QtGui.QColor(255, 255, 255))\n            p.setBrush(QtCore.Qt.BrushStyle.NoBrush)\n            p.save()\n            p.translate(self.pixmap.width() / 2, self.pixmap.height() / 2 - 50)\n            p.rotate(self.loading_angle)\n            p.drawEllipse(-20, -20, 40, 40)\n            p.drawLine(0, 0, 0, -20)\n            p.restore()\n            self.loading_angle += 5\n            if self.loading_angle >= 360:\n                self.loading_angle = 0\n\n            # Draw the loading text\n            p.setPen(QtGui.QColor(255, 255, 255))\n            try:\n                fontsize = self.pixmap.width() / 50\n                p.setFont(QtGui.QFont(\"Arial\", fontsize))\n            except:\n                p.setFont(QtGui.QFont(\"Arial\", 20))\n            p.drawText(\n                self.pixmap.rect(),\n                QtCore.Qt.AlignmentFlag.AlignCenter,\n                self.loading_text,\n            )\n            p.end()\n            self.update()\n            return\n\n        for shape in self.shapes:\n            if (shape.selected or not self._hideBackround) and self.isVisible(\n                shape\n            ):\n                shape.fill = shape.selected or shape == self.hShape\n                shape.paint(p)\n        if self.current:\n            self.current.paint(p)\n            self.line.paint(p)\n        if self.selectedShapesCopy:\n            for s in self.selectedShapesCopy:\n                s.paint(p)\n\n        if (\n            self.fillDrawing()\n            and self.createMode == \"polygon\"\n            and self.current is not None\n            and len(self.current.points) >= 2\n        ):\n            drawing_shape = self.current.copy()\n            drawing_shape.addPoint(self.line[1])\n            drawing_shape.fill = True\n            drawing_shape.paint(p)\n\n        # Draw mouse coordinates\n        if self.show_cross_line:\n            pen = QtGui.QPen(\n                QtGui.QColor(\"#00FF00\"),\n                max(1, int(round(2.0 / Shape.scale))),\n                QtCore.Qt.PenStyle.DashLine,\n            )\n            p.setPen(pen)\n            p.setOpacity(0.5)\n            mouseX = min( self.pixmap.width() ,max(0, self.prevMovePoint.x()))\n            mouseY = min( self.pixmap.height() ,max(0, self.prevMovePoint.y()))\n            p.drawLine(\n                QtCore.QPointF(mouseX, 0),\n                
QtCore.QPointF(mouseX, self.pixmap.height()),\n            )\n            p.drawLine(\n                QtCore.QPointF(0, mouseY),\n                QtCore.QPointF(self.pixmap.width(), mouseY),\n            )\n\n        # draw SAM rectangle\n        if len(self.SAM_rect) == 1:\n            pen = QtGui.QPen(\n                QtGui.QColor(\"#FF0000\"),\n                2 * max(1, int(round(2.0 / Shape.scale))),\n                QtCore.Qt.PenStyle.SolidLine,\n            )\n            p.setPen(pen)\n            p.setOpacity(0.8)\n\n            point1 = [self.SAM_rect[0].x(), self.SAM_rect[0].y()]\n            corrected = self.corrected_pos_into_pixmap(self.prevMovePoint)\n            point2 = [corrected.x(), corrected.y()]\n            x1 = min(point1[0], point2[0])\n            y1 = min(point1[1], point2[1])\n            w = abs(point1[0] - point2[0])\n            h = abs(point1[1] - point2[1])\n            p.drawRect(x1, y1, w, h)\n\n        # draw SAM points\n        if len(self.SAM_coordinates) != 0:\n            for point in self.SAM_coordinates:\n                color = \"#FF0000\" if point[2] == 0 else \"#19EB25\"\n                pen = QtGui.QPen(\n                    QtGui.QColor(color),\n                    5 * max(1, int(round(2.0 / Shape.scale))),\n                    QtCore.Qt.PenStyle.SolidLine,\n                    QtCore.Qt.PenCapStyle.RoundCap,\n                )\n                p.setPen(pen)\n                p.setOpacity(0.8)\n                p.drawPoint(point[0], point[1])\n\n        if len(self.SAM_rects) != 0:\n            box = self.SAM_rects[-1]\n            pen = QtGui.QPen(\n                QtGui.QColor(\"#2D7CFA\"),\n                2 * max(1, int(round(2.0 / Shape.scale))),\n                QtCore.Qt.PenStyle.SolidLine,\n            )\n            p.setPen(pen)\n            p.setOpacity(0.8)\n\n            point1 = [box[0].x(), box[0].y()]\n            point2 = [box[1].x(), box[1].y()]\n            x1 = min(point1[0], point2[0])\n            y1 = min(point1[1], point2[1])\n            w = abs(point1[0] - point2[0])\n            h = abs(point1[1] - point2[1])\n            p.drawRect(x1, y1, w, h)\n\n        if self.tracking_area != \"\":\n            pen = QtGui.QPen(\n                QtGui.QColor(\"#FF0000\"),\n                2 * max(1, int(round(2.0 / Shape.scale))),\n                QtCore.Qt.PenStyle.SolidLine,\n            )\n            p.setPen(pen)\n            p.setOpacity(0.1)\n            p.setBrush(QtGui.QColor(\"#FF0000\"));\n            if len(self.tracking_area_polygon) > 0:\n                corrected = self.corrected_pos_into_pixmap(self.prevMovePoint)\n                point2 = [corrected.x(), corrected.y()]\n                total = copy.deepcopy(self.tracking_area_polygon)\n                if self.tracking_area == \"drawing\":\n                    total.append(point2)\n                total = [ QtCore.QPoint(p[0], p[1]) for p in total]\n                p.drawPolygon(total)\n                p.setOpacity(0.7)\n                if self.tracking_area == \"drawing\":\n                    p.drawPolyline(total)\n                else:\n                    total.append(total[0])\n                    p.drawPolyline(total)\n\n\n        p.end()\n\n    def transformPos(self, point):\n        \"\"\"Convert from widget-logical coordinates to painter-logical ones.\"\"\"\n\n        return point / self.scale - QtCore.QPointF(self.offsetToCenter())\n\n    def offsetToCenter(self):\n        s = self.scale\n        area = super(Canvas, self).size()\n        w, h = 
self.pixmap.width() * s, self.pixmap.height() * s\n        aw, ah = area.width(), area.height()\n        x = (aw - w) / (2 * s) if aw > w else 0\n        y = (ah - h) / (2 * s) if ah > h else 0\n        return QtCore.QPoint(x, y)\n\n    def outOfPixmap(self, p):\n        w, h = self.pixmap.width(), self.pixmap.height()\n        return not (0 <= p.x() <= w - 1 and 0 <= p.y() <= h - 1)\n\n    def finalise(self, SAM_SHAPE=False):\n        if SAM_SHAPE:\n            assert self.SAM_current\n            self.SAM_current.close()\n            self.storeShapes()\n            self.SAM_current = None\n            self.setHiding(False)\n            self.newShape.emit()\n            self.update()\n        else:\n            assert self.current\n            self.current.close()\n            self.shapes.append(self.current)\n            self.storeShapes()\n            self.current = None\n            self.setHiding(False)\n            self.newShape.emit()\n            self.update()\n\n    def closeEnough(self, p1, p2):\n        # d = distance(p1 - p2)\n        # m = (p1-p2).manhattanLength()\n        # print \"d %.2f, m %d, %.2f\" % (d, m, d - m)\n        # divide by scale to allow more precision when zoomed in\n        return labelme.utils.distance(p1 - p2) < (self.epsilon / self.scale)\n\n    def intersectionPoint(self, p1, p2):\n        # Cycle through each image edge in clockwise fashion,\n        # and find the one intersecting the current line segment.\n        # http://paulbourke.net/geometry/lineline2d/\n        size = self.pixmap.size()\n        points = [\n            (0, 0),\n            (size.width() - 1, 0),\n            (size.width() - 1, size.height() - 1),\n            (0, size.height() - 1),\n        ]\n        # x1, y1 should be in the pixmap, x2, y2 should be out of the pixmap\n        x1 = min(max(p1.x(), 0), size.width() - 1)\n        y1 = min(max(p1.y(), 0), size.height() - 1)\n        x2, y2 = p2.x(), p2.y()\n        d, i, (x, y) = min(self.intersectingEdges((x1, y1), (x2, y2), points))\n        x3, y3 = points[i]\n        x4, y4 = points[(i + 1) % 4]\n        if (x, y) == (x1, y1):\n            # Handle cases where previous point is on one of the edges.\n            if x3 == x4:\n                return QtCore.QPoint(x3, min(max(0, y2), max(y3, y4)))\n            else:  # y3 == y4\n                return QtCore.QPoint(min(max(0, x2), max(x3, x4)), y3)\n        return QtCore.QPoint(x, y)\n\n    def intersectingEdges(self, point1, point2, points):\n        \"\"\"Find intersecting edges.\n\n        For each edge formed by `points', yield the intersection\n        with the line segment `(x1,y1) - (x2,y2)`, if it exists.\n        Also return the distance of `(x2,y2)' to the middle of the\n        edge along with its index, so that the one closest can be chosen.\n        \"\"\"\n        (x1, y1) = point1\n        (x2, y2) = point2\n        for i in range(4):\n            x3, y3 = points[i]\n            x4, y4 = points[(i + 1) % 4]\n            denom = (y4 - y3) * (x2 - x1) - (x4 - x3) * (y2 - y1)\n            nua = (x4 - x3) * (y1 - y3) - (y4 - y3) * (x1 - x3)\n            nub = (x2 - x1) * (y1 - y3) - (y2 - y1) * (x1 - x3)\n            if denom == 0:\n                # This covers two cases:\n                #   nua == nub == 0: Coincident\n                #   otherwise: Parallel\n                continue\n            ua, ub = nua / denom, nub / denom\n            if 0 <= ua <= 1 and 0 <= ub <= 1:\n                x = x1 + ua * (x2 - x1)\n                y = y1 + ua * (y2 - y1)\n       
         m = QtCore.QPoint((x3 + x4) / 2, (y3 + y4) / 2)\n                d = labelme.utils.distance(m - QtCore.QPoint(x2, y2))\n                yield d, i, (x, y)\n\n    # These two, along with a call to adjustSize are required for the\n    # scroll area.\n    def sizeHint(self):\n        return self.minimumSizeHint()\n\n    def minimumSizeHint(self):\n        if self.pixmap:\n            return self.scale * self.pixmap.size()\n        return super(Canvas, self).minimumSizeHint()\n\n    def wheelEvent(self, ev):\n        mods = ev.modifiers()\n        delta = ev.angleDelta()\n        if mods.value:\n            # with Ctrl/Command key\n            # zoom\n            self.zoomRequest.emit(delta.y(), ev.position().toPoint())\n        else:\n            # scroll\n            self.scrollRequest.emit(delta.x(), QtCore.Qt.Orientation.Horizontal.value)\n            self.scrollRequest.emit(delta.y(), QtCore.Qt.Orientation.Vertical.value)\n        ev.accept()\n\n    def keyPressEvent(self, ev):\n        key = ev.key()\n        if key == QtCore.Qt.Key.Key_Return:\n            if self.SAM_mode != \"\":\n                self.samFinish.emit()\n            elif self.tracking_area:\n                self.tracking_area = \"drawn\"\n                self.update()\n            elif self.canCloseShape():\n                self.finalise()\n            \n    def cancelManualDrawing(self):\n        self.current = None\n        self.drawingPolygon.emit(False)\n        self.update()\n\n    def setLastLabel(self, text, flags):\n        assert text\n        self.shapes[-1].label = text\n        self.shapes[-1].flags = flags\n        self.shapesBackups.pop()\n        self.storeShapes()\n        return self.shapes[-1]\n\n    def undoLastLine(self):\n        assert self.shapes\n        self.current = self.shapes.pop()\n        self.current.setOpen()\n        if self.createMode in [\"polygon\"]:\n            self.line.points = [self.current[-1], self.current[0]]\n        self.drawingPolygon.emit(True)\n\n    def undoLastPoint(self):\n        if not self.current or self.current.isClosed():\n            return\n        self.current.popPoint()\n        if len(self.current) > 0:\n            self.line[0] = self.current[-1]\n        else:\n            self.current = None\n            self.drawingPolygon.emit(False)\n        self.update()\n\n    def loadPixmap(self, pixmap, clear_shapes=True):\n        self.pixmap = pixmap\n        if clear_shapes:\n            self.shapes = []\n        self.update()\n\n    def loadShapes(self, shapes, replace=True):\n        if replace:\n            self.shapes = list(shapes)\n        else:\n            self.shapes.extend(shapes)\n        self.storeShapes()\n        self.current = None\n        self.hShape = None\n        self.hVertex = None\n        self.hEdge = None\n        self.update()\n\n    def setShapeVisible(self, shape, value):\n        self.visible[shape] = value\n        self.update()\n\n    def overrideCursor(self, cursor):\n        self.restoreCursor()\n        self._cursor = cursor\n        QtWidgets.QApplication.setOverrideCursor(cursor)\n\n    def restoreCursor(self):\n        QtWidgets.QApplication.restoreOverrideCursor()\n\n    def resetState(self):\n        self.restoreCursor()\n        self.pixmap = None\n        self.shapesBackups = []\n        self.update()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/check_updates_UI.py",
    "content": "from labelme.widgets.links import open_release\nfrom bs4 import BeautifulSoup\nimport requests\nfrom PyQt6.QtWidgets import QMessageBox, QLabel\nfrom PyQt6.QtCore import Qt\nfrom PyQt6.QtGui import QFont\nfrom PyQt6 import QtWidgets\nimport time\n    \n    \ndef PopUp():\n    \"\"\"\n    Check for updates of DLTA-AI and display a message box with the result.\n\n    The function checks the latest release of DLTA-AI on GitHub and compares it with the current version.\n    If the latest release is newer than the current version, a message box is displayed with a button to\n    download the latest version. Otherwise, a message box is displayed indicating that the user is using\n    the latest version.\n\n    Args:\n        None\n\n    Returns:\n        None\n    \"\"\"\n\n    # Import the current version of DLTA-AI\n    from labelme import __version__\n\n    # Initialize variables\n    updates = False\n    tag = {}\n    tag[\"href\"] = None\n\n    try:\n        # Get the HTML content of the releases page on GitHub\n        url = \"https://github.com/0ssamaak0/DLTA-AI/releases\"\n        html = requests.get(url, timeout=5).text\n        soup = BeautifulSoup(html, \"html.parser\")\n\n        # Find the first <a> tag with class=\"Link--primary\"\n        tag = soup.find(\"a\", class_=\"Link--primary\")\n\n        # Split the tag text on the first \"v\" to get the latest version number\n        lastest_version = tag.text.lower().split(\"v\")[1]\n\n        # Compare the latest version with the current version\n        if lastest_version != __version__:\n            text = f\"New version of DLTA-AI (v{lastest_version}) is available.\\n You are currently using (v{__version__})\\n\"\n            updates = True\n        else:\n            text = f\"you are using the latest version of DLTA-AI (v{__version__})\\n\"\n    except:\n        text = f\"You are using DLTA-AI (v{__version__})\\n There was an error checking for updates.\\n\"\n\n    # Create a message box with the result\n    msgBox = QMessageBox()\n    msgBox.setWindowTitle(\"Check for Updates\")\n    msgBox.setFont(QFont(\"Arial\", 10))  # Set the font size to 10\n\n    # Add the text label to the message box\n    msgBox.setText(text)\n\n    # If there are updates, add a button to download the latest version\n    if updates:\n        msgBox.addButton(QMessageBox.StandardButton.Yes)\n        msgBox.button(QMessageBox.StandardButton.Yes).setText(\"Get the Latest Version\")\n        msgBox.button(QMessageBox.StandardButton.Yes).clicked.connect(lambda: open_release(tag[\"href\"]))\n\n    # Add a close button to the message box\n    msgBox.addButton(QMessageBox.StandardButton.Close)\n    msgBox.button(QMessageBox.StandardButton.Close).setText(\"Close\")\n\n    # Display the message box\n    msgBox.exec()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/color_dialog.py",
    "content": "from PyQt6 import QtWidgets\n\n\nclass ColorDialog(QtWidgets.QColorDialog):\n    def __init__(self, parent=None):\n        super(ColorDialog, self).__init__(parent)\n        self.setOption(QtWidgets.QColorDialog.ColorDialogOption.ShowAlphaChannel)\n        # The Mac native dialog does not support our restore button.\n        self.setOption(QtWidgets.QColorDialog.ColorDialogOption.DontUseNativeDialog)\n        # Add a restore defaults button.\n        # The default is set at invocation time, so that it\n        # works across dialogs for different elements.\n        self.default = None\n        self.bb = self.findChild(QtWidgets.QDialogButtonBox)\n        self.bb.addButton(QtWidgets.QDialogButtonBox.StandardButton.RestoreDefaults)\n        self.bb.clicked.connect(self.checkRestore)\n\n    def getColor(self, value=None, title=None, default=None):\n        self.default = default\n        if title:\n            self.setWindowTitle(title)\n        if value:\n            self.setCurrentColor(value)\n        return self.currentColor() if self.exec() else None\n\n    def checkRestore(self, button):\n        if (\n            self.bb.buttonRole(button) & QtWidgets.QDialogButtonBox.ButtonRole.ResetRole\n            and self.default\n        ):\n            self.setCurrentColor(self.default)\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/deleteSelectedShape_UI.py",
    "content": "from PyQt6 import QtCore\nfrom PyQt6.QtCore import Qt\nfrom PyQt6 import QtWidgets\n\n\ndef PopUp(TOTAL_VIDEO_FRAMES, INDEX_OF_CURRENT_FRAME, config):\n    \n    \"\"\"\n    Summary:\n        Show a dialog to choose the deletion options.\n        (   This Frame and All Previous Frames,\n            This Frame and All Next Frames,\n            All Frames,\n            This Frame Only,\n            Specific Range of Frames           )\n            \n    Args:\n        TOTAL_VIDEO_FRAMES: the total number of frames\n        config: a dictionary of configurations\n        \n    Returns:\n        result: the result of the dialog\n        config: the updated dictionary of configurations\n        fromFrameVAL: the start frame of the deletion range\n        toFrameVAL: the end frame of the deletion range\n    \"\"\"\n    \n    dialog = QtWidgets.QDialog()\n    dialog.setWindowTitle(\"Choose Deletion Options\")\n    dialog.setWindowModality(Qt.WindowModality.ApplicationModal)\n    dialog.resize(500, 100)\n    dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n\n    layout = QtWidgets.QVBoxLayout()\n\n    label = QtWidgets.QLabel(\"Choose Deletion Options\")\n    layout.addWidget(label)\n\n    prev = QtWidgets.QRadioButton(\"This Frame and All Previous Frames\")\n    next = QtWidgets.QRadioButton(\"This Frame and All Next Frames\")\n    all = QtWidgets.QRadioButton(\n        \"All Frames\")\n    only = QtWidgets.QRadioButton(\"This Frame Only\")\n\n    from_to = QtWidgets.QRadioButton(\n        \"Specific Range of Frames\")\n    from_frame = QtWidgets.QSpinBox()\n    to_frame = QtWidgets.QSpinBox()\n    from_frame.setRange(1, TOTAL_VIDEO_FRAMES)\n    to_frame.setRange(1, TOTAL_VIDEO_FRAMES)\n    from_frame.valueChanged.connect(lambda: from_to.toggle())\n    to_frame.valueChanged.connect(lambda: from_to.toggle())\n\n    from_label = QtWidgets.QLabel(\"From:\")\n    to_label = QtWidgets.QLabel(\"To:\")\n\n    if config['deleteDefault'] == 'This Frame and All Previous Frames':\n        prev.toggle()\n    if config['deleteDefault'] == 'This Frame and All Next Frames':\n        next.toggle()\n    if config['deleteDefault'] == 'All Frames':\n        all.toggle()\n    if config['deleteDefault'] == 'This Frame Only':\n        only.toggle()\n    if config['deleteDefault'] == 'Specific Range of Frames':\n        from_to.toggle()\n\n    prev.toggled.connect(lambda: config.update(\n        {'deleteDefault': 'This Frame and All Previous Frames'}))\n    next.toggled.connect(lambda: config.update(\n        {'deleteDefault': 'This Frame and All Next Frames'}))\n    all.toggled.connect(lambda: config.update(\n        {'deleteDefault': 'All Frames'}))\n    only.toggled.connect(lambda: config.update(\n        {'deleteDefault': 'This Frame Only'}))\n    from_to.toggled.connect(lambda: config.update(\n        {'deleteDefault': 'Specific Range of Frames'}))\n\n\n    button_layout = QtWidgets.QHBoxLayout()\n    button_layout.addWidget(only)\n    button_layout.addWidget(all)\n    layout.addLayout(button_layout)\n\n    button_layout = QtWidgets.QHBoxLayout()\n    button_layout.addWidget(prev)\n    button_layout.addWidget(next)\n    layout.addLayout(button_layout)\n\n    layout.addWidget(from_to)\n\n    button_layout = QtWidgets.QHBoxLayout()\n    button_layout.addWidget(from_label)\n    button_layout.addWidget(from_frame)\n    button_layout.addWidget(to_label)\n    button_layout.addWidget(to_frame)\n    layout.addLayout(button_layout)\n\n    buttonBox = 
QtWidgets.QDialogButtonBox(\n        QtWidgets.QDialogButtonBox.StandardButton.Ok)\n    buttonBox.accepted.connect(dialog.accept)\n    buttonBox.rejected.connect(dialog.reject)\n    layout.addWidget(buttonBox)\n    dialog.setLayout(layout)\n    result = dialog.exec()\n    \n    mode = config['deleteDefault']\n    fromFrameVAL = from_frame.value() \n    toFrameVAL  = to_frame.value()\n    \n    if mode == 'This Frame and All Previous Frames':\n        toFrameVAL = INDEX_OF_CURRENT_FRAME\n        fromFrameVAL = 1\n    elif mode == 'This Frame and All Next Frames':\n        toFrameVAL = TOTAL_VIDEO_FRAMES\n        fromFrameVAL = INDEX_OF_CURRENT_FRAME\n    elif mode == 'This Frame Only':\n        toFrameVAL = INDEX_OF_CURRENT_FRAME\n        fromFrameVAL = INDEX_OF_CURRENT_FRAME\n    elif mode == 'All Frames':\n        toFrameVAL = TOTAL_VIDEO_FRAMES\n        fromFrameVAL = 1\n    \n    return result, config, fromFrameVAL, toFrameVAL\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/editLabel_videoMode.py",
    "content": "from PyQt6.QtCore import Qt\nfrom PyQt6 import QtWidgets\nfrom labelme.widgets.MsgBox import OKmsgBox\nfrom labelme.utils.helpers.mathOps import coco_classes\nimport copy\n\n\ndef editLabel_idChanged_UI(config, old_group_id, new_group_id, id_frames_rec, INDEX_OF_CURRENT_FRAME):\n    \n    idChanged = old_group_id != new_group_id\n    \n    if not idChanged:\n        result = QtWidgets.QDialog.DialogCode.Accepted\n        only_this_frame = False\n        duplicates = False\n        return result, config, only_this_frame, duplicates\n    \n    dialog = QtWidgets.QDialog()\n    dialog.setWindowTitle(\"Choose Edit Options\")\n    dialog.setWindowModality(Qt.WindowModality.ApplicationModal)\n    dialog.resize(250, 100)\n\n    layout = QtWidgets.QVBoxLayout()\n\n    label = QtWidgets.QLabel(\"Choose Edit Options\")\n    layout.addWidget(label)\n\n    only = QtWidgets.QRadioButton(\"Edit only this frame\")\n    all = QtWidgets.QRadioButton(\"Edit all frames with this ID\")\n\n    if config['EditDefault'] == 'Edit only this frame':\n        only.toggle()\n    if config['EditDefault'] == 'Edit all frames with this ID':\n        all.toggle()\n\n    only.toggled.connect(lambda: config.update(\n        {'EditDefault': 'Edit only this frame'}))\n    all.toggled.connect(lambda: config.update(\n        {'EditDefault': 'Edit all frames with this ID'}))\n\n    layout.addWidget(only)\n    layout.addWidget(all)\n\n    buttonBox = QtWidgets.QDialogButtonBox(\n        QtWidgets.QDialogButtonBox.StandardButton.Ok)\n    buttonBox.accepted.connect(dialog.accept)\n    layout.addWidget(buttonBox)\n    dialog.setLayout(layout)\n    result = dialog.exec()\n    only_this_frame = config['EditDefault'] == 'Edit only this frame'\n    duplicates = check_duplicates_editLabel(id_frames_rec, old_group_id, new_group_id, only_this_frame, idChanged, INDEX_OF_CURRENT_FRAME)\n    return result, config, only_this_frame, duplicates\n\n\ndef check_duplicates_editLabel(id_frames_rec, old_group_id, new_group_id, only_this_frame, idChanged, currFrame):\n    \n    \"\"\"\n    Summary:\n        Check if there are id duplicates in any frame if the id is changed.\n        \n    Args:\n        id_frames_rec: a dictionary of id frames records\n        old_group_id: the old id\n        new_group_id: the new id\n        only_this_frame: a flag to indicate if the id is changed only in the current frame or in all frames\n        idChanged: a flag to indicate if the id is changed or not (if False, the function returns False as there is no change)\n        currFrame: the current frame index\n        \n    Returns:\n        True if there will be duplicates, False otherwise\n    \"\"\"\n    \n    if not idChanged:\n        return False\n    \n    # frame record of the old id\n    old_id_frame_record = copy.deepcopy(\n        id_frames_rec['id_' + str(old_group_id)])\n    \n    # frame record of the new id\n    try:\n        new_id_frame_record = copy.deepcopy(\n            id_frames_rec['id_' + str(new_group_id)])\n    except:\n        new_id_frame_record = set()\n        pass\n\n    # if the change is only in the current frame\n    if only_this_frame:\n        # check if the new id exists in the current frame\n        Intersection = new_id_frame_record.intersection({currFrame})\n        if len(Intersection) != 0:\n            OKmsgBox(\"Warning\",\n                        f\"Two shapes with the same ID exists.\\nApparantly, a shape with ID ({new_group_id}) already exists with another shape with ID ({old_group_id}) in the CURRENT 
FRAME and the edit will result in two shapes with the same ID in the same frame.\\n\\n The edit is NOT performed.\")\n            return True\n    \n    # if the change is in all frames\n    else:\n        # check if the new id exists in any frame that the old id exists\n        Intersection = old_id_frame_record.intersection(new_id_frame_record)\n        if len(Intersection) != 0:\n            reduced_Intersection = reducing_Intersection(Intersection)\n            OKmsgBox(\"ID already exists\",\n                        f'Two shapes with the same ID would exist in at least one frame.\\nApparently, a shape with ID ({new_group_id}) already exists with another shape with ID ({old_group_id}).\\nLike in frames ({reduced_Intersection}) and the edit will result in two shapes with the same ID ({new_group_id}).\\n\\n The edit is NOT performed.')\n            return True\n\n    return False\n\n\ndef editLabel_handle_data(currFrame, listObj,\n                        trajectories, id_frames_rec, \n                        idChanged, only_this_frame, shape,\n                        old_group_id, new_group_id = None):\n    \n    \"\"\"\n    Summary:\n        Handle id change in edit label.\n        Check if the id is changed or not.\n        If the id is changed, transfer the frames from the old id to the new id.\n            two cases:\n                1- only_this_frame: transfer only the current frame\n                2- not only_this_frame: transfer all the frames\n        If the id is not changed, update the id in the current frame.\n        \n    Args:\n        currFrame: the current frame index\n        listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n        trajectories: a dictionary of trajectories\n        id_frames_rec: a dictionary of id frames records\n        idChanged: a flag to indicate if the id is changed or not\n        only_this_frame: a flag to indicate if the id is changed only in the current frame or in all frames\n        shape: the shape to update\n        old_group_id: the old id\n        new_group_id: the new id, if None then the old id is used (no id change)\n        \n    Returns:\n        id_frames_rec: a dictionary of id frames records\n        trajectories: a dictionary of trajectories\n        listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n    \"\"\"\n    \n    if new_group_id is None or not idChanged:\n        new_group_id = old_group_id\n    \n    if not idChanged:\n        old_frames = id_frames_rec['id_' + str(old_group_id)]\n        listObj = update_id_in_listObjframes(listObj, old_frames, shape, old_group_id)\n    \n    elif idChanged and only_this_frame:\n        transfer_rec_and_traj(old_group_id, id_frames_rec, trajectories, [currFrame], new_group_id)\n        update_id_in_listObjframe(listObj, currFrame, shape, old_group_id, new_group_id)\n        new_frames = id_frames_rec['id_' + str(new_group_id)]\n        update_id_in_listObjframes(listObj, new_frames, shape, new_group_id)\n        \n    elif idChanged and not only_this_frame:\n        old_frames = id_frames_rec['id_' + str(old_group_id)]\n        transfer_rec_and_traj(old_group_id, id_frames_rec, trajectories, old_frames, new_group_id)\n        update_id_in_listObjframes(listObj, old_frames, shape, old_group_id, new_group_id)\n        new_frames = id_frames_rec['id_' + str(new_group_id)]\n        update_id_in_listObjframes(listObj, new_frames, shape, new_group_id)\n    \n    return id_frames_rec, 
trajectories, listObj\n    \n\ndef update_id_in_listObjframe(listObj, frame, shape, old_id, new_id = None):\n        \n        \"\"\"\n        Summary:\n            Update the id of a shape in a frame in listObj.\n            \n        Args:\n            listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n            frame: the frame to update\n            shape: the shape to update\n            old_id: the old id\n            new_id: the new id, if None then the old id is used (no id change)\n            \n        Returns:\n            listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n        \"\"\"\n        \n        new_id = old_id if new_id is None else new_id\n        \n        for object_ in listObj[frame - 1]['frame_data']:\n            if object_['tracker_id'] == old_id:\n                object_['tracker_id'] = new_id\n                object_['class_name'] = shape.label\n                object_['confidence'] = str(1.0)\n                object_['class_id'] = coco_classes.index(\n                    shape.label) if shape.label in coco_classes else -1\n                break\n            \n        return listObj\n\n  \ndef update_id_in_listObjframes(listObj, frames, shape, old_id, new_id = None):\n    \n    \"\"\"\n    Summary:\n        Update the id of a shape in a list of frames in listObj.\n        \n    Args:\n        listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n        frames: a list of frames to update\n        shape: the shape to update\n        old_id: the old id\n        new_id: the new id, if None then the old id is used (no id change)\n        \n    Returns:\n        listObj: a list of objects (each object is a dictionary of a frame with keys (frame_idx, frame_data))\n    \"\"\"\n    \n    for frame in frames:\n        listObj = update_id_in_listObjframe(listObj, frame, shape, old_id, new_id)\n        \n    return listObj\n\n\ndef transfer_rec_and_traj(id, id_frames_rec, trajectories, frames, new_id):\n    \n    \"\"\"\n    Summary:\n        Transfer frames from an id to another id.\n        \n    Args:\n        id: the id to transfer from\n        id_frames_rec: a dictionary of id frames records\n        trajectories: a dictionary of trajectories\n        frames: a list of frames to transfer\n        new_id: the id to transfer to\n        \n    Returns:\n        id_frames_rec: a dictionary of id frames records\n        trajectories: a dictionary of trajectories\n    \"\"\"\n    \n    # old id frame record and trajectory\n    id_rec = id_frames_rec['id_' + str(id)]\n    id_traj = trajectories['id_' + str(id)]\n    \n    # new id frame record and trajectory\n    try:\n        new_id_rec = id_frames_rec['id_' + str(new_id)]\n        new_id_traj = trajectories['id_' + str(new_id)]\n    except:\n        new_id_rec = set()\n        new_id_traj = [(-1, -1)] * len(id_traj)\n        \n    # transfer frames\n    id_rec = id_rec - set(frames)\n    new_id_rec = new_id_rec.union(set(frames))\n    \n    # transfer trajectories\n    for frame in frames:\n        new_id_traj[frame - 1] = id_traj[frame - 1]\n        id_traj[frame - 1] = (-1, -1)\n    \n    id_frames_rec['id_' + str(id)] = id_rec\n    id_frames_rec['id_' + str(new_id)] = new_id_rec\n    trajectories['id_' + str(id)] = id_traj\n    trajectories['id_' + str(new_id)] = new_id_traj\n    \n    return id_frames_rec, trajectories\n\n\ndef 
reducing_Intersection(Intersection):\n    \n    \"\"\"\n    Summary:\n        Reduce the intersection of two sets to a string.\n        Collapse consecutive numbers in the intersection into a range.\n            example: [1, 2, 3, 4, 5, 7, 8, 9] -> \"1 to 5, 7 to 9\"\n        \n    Args:\n        Intersection: the intersection of two sets\n        \n    Returns:\n        reduced_Intersection: the reduced intersection as a string\n    \"\"\"\n    \n    Intersection = sorted(Intersection)\n    \n    # collect runs of consecutive frame numbers as (start, end) pairs\n    ranges = []\n    start = prev = Intersection[0]\n    for value in Intersection[1:]:\n        if value - prev == 1:\n            prev = value\n            continue\n        ranges.append((start, prev))\n        start = prev = value\n    ranges.append((start, prev))\n    \n    # format single frames as \"n\" and runs as \"start to end\"\n    parts = [str(s) if s == e else str(s) + \" to \" + str(e) for s, e in ranges]\n    reduced_Intersection = \", \".join(parts)\n    \n    return reduced_Intersection\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/escapable_qlist_widget.py",
    "content": "from PyQt6.QtCore import Qt\nfrom PyQt6 import QtWidgets\n\n\nclass EscapableQListWidget(QtWidgets.QListWidget):\n    def keyPressEvent(self, event):\n        super(EscapableQListWidget, self).keyPressEvent(event)\n        if event.key() == Qt.Key.Key_Escape:\n            self.clearSelection()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/exportData_UI.py",
    "content": "from PyQt6.QtCore import Qt\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\nfrom labelme.widgets import open_file\n\ntry:\n    from labelme.utils.custom_exports import custom_exports_list\nexcept:\n    custom_exports_list = []\n    print(\"custom_exports file not found\")\n\n\n\n\ndef PopUp(mode = \"video\"):\n    \"\"\"\n    Displays a dialog box for choosing export options for annotations and videos.\n\n    Args:\n        mode (str): The mode of the export. Can be either \"video\" or \"image\". Defaults to \"video\".\n\n    Returns:\n        A tuple containing the result of the dialog box and the selected export options. If the dialog box is accepted, the first element of the tuple is `QtWidgets.QDialog.DialogCode.Accepted`. Otherwise, it is `QtWidgets.QDialog.Rejected`. The second element of the tuple is a boolean indicating whether to export annotations in COCO format. If `mode` is \"video\", the third element of the tuple is a boolean indicating whether to export annotations in MOT format, and the fourth element is a boolean indicating whether to export the video with the current visualization settings. If there are any custom export options available, the fifth element of the tuple is a list of booleans indicating whether to export using each custom export option.\n    \"\"\"\n\n    dialog = QtWidgets.QDialog()\n    dialog.setWindowTitle(\"Choose Export Options\")\n    dialog.setWindowModality(Qt.WindowModality.ApplicationModal)\n    dialog.resize(250, 100)\n\n    layout = QtWidgets.QVBoxLayout()\n\n    font = QtGui.QFont()\n    font.setBold(True)\n    font.setPointSize(10)\n\n    if mode == \"video\":\n        vid_label = QtWidgets.QLabel(\"Export Video\")\n        vid_label.setFont(font)\n        vid_label.setMargin(7)\n\n    std_label = QtWidgets.QLabel(\"Export Annotations (Standard Formats)\")\n    std_label.setFont(font)\n    std_label.setMargin(7)\n\n    custom_label = QtWidgets.QLabel(\"Export Annotations (Custom Formats)\")\n    custom_label.setFont(font)\n    custom_label.setMargin(7)\n    \n    # Create a button group to hold the radio buttons\n    button_group = QtWidgets.QButtonGroup()\n\n    # Create the radio buttons and add them to the button group\n    coco_radio = QtWidgets.QRadioButton(\n        \"COCO Format (Detection / Segmentation)\")\n    \n    # make the video and mot radio buttons if the mode is video\n    if mode == \"video\":\n        video_radio = QtWidgets.QRadioButton(\"Export Video with current visualization settings\")\n        mot_radio = QtWidgets.QRadioButton(\"MOT Format (Tracking)\")\n\n    # make the custom exports radio buttons\n    custom_exports_radio_list = []\n    if len(custom_exports_list) != 0:\n        for custom_exp in custom_exports_list:\n            if custom_exp.mode == \"video\" and mode == \"video\":\n                custom_radio = QtWidgets.QRadioButton(custom_exp.button_name)\n                button_group.addButton(custom_radio)\n                custom_exports_radio_list.append(custom_radio)\n            if custom_exp.mode == \"image\" and mode == \"image\":\n                custom_radio = QtWidgets.QRadioButton(custom_exp.button_name)\n                button_group.addButton(custom_radio)\n                custom_exports_radio_list.append(custom_radio)\n            \n    \n    button_group.addButton(coco_radio)\n    \n    # add the video and mot radio buttons to the button group if the mode is video\n    if mode == \"video\":\n        button_group.addButton(video_radio)\n        
button_group.addButton(mot_radio)\n\n    # Add custom radio buttons to the button group\n    if len(custom_exports_list) != 0:\n        for custom_radio in custom_exports_radio_list:\n            button_group.addButton(custom_radio)\n\n    # Add to the layout\n\n    # video label and radio buttons\n    if mode == \"video\":\n        layout.addWidget(vid_label)\n        layout.addWidget(video_radio)\n\n    # standard label and radio buttons\n    layout.addWidget(std_label)\n    layout.addWidget(coco_radio)\n    if mode == \"video\":\n        layout.addWidget(mot_radio)\n\n    # custom label and radio buttons\n    layout.addWidget(custom_label)\n    if len(custom_exports_radio_list) != 0:\n        for custom_radio in custom_exports_radio_list:\n            layout.addWidget(custom_radio)\n    else:\n        layout.addWidget(QtWidgets.QLabel(\"No Custom Exports Available, you can add them in utils.custom_exports.py\"))\n\n    # create button when clicking it open custom_exports.py file\n    custom_exports_button = QtWidgets.QPushButton(\"Open Custom Exports\")\n    custom_exports_button.clicked.connect(open_file.PopUp)\n    layout.addWidget(custom_exports_button)\n\n    buttonBox = QtWidgets.QDialogButtonBox(\n        QtWidgets.QDialogButtonBox.StandardButton.Ok | QtWidgets.QDialogButtonBox.StandardButton.Cancel)\n    buttonBox.accepted.connect(dialog.accept)\n    buttonBox.rejected.connect(dialog.reject)\n\n    layout.addWidget(buttonBox)\n\n    dialog.setLayout(layout)\n\n    result = dialog.exec()\n\n    # prepare the checked list of custom exports\n    custom_exports_radio_checked_list = []\n    if len(custom_exports_list) != 0:\n        for custom_radio in custom_exports_radio_list:\n            custom_exports_radio_checked_list.append(custom_radio.isChecked())\n    \n    if mode == \"video\":\n        return result, coco_radio.isChecked(), mot_radio.isChecked(), video_radio.isChecked(), custom_exports_radio_checked_list\n    else:\n        return result, coco_radio.isChecked(), custom_exports_radio_checked_list\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/feedback_UI.py",
    "content": "from labelme.widgets.links import open_issue\nfrom PyQt6.QtWidgets import QMessageBox\nfrom PyQt6.QtCore import Qt\n\n\ndef PopUp():\n    \"\"\"\n    Displays a dialog box for providing feedback on the DLTA-AI project.\n\n    Parameters:\n    None\n\n    Returns:\n    None\n    \"\"\"\n    # Define the text for the feedback dialog box\n    text = \"Found a bug? 🐞\\nWant to suggest a feature? 🌟\\n\"\n    \n    # Create the feedback dialog box\n    msgBox = QMessageBox()\n    msgBox.setWindowTitle(\"Feedback\")\n    msgBox.setText(text)\n\n    # Add a button to open the GitHub issues page\n    msgBox.addButton(QMessageBox.StandardButton.Yes)\n    msgBox.button(QMessageBox.StandardButton.Yes).setText(\"Open an Issue\")\n    msgBox.button(QMessageBox.StandardButton.Yes).clicked.connect(open_issue)\n\n    # Add a close button\n    msgBox.addButton(QMessageBox.StandardButton.Close)\n    msgBox.button(QMessageBox.StandardButton.Close).setText(\"Close\")\n\n    # Display the feedback dialog box\n    msgBox.exec()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/getIDfromUser_UI.py",
    "content": "from PyQt6.QtCore import Qt\nfrom PyQt6 import QtWidgets\nfrom .MsgBox import OKmsgBox\nfrom labelme.utils.helpers.mathOps import is_id_repeated\n\n\ndef PopUp(self, group_id, text):\n    \n    \"\"\"\n    Summary:\n        Show a dialog to get a new id from the user.\n        check if the id is repeated.\n        \n    Args:\n        self: the main window object to access the canvas\n        group_id: the group id\n        text: Class name\n        \n    Returns:\n        group_id: the new group id\n        text: Class name (False if the user-input id is repeated)\n    \"\"\"    \n    \n    mainTEXT = \"A Shape with that ID already exists in this frame.\\n\\n\"\n    repeated = 0\n\n    while is_id_repeated(self, group_id):\n        dialog = QtWidgets.QDialog()\n        dialog.setWindowTitle(\"ID already exists\")\n        dialog.setWindowModality(Qt.WindowModality.ApplicationModal)\n        dialog.resize(450, 100)\n\n        if repeated == 0:\n            label = QtWidgets.QLabel(mainTEXT + f'Please try a new ID: ')\n        if repeated == 1:\n            label = QtWidgets.QLabel(\n                mainTEXT + f'OH GOD.. AGAIN? I hpoe you are not doing this on purpose..')\n        if repeated == 2:\n            label = QtWidgets.QLabel(\n                mainTEXT + f'AGAIN? REALLY? LAST time for you..')\n        if repeated == 3:\n            text = False\n            return group_id, text\n\n        properID = QtWidgets.QSpinBox()\n        properID.setRange(1, 1000)\n\n        buttonBox = QtWidgets.QDialogButtonBox(\n            QtWidgets.QDialogButtonBox.StandardButton.Ok)\n        buttonBox.accepted.connect(dialog.accept)\n\n        layout = QtWidgets.QVBoxLayout()\n        layout.addWidget(label)\n        layout.addWidget(properID)\n        layout.addWidget(buttonBox)\n        dialog.setLayout(layout)\n        result = dialog.exec()\n        if result != QtWidgets.QDialog.DialogCode.Accepted:\n            text = False\n            return group_id, text\n\n        group_id = properID.value()\n        repeated += 1\n\n    if repeated > 1:\n        OKmsgBox(\"Finally..!\", \"OH, Finally..!\")\n\n    return group_id, text\n\n\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/interpolation_UI.py",
    "content": "from PyQt6 import QtCore\nfrom PyQt6.QtCore import Qt\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\n\n\n\ndef PopUp(config):\n    \n    \"\"\"\n    Summary:\n        Show a dialog to choose the interpolation options.\n        (   interpolate only missed frames between detected frames, \n            interpolate all frames between your KEY frames, \n            interpolate ALL frames with SAM (more precision, more time) )\n            \n    Args:\n        config: a dictionary of configurations\n        \n    Returns:\n        result: the result of the dialog\n        config: the updated dictionary of configurations\n    \"\"\"\n    \n    def show_unshow_overwrite():\n        if with_sam.isChecked():\n            config.update({'interpolationDefMethod': 'SAM'})\n            overwrite_checkBox.setEnabled(True)\n        else:\n            config.update({'interpolationDefMethod': 'Linear'})\n            overwrite_checkBox.setEnabled(False)\n    \n    dialog = QtWidgets.QDialog()\n    dialog.setWindowTitle(\"Choose Interpolation Options\")\n    dialog.setWindowModality(Qt.WindowModality.ApplicationModal)\n    dialog.resize(250, 100)\n    dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n\n    layout = QtWidgets.QVBoxLayout()\n\n    label = QtWidgets.QLabel(\"Choose Interpolation Options\")\n    label.setFont(QtGui.QFont(\"Arial\", 10))\n    method_label = QtWidgets.QLabel(\"Interpolation Method\")\n    between_label = QtWidgets.QLabel(\"Interpolation Between\")\n\n    layout.addWidget(label)\n    \n\n    # Interpolation Method button group\n    method_group = QtWidgets.QButtonGroup()\n    with_linear = QtWidgets.QRadioButton(\"Linear Interpolation\")\n    with_sam = QtWidgets.QRadioButton(\"SAM Interpolation\")\n    method_group.addButton(with_linear)\n    method_group.addButton(with_sam)\n\n\n    with_linear.toggled.connect(show_unshow_overwrite)\n    with_sam.toggled.connect(show_unshow_overwrite)\n\n    layout.addWidget(method_label)\n    method_layout = QtWidgets.QHBoxLayout()\n    method_layout.addWidget(with_linear)\n    method_layout.addWidget(with_sam)\n    layout.addLayout(method_layout)\n\n    # Keyframes button group\n    between_group = QtWidgets.QButtonGroup()\n    with_keyframes = QtWidgets.QRadioButton(\"Selected Keyframes\")\n    without_keyframes = QtWidgets.QRadioButton(\"Detected Frames\")\n    between_group.addButton(with_keyframes)\n    between_group.addButton(without_keyframes)\n\n    with_keyframes.toggled.connect(lambda: config.update({'interpolationDefType': 'key' * with_keyframes.isChecked()}))\n    without_keyframes.toggled.connect(lambda: config.update({'interpolationDefType': 'all' * without_keyframes.isChecked()}))\n\n    layout.addWidget(between_label)\n    keyframes_layout = QtWidgets.QHBoxLayout()\n    keyframes_layout.addWidget(with_keyframes)\n    keyframes_layout.addWidget(without_keyframes)\n    layout.addLayout(keyframes_layout)\n    \n    overwrite_checkBox = QtWidgets.QCheckBox(\"Overwrite used frames with SAM\")\n    overwrite_checkBox.setChecked(config['interpolationOverwrite'])\n    overwrite_checkBox.toggled.connect(lambda: config.update({'interpolationOverwrite': overwrite_checkBox.isChecked()}))\n    layout.addWidget(overwrite_checkBox)\n    \n    show_unshow_overwrite()\n    # for some reason you must check linear then sam to make it work\n    with_linear.setChecked(True)\n\n    buttonBox = QtWidgets.QDialogButtonBox(QtWidgets.QDialogButtonBox.StandardButton.Ok)\n    
buttonBox.accepted.connect(dialog.accept)\n    layout.addWidget(buttonBox)\n\n    dialog.setLayout(layout)\n    result = dialog.exec()\n    return result, config\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/label_dialog.py",
    "content": "import re\n\nfrom qtpy import QT_VERSION\nfrom PyQt6 import QtCore\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\n\nfrom labelme.logger import logger\nimport labelme.utils\n\n\nQT5 = QT_VERSION[0] == \"5\"\n\n\n# TODO(unknown):\n# - Calculate optimal position so as not to go out of screen area.\n\n\nclass LabelQLineEdit(QtWidgets.QLineEdit):\n    def setListWidget(self, list_widget):\n        self.list_widget = list_widget\n\n    def keyPressEvent(self, e):\n        if e.key() in [QtCore.Qt.Key.Key_Up, QtCore.Qt.Key.Key_Down]:\n            self.list_widget.keyPressEvent(e)\n        else:\n            super(LabelQLineEdit, self).keyPressEvent(e)\n\n\nclass LabelDialog(QtWidgets.QDialog):\n    def __init__(\n        self,\n        text=\"Enter object label\",\n        parent=None,\n        labels=None,\n        sort_labels=True,\n        show_text_field=True,\n        completion=\"startswith\",\n        fit_to_content=None,\n        flags=None,\n    ):\n        if fit_to_content is None:\n            fit_to_content = {\"row\": False, \"column\": True}\n        self._fit_to_content = fit_to_content\n        super(LabelDialog, self).__init__(parent)\n        self.setWindowTitle(\"Edit Label\")\n\n        self.edit = LabelQLineEdit()\n        self.edit.setPlaceholderText(text)\n        self.edit.setValidator(labelme.utils.labelValidator())\n        self.edit.editingFinished.connect(self.postProcess)\n        if flags:\n            self.edit.textChanged.connect(self.updateFlags)\n        self.edit_group_id = QtWidgets.QLineEdit()\n        self.edit_group_id.setPlaceholderText(\"Tracking ID\")\n        self.edit_group_id.setValidator(\n            QtGui.QRegularExpressionValidator(QtCore.QRegularExpression(r\"\\d*\"), None)\n        )\n\n        self.edit_group_id_label = QtWidgets.QLabel()\n        self.edit_group_id_label.setText(\"Tracking ID\")\n\n        self.select_class_label = QtWidgets.QLabel()\n        self.select_class_label.setText(\"Class Name\")\n\n        # buttons\n        self.buttonBox = bb =  QtWidgets.QDialogButtonBox(\n            QtWidgets.QDialogButtonBox.StandardButton.Ok | QtWidgets.QDialogButtonBox.StandardButton.Cancel,\n            QtCore.Qt.Orientation.Horizontal,\n            self\n        )\n        bb.button(bb.StandardButton.Ok).setIcon(labelme.utils.newIcon(\"done\"))\n        bb.button(bb.StandardButton.Cancel).setIcon(labelme.utils.newIcon(\"undo\"))\n        bb.setCenterButtons(True)  # center the buttons\n        bb.accepted.connect(self.validate)\n        bb.rejected.connect(self.reject)\n        \n        # label_list\n        self.labelList = QtWidgets.QListWidget()\n        if self._fit_to_content[\"row\"]:\n            self.labelList.setHorizontalScrollBarPolicy(\n                QtCore.Qt.ScrollBarPolicy.ScrollBarAlwaysOff\n            )\n        if self._fit_to_content[\"column\"]:\n            self.labelList.setVerticalScrollBarPolicy(\n                QtCore.Qt.ScrollBarPolicy.ScrollBarAlwaysOff\n            )\n        self._sort_labels = sort_labels\n        if labels:\n            self.labelList.addItems(labels)\n        if self._sort_labels:\n            self.labelList.sortItems()\n        else:\n            self.labelList.setDragDropMode(\n                QtWidgets.QAbstractItemView.InternalMove\n            )\n        self.labelList.currentItemChanged.connect(self.labelSelected)\n        self.labelList.itemDoubleClicked.connect(self.labelDoubleClicked)\n        self.edit.setListWidget(self.labelList)\n\n        
self.labelListLabel = QtWidgets.QLabel()\n        self.labelListLabel.setText(\"Select From Class List\")\n        # label_flags\n        if flags is None:\n            flags = {}\n        self._flags = flags\n        self.flagsLayout = QtWidgets.QVBoxLayout()\n        self.resetFlags()\n        self.edit.textChanged.connect(self.updateFlags)\n\n        # confidence\n        self.confidenceEdit = QtWidgets.QLineEdit()\n        self.confidenceEdit.setPlaceholderText('Confidence')\n\n        # Add a validator to accept only floats between 0 and 1\n        validator = QtGui.QDoubleValidator(0, 1, 2, self.confidenceEdit)\n        self.confidenceEdit.setValidator(validator)\n\n        # add title before confidence\n        self.confidenceEditLabel = QtWidgets.QLabel()\n        self.confidenceEditLabel.setText('Confidence')\n        \n\n\n        layout = QtWidgets.QVBoxLayout()\n        layout.addItem(self.flagsLayout)\n\n        layout.addWidget(self.select_class_label)\n        layout.addWidget(self.edit)\n        \n        # Create a vertical layout for the edit group ID label and edit\n        edit_group_id_layout = QtWidgets.QVBoxLayout()\n        edit_group_id_layout.addWidget(self.edit_group_id_label)\n        edit_group_id_layout.addWidget(self.edit_group_id)\n\n        # Create a vertical layout for the confidence label and edit\n        confidence_layout = QtWidgets.QVBoxLayout()\n        confidence_layout.addWidget(self.confidenceEditLabel)\n        confidence_layout.addWidget(self.confidenceEdit)\n        \n        # add both vertical layouts to a horizontal layout\n        horizontal_layout = QtWidgets.QHBoxLayout()\n        horizontal_layout.addItem(edit_group_id_layout)\n        horizontal_layout.addSpacing(10)  # add 10 pixels of space\n        horizontal_layout.addItem(confidence_layout)\n\n        # add the horizontal layout to the main layout\n        layout.addItem(horizontal_layout)\n\n        # add the label list and label list label to the main layout\n        layout.addWidget(self.labelListLabel)\n        layout.addWidget(self.labelList)\n        layout.addWidget(bb)\n        self.resize(300,200)\n        self.setLayout(layout)\n\n\n        # completion\n        completer = QtWidgets.QCompleter()\n        if not QT5 and completion != \"startswith\":\n            logger.warn(\n                \"completion other than 'startswith' is only \"\n                \"supported with Qt5. 
Using 'startswith'\"\n            )\n            completion = \"startswith\"\n        if completion == \"startswith\":\n            completer.setCompletionMode(QtWidgets.QCompleter.CompletionMode.InlineCompletion)\n            # Default settings.\n            # completer.setFilterMode(QtCore.Qt.MatchStartsWith)\n        elif completion == \"contains\":\n            completer.setCompletionMode(QtWidgets.QCompleter.CompletionMode.PopupCompletion)\n            completer.setFilterMode(QtCore.Qt.MatchFlag.MatchContains)\n        else:\n            raise ValueError(\"Unsupported completion: {}\".format(completion))\n        completer.setModel(self.labelList.model())\n        self.edit.setCompleter(completer)\n\n    def addLabelHistory(self, label):\n        if self.labelList.findItems(label, QtCore.Qt.MatchFlag.MatchExactly):\n            return\n        self.labelList.addItem(label)\n        if self._sort_labels:\n            self.labelList.sortItems()\n\n    def labelSelected(self, item):\n        self.edit.setText(item.text())\n\n    def validate(self):\n        text = self.edit.text()\n        if hasattr(text, \"strip\"):\n            text = text.strip()\n        else:\n            text = text.trimmed()\n        if text:\n            self.accept()\n\n    def labelDoubleClicked(self, item):\n        self.validate()\n\n    def postProcess(self):\n        text = self.edit.text()\n        if hasattr(text, \"strip\"):\n            text = text.strip()\n        else:\n            text = text.trimmed()\n        self.edit.setText(text)\n\n    def updateFlags(self, label_new):\n        # keep state of shared flags\n        flags_old = self.getFlags()\n\n        flags_new = {}\n        for pattern, keys in self._flags.items():\n            if re.match(pattern, label_new):\n                for key in keys:\n                    flags_new[key] = flags_old.get(key, False)\n        self.setFlags(flags_new)\n\n    def deleteFlags(self):\n        for i in reversed(range(self.flagsLayout.count())):\n            item = self.flagsLayout.itemAt(i).widget()\n            self.flagsLayout.removeWidget(item)\n            item.setParent(None)\n\n    def resetFlags(self, label=\"\"):\n        flags = {}\n        for pattern, keys in self._flags.items():\n            if re.match(pattern, label):\n                for key in keys:\n                    flags[key] = False\n        self.setFlags(flags)\n\n    def setFlags(self, flags):\n        self.deleteFlags()\n        for key in flags:\n            item = QtWidgets.QCheckBox(key, self)\n            item.setChecked(flags[key])\n            self.flagsLayout.addWidget(item)\n            item.show()\n\n    def getFlags(self):\n        flags = {}\n        for i in range(self.flagsLayout.count()):\n            item = self.flagsLayout.itemAt(i).widget()\n            print(type(item))\n            flags[item.text()] = item.isChecked()\n        return flags\n\n    def getGroupId(self):\n        group_id = self.edit_group_id.text()\n        if group_id:\n            return int(group_id)\n        return None\n        \n    def getContent(self):\n        content = self.confidenceEdit.text()\n        if content:\n            return content\n        return None\n        \n    def setContent(self, content):\n        if type(content) != str:\n            content = str(content)\n        self.confidenceEdit.setText(content)\n\n    def popUp(self, text=None, move=True, flags=None, group_id=None, content=None, skip_flag=False):\n        if self._fit_to_content[\"row\"]:\n            
self.labelList.setMinimumHeight(\n                self.labelList.sizeHintForRow(0) * self.labelList.count() + 2\n            )\n        if self._fit_to_content[\"column\"]:\n            self.labelList.setMinimumWidth(\n                self.labelList.sizeHintForColumn(0) + 2\n            )\n        # if text is None, the previous label in self.edit is kept\n        if text is None:\n            text = self.edit.text()\n        # if content is None, make the self.confidenceEdit empty\n        if content is None:\n            content=\"\"\n        self.setContent(content)\n        if flags:\n            self.setFlags(flags)\n        else:\n            self.resetFlags(text)\n        self.edit.setText(text)\n        self.edit.setSelection(0, len(text))\n        if group_id is None:\n            self.edit_group_id.clear()\n        else:\n            self.edit_group_id.setText(str(group_id))\n        items = self.labelList.findItems(text, QtCore.Qt.MatchFlag.MatchFixedString)\n        if items:\n            if len(items) != 1:\n                logger.warning(\"Label list has duplicate '{}'\".format(text))\n            self.labelList.setCurrentItem(items[0])\n            row = self.labelList.row(items[0])\n            self.edit.completer().setCurrentRow(row)\n        self.edit.setFocus(QtCore.Qt.FocusReason.PopupFocusReason)\n        if move:\n            self.move(QtGui.QCursor.pos())\n        if skip_flag:\n            return self.edit.text(), self.getFlags(), self.getGroupId(), self.getContent()\n        if self.exec():\n            return self.edit.text(), self.getFlags(), self.getGroupId(), self.getContent()\n        else:\n            return None, None, None, None\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/label_list_widget.py",
    "content": "from PyQt6 import QtCore\nfrom PyQt6.QtCore import Qt\nfrom PyQt6 import QtGui\nfrom PyQt6.QtGui import QPalette\nfrom PyQt6 import QtWidgets\nfrom PyQt6.QtWidgets import QStyle\n\n\n# https://stackoverflow.com/a/2039745/4158863\nclass HTMLDelegate(QtWidgets.QStyledItemDelegate):\n    def __init__(self, parent=None):\n        super(HTMLDelegate, self).__init__()\n        self.doc = QtGui.QTextDocument(self)\n\n    def paint(self, painter, option, index):\n        painter.save()\n\n        options = QtWidgets.QStyleOptionViewItem(option)\n\n        self.initStyleOption(options, index)\n        self.doc.setHtml(options.text)\n        options.text = \"\"\n\n        style = (\n            QtWidgets.QApplication.style()\n            if options.widget is None\n            else options.widget.style()\n        )\n        style.drawControl(QStyle.ControlElement.CE_ItemViewItem, options, painter)\n\n        ctx = QtGui.QAbstractTextDocumentLayout.PaintContext()\n\n        if option.state & QStyle.StateFlag.State_Selected:\n            ctx.palette.setColor(\n                QPalette.ColorRole.Text,\n                option.palette.color(\n                    QPalette.ColorGroup.Active, QPalette.ColorRole.HighlightedText\n                ),\n            )\n        else:\n            ctx.palette.setColor(\n                QPalette.ColorRole.Text,\n                option.palette.color(QPalette.ColorGroup.Active, QPalette.ColorRole.Text),\n            )\n\n        textRect = style.subElementRect(QStyle.SubElement.SE_ItemViewItemText, options)\n\n        if index.column() != 0:\n            textRect.adjust(5, 0, 0, 0)\n\n        thefuckyourshitup_constant = 4\n        margin = (option.rect.height() - options.fontMetrics.height()) // 2\n        margin = margin - thefuckyourshitup_constant\n        textRect.setTop(textRect.top() + margin)\n\n        painter.translate(textRect.topLeft())\n        painter.setClipRect(textRect.translated(-textRect.topLeft()))\n        self.doc.documentLayout().draw(painter, ctx)\n\n        painter.restore()\n\n    def sizeHint(self, option, index):\n        thefuckyourshitup_constant = 4\n        return QtCore.QSize(\n            self.doc.idealWidth(),\n            self.doc.size().height() - thefuckyourshitup_constant,\n        )\n\n\nclass LabelListWidgetItem(QtGui.QStandardItem):\n    def __init__(self, text=None, shape=None):\n        super(LabelListWidgetItem, self).__init__()\n        self.setText(text)\n\n        self.setShape(shape)\n\n        self.setCheckable(True)\n        self.setCheckState(Qt.CheckState.Checked)\n        self.setEditable(False)\n        self.setTextAlignment(Qt.AlignmentFlag.AlignBottom)\n        font = QtGui.QFont(\"Arial\", 10)\n        self.setFont(font)\n\n    def clone(self):\n        return LabelListWidgetItem(self.text(), self.shape())\n\n    def setShape(self, shape):\n        self.setData(shape, Qt.ItemDataRole.UserRole)\n\n    def shape(self):\n        return self.data(Qt.ItemDataRole.UserRole)\n\n    def __hash__(self):\n        return id(self)\n\n    def __repr__(self):\n        return '{}(\"{}\")'.format(self.__class__.__name__, self.text())\n\n\nclass StandardItemModel(QtGui.QStandardItemModel):\n\n    itemDropped = QtCore.pyqtSignal()\n\n    def removeRows(self, *args, **kwargs):\n        ret = super().removeRows(*args, **kwargs)\n        self.itemDropped.emit()\n        return ret\n\n\nclass LabelListWidget(QtWidgets.QListView):\n\n    itemDoubleClicked = QtCore.pyqtSignal(LabelListWidgetItem)\n    
itemSelectionChanged = QtCore.pyqtSignal(list, list)\n\n    def __init__(self):\n        super(LabelListWidget, self).__init__()\n        self._selectedItems = []\n\n        self.setWindowFlags(Qt.WindowType.Window)\n        self.setModel(StandardItemModel())\n        self.model().setItemPrototype(LabelListWidgetItem())\n        self.setItemDelegate(HTMLDelegate())\n        self.setSelectionMode(QtWidgets.QAbstractItemView.SelectionMode.ExtendedSelection)\n        self.setDragDropMode(QtWidgets.QAbstractItemView.DragDropMode.InternalMove)\n        self.setDefaultDropAction(QtCore.Qt.DropAction.MoveAction)\n\n        self.doubleClicked.connect(self.itemDoubleClickedEvent)\n        self.selectionModel().selectionChanged.connect(\n            self.itemSelectionChangedEvent\n        )\n\n    def __len__(self):\n        return self.model().rowCount()\n\n    def __getitem__(self, i):\n        return self.model().item(i)\n\n    def __iter__(self):\n        for i in range(len(self)):\n            yield self[i]\n\n    @property\n    def itemDropped(self):\n        return self.model().itemDropped\n\n    @property\n    def itemChanged(self):\n        return self.model().itemChanged\n\n    def itemSelectionChangedEvent(self, selected, deselected):\n        selected = [self.model().itemFromIndex(i) for i in selected.indexes()]\n        deselected = [\n            self.model().itemFromIndex(i) for i in deselected.indexes()\n        ]\n        self.itemSelectionChanged.emit(selected, deselected)\n\n    def itemDoubleClickedEvent(self, index):\n        self.itemDoubleClicked.emit(self.model().itemFromIndex(index))\n\n    def selectedItems(self):\n        return [self.model().itemFromIndex(i) for i in self.selectedIndexes()]\n\n    def scrollToItem(self, item):\n        self.scrollTo(self.model().indexFromItem(item))\n\n    def addItem(self, item):\n        if not isinstance(item, LabelListWidgetItem):\n            raise TypeError(\"item must be LabelListWidgetItem\")\n        self.model().setItem(self.model().rowCount(), 0, item)\n        item.setSizeHint(self.itemDelegate().sizeHint(None, None))\n\n    def removeItem(self, item):\n        index = self.model().indexFromItem(item)\n        self.model().removeRows(index.row(), 1)\n\n    def selectItem(self, item):\n        index = self.model().indexFromItem(item)\n        self.selectionModel().select(index, QtCore.QItemSelectionModel.SelectionFlag.Select)\n\n    def findItemByShape(self, shape):\n        for row in range(self.model().rowCount()):\n            item = self.model().item(row, 0)\n            if item.shape() == shape:\n                return item\n        raise ValueError(\"cannot find shape: {}\".format(shape))\n\n    def clear(self):\n        self.model().clear()\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/links.py",
    "content": "import webbrowser\n\ndef open_git_hub():\n    \"\"\"\n    Opens the GitHub repository for the DLTA-AI project in the default web browser.\n\n    Parameters:\n    None\n\n    Returns:\n    None\n    \"\"\"\n\n    # Open the GitHub repository in the default web browser\n    webbrowser.open('https://github.com/0ssamaak0/DLTA-AI')  \n\ndef open_issue():\n    \"\"\"\n    Opens the GitHub issues page for the DLTA-AI project in the default web browser.\n\n    Parameters:\n    None\n\n    Returns:\n    None\n    \"\"\"\n\n    # Open the GitHub issues page in the default web browser\n    webbrowser.open('https://github.com/0ssamaak0/DLTA-AI/issues')\n\ndef open_license():\n    \"\"\"\n    Opens the license file for the DLTA-AI project in the default web browser.\n\n    Parameters:\n    None\n\n    Returns:\n    None\n    \"\"\"\n\n    # Open the license file in the default web browser\n    webbrowser.open('https://github.com/0ssamaak0/DLTA-AI/blob/master/LICENSE')\n\ndef open_guide():\n    \"\"\"\n    Opens the guide for the DLTA-AI project in the default web browser.\n\n    Parameters:\n    None\n\n    Returns:\n    None\n    \"\"\"\n\n    # Open the guide in the default web browser\n    webbrowser.open('https://0ssamaak0.github.io/DLTA-AI/')\n\ndef open_release(link = None):\n    \"\"\"\n    Opens the release page for the DLTA-AI project in the default web browser.\n\n    Parameters:\n    link (str): The link to the release page. If None, the default link will be used.\n\n    Returns:\n    None\n    \"\"\"\n    # Import necessary modules\n    import webbrowser\n\n    # If no link was provided, use the default link\n    if link is None:\n        link = 'https://github.com/0ssamaak0/DLTA-AI/releases'\n    else:\n        link = \"https://github.com/\" + link\n\n    # Open the release page in the default web browser\n    webbrowser.open(link)\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/merge_feature_UI.py",
    "content": "import json\nfrom PyQt6 import QtWidgets\nfrom PyQt6 import QtCore\n\n\n# create an interface for merging features\nclass MergeFeatureUI():\n    def __init__(self, parent):\n        self.parent = parent\n        self.selectedmodels = []\n\n    # merge dialog \n    def mergeSegModels(self):\n        # add a resizable and scrollable dialog that contains all the models and allow the user to select among them using checkboxes\n        models = []\n        with open(\"saved_models.json\") as json_file:\n            data = json.load(json_file)\n            for model in data.keys():\n                if \"YOLOv8\" not in model:\n                    models.append(model)\n        # ExplorerMerge = ModelExplorerDialog(merge=True)\n        # ExplorerMerge.adjustSize()\n        # ExplorerMerge.resize(\n        #     int(ExplorerMerge.width() * 2), int(ExplorerMerge.height() * 1.5))\n        # ExplorerMerge.exec()\n\n        dialog = QtWidgets.QDialog(self.parent)\n        dialog.setWindowTitle('Select Models')\n        dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n        dialog.setWindowModality(QtCore.Qt.WindowModality.ApplicationModal)\n        dialog.resize(200, 250)\n        dialog.setMinimumSize(QtCore.QSize(200, 200))\n        verticalLayout = QtWidgets.QVBoxLayout(dialog)\n        verticalLayout.setObjectName(\"verticalLayout\")\n        scrollArea = QtWidgets.QScrollArea(dialog)\n        scrollArea.setWidgetResizable(True)\n        scrollArea.setObjectName(\"scrollArea\")\n        scrollAreaWidgetContents = QtWidgets.QWidget()\n        scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 478, 478))\n        scrollAreaWidgetContents.setObjectName(\"scrollAreaWidgetContents\")\n        verticalLayout_2 = QtWidgets.QVBoxLayout(scrollAreaWidgetContents)\n        verticalLayout_2.setObjectName(\"verticalLayout_2\")\n        self.scrollAreaWidgetContents = scrollAreaWidgetContents\n        scrollArea.setWidget(scrollAreaWidgetContents)\n        verticalLayout.addWidget(scrollArea)\n        buttonBox = QtWidgets.QDialogButtonBox(dialog)\n        buttonBox.setOrientation(QtCore.Qt.Orientation.Horizontal)\n        buttonBox.setStandardButtons(\n            QtWidgets.QDialogButtonBox.StandardButton.Cancel | QtWidgets.QDialogButtonBox.StandardButton.Ok)\n        buttonBox.setObjectName(\"buttonBox\")\n        verticalLayout.addWidget(buttonBox)\n        buttonBox.accepted.connect(dialog.accept)\n        buttonBox.rejected.connect(dialog.reject)\n        self.models = []\n        for i in range(len(models)):\n            self.models.append(QtWidgets.QCheckBox(models[i], dialog))\n            verticalLayout_2.addWidget(self.models[i])\n        dialog.show()\n        dialog.exec()\n        self.selectedmodels.clear()\n        for i in range(len(self.models)):\n            if self.models[i].isChecked():\n                self.selectedmodels.append(self.models[i].text())\n        print(self.selectedmodels)\n        return self.selectedmodels\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/notification.py",
    "content": "import os\n\ndef PopUp(text):\n    \"\"\"\n    Sends a desktop notification with the given text.\n\n    Args:\n        text (str): The text to display in the notification.\n\n    Returns:\n        None\n    \"\"\"\n    try:\n        from notifypy import Notify\n        # Create a Notify object with the default title\n        notification = Notify(default_notification_title=\"DLTA-AI\")\n\n        # Set the message of the notification to the given text\n        notification.message = text\n\n        # Set the notification icon\n        print(os.getcwd())\n        notification.icon = \"labelme/icons/icon.ico\"\n\n        # Send the notification asynchronously\n        notification.send(block=False)\n    except Exception as e:\n        print(e)\n        print(\"please install notifypy to get desktop notifications\")\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/open_file.py",
    "content": "import os\nimport subprocess\nimport platform\n\n\n\n\ndef PopUp():\n    \"\"\"\n    Open a file with the default application for the file type.\n\n    Args:\n        filename (str): The name of the file to open.\n\n    Raises:\n        OSError: If the file cannot be opened.\n\n    Returns:\n        None\n    \"\"\"\n    filename = os.path.join(os.getcwd(), 'labelme/utils/custom_exports.py')\n    print(filename)\n    # Determine the platform and use the appropriate command to open the file\n    # Windows\n    if platform.system() == 'Windows':\n        os.startfile(filename)\n    # macOS\n    elif platform.system() == 'Darwin':\n        os.system(f'open {filename}')\n    else:\n        try:\n            opener = \"open\" if platform.system() == \"Darwin\" else \"xdg-open\"\n            subprocess.call([opener, filename])\n        except OSError:\n            print(f\"Could not open file: {filename}\")\n\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/preferences_UI.py",
    "content": "import yaml\nfrom PyQt6 import QtWidgets, QtGui, QtCore\n\n\n\n\n\ndef PopUp():\n    \"\"\"\n\n    Description:\n    This function displays a dialog box with preferences for the LabelMe application, including theme and notification settings.\n\n    Parameters:\n    This function takes no parameters.\n\n    Returns:\n    If the user clicks the OK button, this function writes the new theme and notification settings to the config file and returns `QtWidgets.QDialog.DialogCode.Accepted`. If the user clicks the Cancel button, this function does not write any changes to the config file and returns `QtWidgets.QDialog.Rejected`.\n\n    Libraries:\n    This function requires the following libraries to be installed:\n    - yaml\n    - PyQt6.QtWidgets\n    - PyQt6.QtGui\n    - PyQt6.QtCore\n    \"\"\"\n    \n\n    with open(\"labelme/config/default_config.yaml\", \"r\") as f:\n        config = yaml.load(f, Loader=yaml.FullLoader)\n\n    # Create the dialog\n    dialog = QtWidgets.QDialog()\n    dialog.setWindowTitle(\"Preferences\")\n    dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n\n    # Create the labels\n    themeLabel = QtWidgets.QLabel(\"Theme Settings 🌓\")\n    themeLabel.setFont(QtGui.QFont(\"Arial\", 10, QtGui.QFont.Weight.Bold))\n    theme_note_label = QtWidgets.QLabel(\"Requires app restart to take effect\")\n\n    notificationLabel = QtWidgets.QLabel(\"Notifications Settings 🔔\")\n    notificationLabel.setFont(QtGui.QFont(\"Arial\", 10, QtGui.QFont.Weight.Bold))\n    notification_note_label = QtWidgets.QLabel(\"Notifications works only for long tasks and if the app isn't focused\")\n\n    # Load the current theme from the config file\n    current_theme = config[\"theme\"]\n    current_mute =  config[\"mute\"]\n\n    # Create the radio buttons\n    autoButton = QtWidgets.QRadioButton(\"OS Default\")\n    lightButton = QtWidgets.QRadioButton(\"Light\")\n    darkButton = QtWidgets.QRadioButton(\"Dark\")\n\n    # Set the current theme as the default selection\n    if current_theme == \"auto\":\n        autoButton.setChecked(True)\n    elif current_theme == \"light\":\n        lightButton.setChecked(True)\n    elif current_theme == \"dark\":\n        darkButton.setChecked(True)\n\n    # Create the images\n    autoImage = QtGui.QPixmap(\"labelme/icons/auto-img.png\").scaledToWidth(128)\n    lightImage = QtGui.QPixmap(\"labelme/icons/light-img.png\").scaledToWidth(128)\n    darkImage = QtGui.QPixmap(\"labelme/icons/dark-img.png\").scaledToWidth(128)\n\n    # Create the image labels\n    autoLabel = QtWidgets.QLabel()\n    autoLabel.setPixmap(autoImage)\n    lightLabel = QtWidgets.QLabel()\n    lightLabel.setPixmap(lightImage)\n    darkLabel = QtWidgets.QLabel()\n    darkLabel.setPixmap(darkImage)\n\n    # Create the layout\n    layout = QtWidgets.QVBoxLayout()\n    layout.addWidget(themeLabel)\n    layout.addWidget(theme_note_label)\n    buttonLayout = QtWidgets.QHBoxLayout()\n    buttonLayout.addWidget(autoButton)\n    buttonLayout.addWidget(lightButton)\n    buttonLayout.addWidget(darkButton)\n    layout.addLayout(buttonLayout)\n\n    # Create the image layout\n    imageLayout = QtWidgets.QHBoxLayout()\n    imageLayout.addWidget(autoLabel)\n    imageLayout.addWidget(lightLabel)\n    imageLayout.addWidget(darkLabel)\n    layout.addLayout(imageLayout)\n\n    # Create the notification checkbox\n    notificationCheckbox = QtWidgets.QCheckBox(\"Mute Notifications\")\n    notificationCheckbox.setChecked(current_mute)\n    
layout.addWidget(notificationLabel)\n    layout.addWidget(notification_note_label)\n    layout.addWidget(notificationCheckbox)\n\n    dialog.setLayout(layout)\n\n    # Create the OK and Cancel buttons\n    okButton = QtWidgets.QPushButton(\"OK\")\n    cancelButton = QtWidgets.QPushButton(\"Cancel\")\n\n    # Add the buttons to a QHBoxLayout\n    buttonLayout = QtWidgets.QHBoxLayout()\n    buttonLayout.addWidget(okButton)\n    buttonLayout.addWidget(cancelButton)\n\n    # Add the QHBoxLayout to the QVBoxLayout\n    layout.addLayout(buttonLayout)\n\n    # Connect the OK and Cancel buttons to the accept and reject functions\n    okButton.clicked.connect(dialog.accept)\n    cancelButton.clicked.connect(dialog.reject)\n\n    # Show the dialog\n    if dialog.exec() == QtWidgets.QDialog.DialogCode.Accepted:\n        # Write the new theme and notification settings to the config file\n        if autoButton.isChecked():\n            theme = \"auto\"\n        elif lightButton.isChecked():\n            theme = \"light\"\n        elif darkButton.isChecked():\n            theme = \"dark\"\n        mute = notificationCheckbox.isChecked()\n        with open(\"labelme/config/default_config.yaml\", \"r\") as f:\n            config = yaml.load(f, Loader=yaml.FullLoader)\n        config[\"theme\"] = theme\n        config[\"mute\"] = mute\n        with open(\"labelme/config/default_config.yaml\", \"w\") as f:\n            yaml.dump(config, f)\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/runtime_data_UI.py",
    "content": "from PyQt6.QtWidgets import QDialog, QLabel, QVBoxLayout\nfrom PyQt6.QtGui import QFont\nfrom PyQt6 import QtCore\nimport psutil\nimport torch\n\n\n\ndef PopUp():\n    \"\"\"\n\n    Description:\n    This function displays a dialog box with information about the runtime data of the system, including GPU and RAM stats.\n\n    Parameters:\n    This function takes no parameters.\n\n    Returns:\n    This function does not return anything.\n\n    Libraries:\n    This function requires the following libraries to be installed:\n    - PyQt6\n    - psutil\n    - torch\n    \"\"\"\n\n    # Create a dialog box to display the runtime data\n    dialog = QDialog()\n    dialog.setWindowTitle(\"Runtime data\")\n    dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n    layout = QVBoxLayout(dialog)\n    layout.setContentsMargins(20, 20, 20, 20)\n    layout.setSpacing(10)\n\n    # Set font styles for the title and normal text\n    title_font = QFont()\n    title_font.setPointSize(12)\n    title_font.setBold(True)\n\n    normal_font = QFont()\n    normal_font.setPointSize(10)\n\n    # If CUDA is available, display GPU stats\n    if torch.cuda.is_available():\n        device_name = torch.cuda.get_device_name(0)\n        gpu_title_label = QLabel(\"Device Stats\")\n        gpu_title_label.setFont(title_font)\n        layout.addWidget(gpu_title_label)\n\n        gpu_name_label = QLabel(f\"GPU Name: {device_name}\")\n        gpu_name_label.setFont(normal_font) \n        layout.addWidget(gpu_name_label)\n\n        total_vram = round(torch.cuda.get_device_properties(0).total_memory / (1024 ** 3), 2)\n        used_vram = round(torch.cuda.memory_allocated(0) / (1024 ** 3), 2)\n        gpu_vram_label = QLabel(f\"Total GPU VRAM: {total_vram} GB\\nUsed: {used_vram} GB\")\n        gpu_vram_label.setFont(normal_font)\n        layout.addWidget(gpu_vram_label)\n\n    # If CUDA is not available, display CPU stats\n    else:\n        cpu_label = QLabel(\"DLTA-AI is Using CPU\")\n        cpu_label.setFont(title_font)\n        layout.addWidget(cpu_label)\n\n    # Display RAM stats\n    ram_title_label = QLabel(\"RAM Stats\")\n    ram_title_label.setFont(title_font)\n    layout.addWidget(ram_title_label)\n\n    total_ram = round(psutil.virtual_memory().total / (1024 ** 3), 2)\n    used_ram = round(psutil.virtual_memory().used / (1024 ** 3), 2)\n    ram_label = QLabel(f\"Total RAM: {total_ram} GB\\nUsed: {used_ram} GB\")\n    ram_label.setFont(normal_font)\n    layout.addWidget(ram_label)\n\n    # Display the dialog box\n    dialog.exec()"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/scaleObject_UI.py",
    "content": "from PyQt6 import QtCore\nfrom PyQt6.QtCore import Qt\nfrom PyQt6 import QtWidgets\nfrom labelme.utils.helpers.mathOps import scaleQTshape\n\n\n\n\ndef PopUp(self):\n    \n    \"\"\"\n    Summary:\n        Show a dialog to scale a shape.\n        \n    Args:\n        self: the main window object to access the canvas\n        \n    Returns:\n        result: the result of the dialog\n    \"\"\"\n    \n    originalshape = self.canvas.selectedShapes[0].copy()\n    xx = [originalshape.points[i].x()\n            for i in range(len(originalshape.points))]\n    yy = [originalshape.points[i].y()\n            for i in range(len(originalshape.points))]\n    center = [sum(xx) / len(xx), sum(yy) / len(yy)]\n\n    dialog = QtWidgets.QDialog()\n    dialog.setWindowTitle(\"Scaling\")\n    dialog.setWindowModality(Qt.WindowModality.ApplicationModal)\n    dialog.resize(400, 400)\n\n    layout = QtWidgets.QVBoxLayout()\n\n    label = QtWidgets.QLabel(\n        \"Scaling object with ID: \" + str(originalshape.group_id) + \"\\n \")\n    label.setStyleSheet(\n        \"QLabel { font-weight: bold; }\")\n    layout.addWidget(label)\n\n    xLabel = QtWidgets.QLabel()\n    xLabel.setText(\"Width(x) factor is: \" + \"100\" + \"%\")\n    yLabel = QtWidgets.QLabel()\n    yLabel.setText(\"Hight(y) factor is: \" + \"100\" + \"%\")\n\n    xSlider = QtWidgets.QSlider(QtCore.Qt.Orientation.Horizontal)\n    xSlider.setMinimum(50)\n    xSlider.setMaximum(150)\n    xSlider.setValue(100)\n    xSlider.setTickPosition(\n        QtWidgets.QSlider.TickPosition.TicksBelow)\n    xSlider.setTickInterval(1)\n    xSlider.setMaximumWidth(750)\n    xSlider.valueChanged.connect(lambda: xLabel.setText(\n        \"Width(x) factor is: \" + str(xSlider.value()) + \"%\"))\n    xSlider.valueChanged.connect(lambda: scaleQTshape(self,\n        originalshape, center, xSlider.value(), ySlider.value()))\n\n    ySlider = QtWidgets.QSlider(QtCore.Qt.Orientation.Vertical)\n    ySlider.setMinimum(50)\n    ySlider.setMaximum(150)\n    ySlider.setValue(100)\n    ySlider.setTickPosition(\n        QtWidgets.QSlider.TickPosition.TicksBelow)\n    ySlider.setTickInterval(1)\n    ySlider.setMaximumWidth(750)\n    ySlider.valueChanged.connect(lambda: yLabel.setText(\n        \"Hight(y) factor is: \" + str(ySlider.value()) + \"%\"))\n    ySlider.valueChanged.connect(lambda: scaleQTshape(self,\n        originalshape, center, xSlider.value(), ySlider.value()))\n\n    layout.addWidget(xLabel)\n    layout.addWidget(yLabel)\n    layout.addWidget(xSlider)\n    layout.addWidget(ySlider)\n\n    buttonBox = QtWidgets.QDialogButtonBox(\n        QtWidgets.QDialogButtonBox.StandardButton.Ok)\n    buttonBox.accepted.connect(dialog.accept)\n    layout.addWidget(buttonBox)\n    dialog.setLayout(layout)\n    result = dialog.exec()\n    return result    \n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/segmentation_options_UI.py",
    "content": "# relevant imports for the functions\nfrom PyQt6 import QtCore\nfrom PyQt6 import QtWidgets\nimport yaml\nfrom ..utils.helpers.mathOps import color_palette\n\n\ncoco_classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',\n                'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',\n                'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',\n                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',\n                'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',\n                'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',\n                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard',\n                'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',\n                'scissors', 'teddy bear', 'hair drier', 'toothbrush']\n# make a list of 12 unique colors as we will use them to draw bounding boxes of different classes in different colors\n# so the calor palette will be used to draw bounding boxes of different classes in different colors\n# the color pallette should have the famous 12 colors as red, green, blue, yellow, cyan, magenta, white, black, gray, brown, pink, and orange in bgr format\n\n\nclass SegmentationOptionsUI():\n    def __init__(self, parent):\n        self.parent = parent\n        self.conf_threshold = 0.3\n        self.iou_threshold = 0.5\n        with open (\"labelme/config/default_config.yaml\") as f:\n            self.config = yaml.load(f, Loader=yaml.FullLoader)\n        self.default_classes = self.config[\"default_classes\"]\n        try:\n            self.selectedclasses = {}\n            for class_ in self.default_classes:\n                if class_ in coco_classes:\n                    index = coco_classes.index(class_)\n                    self.selectedclasses[index] = class_\n        except:\n            self.selectedclasses = {i:class_ for i,class_ in enumerate(coco_classes)}\n            print(\"error in loading the default classes from the config file, so we will use all the coco classes\")\n        \n        \n\n    # get the thresold as input from the user\n    def setConfThreshold(self, prev_threshold=0.3):\n        dialog = QtWidgets.QDialog(self.parent)\n        dialog.setWindowTitle('Threshold Selector')\n        dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n\n        layout = QtWidgets.QVBoxLayout(dialog)\n\n        label = QtWidgets.QLabel('Enter Confidence Threshold')\n        layout.addWidget(label)\n\n        slider = QtWidgets.QSlider(QtCore.Qt.Orientation.Horizontal)\n        slider.setMinimum(1)\n        slider.setMaximum(100)\n        slider.setValue(int(prev_threshold * 100))\n\n        text_input = QtWidgets.QLineEdit(str(prev_threshold))\n\n        def on_slider_change(value):\n            text_input.setText(str(value / 100))\n\n        def on_text_change(text):\n            try:\n                value = float(text)\n                slider.setValue(int(value * 100))\n            except ValueError:\n                pass\n\n        slider.valueChanged.connect(on_slider_change)\n        text_input.textChanged.connect(on_text_change)\n\n        
layout.addWidget(slider)\n        layout.addWidget(text_input)\n\n        button_box = QtWidgets.QDialogButtonBox(QtWidgets.QDialogButtonBox.StandardButton.Ok | QtWidgets.QDialogButtonBox.StandardButton.Cancel)\n        layout.addWidget(button_box)\n\n        def on_ok():\n            threshold = float(text_input.text())\n            dialog.accept()\n            return threshold\n\n        def on_cancel():\n            dialog.reject()\n            return prev_threshold\n\n        button_box.accepted.connect(on_ok)\n        button_box.rejected.connect(on_cancel)\n\n        if dialog.exec() == QtWidgets.QDialog.DialogCode.Accepted:\n            return slider.value() / 100\n        else:\n            return prev_threshold\n        \n\n    def setIOUThreshold(self, prev_threshold=0.5):\n        dialog = QtWidgets.QDialog(self.parent)\n        dialog.setWindowTitle('Threshold Selector')\n        dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n\n        layout = QtWidgets.QVBoxLayout(dialog)\n\n        label = QtWidgets.QLabel('Enter IOU Threshold')\n        layout.addWidget(label)\n\n        slider = QtWidgets.QSlider(QtCore.Qt.Orientation.Horizontal)\n        slider.setMinimum(1)\n        slider.setMaximum(100)\n        slider.setValue(int(prev_threshold * 100))\n\n        text_input = QtWidgets.QLineEdit(str(prev_threshold))\n\n        def on_slider_change(value):\n            text_input.setText(str(value / 100))\n\n        def on_text_change(text):\n            try:\n                value = float(text)\n                slider.setValue(int(value * 100))\n            except ValueError:\n                pass\n\n        slider.valueChanged.connect(on_slider_change)\n        text_input.textChanged.connect(on_text_change)\n\n        layout.addWidget(slider)\n        layout.addWidget(text_input)\n\n        button_box = QtWidgets.QDialogButtonBox(QtWidgets.QDialogButtonBox.StandardButton.Ok | QtWidgets.QDialogButtonBox.StandardButton.Cancel)\n        layout.addWidget(button_box)\n\n        def on_ok():\n            threshold = float(text_input.text())\n            dialog.accept()\n            return threshold\n\n        def on_cancel():\n            dialog.reject()\n            return prev_threshold\n\n        button_box.accepted.connect(on_ok)\n        button_box.rejected.connect(on_cancel)\n\n        if dialog.exec() == QtWidgets.QDialog.DialogCode.Accepted:\n            return slider.value() / 100\n        else:\n            return prev_threshold\n\n\n    # add a resizable and scrollable dialog that contains all coco classes and allow the user to select among them using checkboxes\n    def selectClasses(self):\n        \"\"\"\n        Display a dialog box that allows the user to select which classes to annotate.\n\n        The function creates a QDialog object and adds various widgets to it, including a QScrollArea that contains QCheckBox\n        widgets for each class. The function sets the state of each QCheckBox based on whether the class is in the\n        self.selectedclasses dictionary. The function also adds \"Select All\", \"Deselect All\", \"Select Classes\", \"Set as Default\",\n        and \"Cancel\" buttons to the dialog box. 
When the user clicks the \"Select Classes\" button, the function saves the selected\n        classes to the self.selectedclasses dictionary and returns it.\n\n        :return: A dictionary that maps class indices to class names for the selected classes.\n        \"\"\"\n        # Create a new dialog box\n        dialog = QtWidgets.QDialog(self.parent)\n        dialog.setWindowTitle('Select Classes')\n        dialog.setWindowModality(QtCore.Qt.WindowModality.ApplicationModal)\n        dialog.resize(500, 500)\n        dialog.setMinimumSize(QtCore.QSize(500, 500))\n        dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n\n        # Create a vertical layout for the dialog box\n        verticalLayout = QtWidgets.QVBoxLayout(dialog)\n        verticalLayout.setObjectName(\"verticalLayout\")\n\n        # Create a horizontal layout for the \"Select All\" and \"Deselect All\" buttons\n        horizontalLayout = QtWidgets.QHBoxLayout()\n        selectAllButton = QtWidgets.QPushButton(\"Select All\", dialog)\n        deselectAllButton = QtWidgets.QPushButton(\"Deselect All\", dialog)\n        horizontalLayout.addWidget(selectAllButton)\n        horizontalLayout.addWidget(deselectAllButton)\n        verticalLayout.addLayout(horizontalLayout)\n\n        # Create a scroll area for the class checkboxes\n        scrollArea = QtWidgets.QScrollArea(dialog)\n        scrollArea.setWidgetResizable(True)\n        scrollArea.setObjectName(\"scrollArea\")\n        scrollAreaWidgetContents = QtWidgets.QWidget()\n        scrollAreaWidgetContents.setGeometry(QtCore.QRect(0, 0, 478, 478))\n        scrollAreaWidgetContents.setObjectName(\"scrollAreaWidgetContents\")\n        gridLayout = QtWidgets.QGridLayout(scrollAreaWidgetContents)\n        gridLayout.setObjectName(\"gridLayout\")\n        self.scrollAreaWidgetContents = scrollAreaWidgetContents\n        scrollArea.setWidget(scrollAreaWidgetContents)\n        verticalLayout.addWidget(scrollArea)\n\n        # Create a button box for the \"Select Classes\", \"Set as Default\", and \"Cancel\" buttons\n        buttonBox = QtWidgets.QDialogButtonBox(dialog)\n        buttonBox.setOrientation(QtCore.Qt.Orientation.Horizontal)\n        buttonBox.setStandardButtons(\n            QtWidgets.QDialogButtonBox.StandardButton.Cancel | QtWidgets.QDialogButtonBox.StandardButton.Ok)\n        buttonBox.setObjectName(\"buttonBox\")\n        buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok).setText(\"Select Classes\")\n        defaultButton = QtWidgets.QPushButton(\"Set as Default\", dialog)\n        buttonBox.addButton(defaultButton, QtWidgets.QDialogButtonBox.ButtonRole.ActionRole)\n\n        # Add the buttons to a QHBoxLayout\n        buttonLayout = QtWidgets.QHBoxLayout()\n        buttonLayout.addWidget(buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Ok))\n        buttonLayout.addWidget(defaultButton)\n        buttonLayout.addWidget(buttonBox.button(QtWidgets.QDialogButtonBox.StandardButton.Cancel))\n\n        # Add the QHBoxLayout to the QVBoxLayout\n        verticalLayout.addLayout(buttonLayout)\n\n        # Connect the button signals to their respective slots\n        buttonBox.accepted.connect(lambda: self.saveClasses(dialog))\n        buttonBox.rejected.connect(dialog.reject)\n        defaultButton.clicked.connect(lambda: self.saveClasses(dialog, True))\n\n        # Create a QCheckBox for each class and add it to the grid layout\n        self.classes = []\n        for i in range(len(coco_classes)):\n       
     self.classes.append(QtWidgets.QCheckBox(coco_classes[i], dialog))\n            row = i // 3\n            col = i % 3\n            gridLayout.addWidget(self.classes[i], row, col)\n\n        # Set the state of each QCheckBox based on whether the class is in the self.selectedclasses dictionary\n        for value in self.selectedclasses.values():\n            if value != None:\n                indx = coco_classes.index(value)\n                self.classes[indx].setChecked(True)\n\n        # Connect the \"Select All\" and \"Deselect All\" buttons to their respective slots\n        selectAllButton.clicked.connect(lambda: self.selectAll())\n        deselectAllButton.clicked.connect(lambda: self.deselectAll())\n\n        # Show the dialog box and wait for the user to close it\n        dialog.show()\n        dialog.exec()\n\n        # Save the selected classes to the self.selectedclasses dictionary and return it\n        self.selectedclasses.clear()\n        for i in range(len(self.classes)):\n            if self.classes[i].isChecked():\n                indx = coco_classes.index(self.classes[i].text())\n                self.selectedclasses[indx] = self.classes[i].text()\n        return self.selectedclasses\n\n    def saveClasses(self, dialog, is_default=False):\n        \"\"\"\n        Save the selected classes to the self.selectedclasses dictionary.\n\n        The function clears the self.selectedclasses dictionary and then iterates over the QCheckBox widgets for each class.\n        If a QCheckBox is checked, the function adds the corresponding class name to the self.selectedclasses dictionary. If the\n        is_default parameter is True, the function also updates the default_config.yaml file with the selected classes.\n\n        :param dialog: The QDialog object that contains the class selection dialog.\n        :param is_default: A boolean that indicates whether to update the default_config.yaml file with the selected classes.\n        \"\"\"\n        # Clear the self.selectedclasses dictionary\n        self.selectedclasses.clear()\n\n        # Iterate over the QCheckBox widgets for each class\n        for i in range(len(self.classes)):\n            if self.classes[i].isChecked():\n                indx = coco_classes.index(self.classes[i].text())\n                self.selectedclasses[indx] = self.classes[i].text()\n\n        # If is_default is True, update the default_config.yaml file with the selected classes\n        if is_default:\n            with open(\"labelme/config/default_config.yaml\", 'r') as f:\n                config = yaml.load(f, Loader=yaml.FullLoader)\n            config['default_classes'] = list(self.selectedclasses.values())\n            with open(\"labelme/config/default_config.yaml\", 'w') as f:\n                yaml.dump(config, f)\n\n        # Accept the dialog box\n        dialog.accept()\n\n    def selectAll(self):\n        \"\"\"\n        Select all classes in the class selection dialog.\n\n        The function iterates over the QCheckBox widgets for each class and sets their checked state to True.\n        \"\"\"\n        # Iterate over the QCheckBox widgets for each class and set their checked state to True\n        for checkbox in self.classes:\n            checkbox.setChecked(True)\n\n    def deselectAll(self):\n        \"\"\"\n        Deselect all classes in the class selection dialog.\n\n        The function iterates over the QCheckBox widgets for each class and sets their checked state to False.\n        \"\"\"\n        # Iterate over the QCheckBox widgets 
for each class and set their checked state to False\n        for checkbox in self.classes:\n            checkbox.setChecked(False)\n\n\n    \n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/shortcut_selector_UI.py",
    "content": "import yaml\nfrom PyQt6 import QtWidgets, QtGui, QtCore\n\n\ndef PopUp():\n    \"\"\"\n    Displays a dialog box for selecting and editing keyboard shortcuts for the application.\n\n    Parameters:\n    None\n\n    Returns:\n    None\n    \"\"\"\n\n    # Load the default shortcuts from the config file\n    shortcuts = {}\n    with open(\"labelme/config/default_config.yaml\", \"r\") as f:\n        config = yaml.load(f, Loader=yaml.FullLoader)\n        shortcuts = config.get(\"shortcuts\", {})\n\n    # Encode the shortcut names for display in the table\n    shortcuts_names_encode = {name: name.lower().capitalize().replace(\"_\", \" \").replace(\"Sam\", \"SAM\").replace(\"sam\", \"SAM\") for name in shortcuts.keys()}\n\n    # Decode the shortcut names back to their original form\n    shortcuts_names_decode = {value: key for key, value in shortcuts_names_encode.items()}\n\n    # Change the keys of the shortcuts dictionary to use the encoded names\n    shortcuts = {shortcuts_names_encode[key]: value for key, value in shortcuts.items()}\n\n    # Create a table to display the shortcuts\n    shortcut_table = QtWidgets.QTableWidget()\n    shortcut_table.setColumnCount(2)\n    shortcut_table.setHorizontalHeaderLabels(['Function', 'Shortcut'])\n    shortcut_table.setRowCount(len(shortcuts))\n    shortcut_table.verticalHeader().setVisible(False)\n\n    # Populate the table with the shortcut names and keys\n    row = 0\n    for name, key in shortcuts.items():\n        name_item = QtWidgets.QTableWidgetItem(name)\n        shortcut_item = QtWidgets.QTableWidgetItem(key)\n        shortcut_table.setItem(row, 0, name_item)\n        shortcut_table.setItem(row, 1, shortcut_item)\n        row += 1\n\n    # Define a function to handle clicks on the shortcut table\n    def on_shortcut_table_clicked(item):\n        row = item.row()\n        name_item = shortcut_table.item(row, 0)\n        name = name_item.text()\n        current_key = shortcuts[name]\n        key_edit = QtWidgets.QKeySequenceEdit(QtGui.QKeySequence(current_key))\n        key_edit.setWindowTitle(f\"Edit Shortcut for {name}\")\n        key_edit_label = QtWidgets.QLabel(\"Enter new shortcut for \" + name)\n        dialog = QtWidgets.QDialog()\n        dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n        dialog.setWindowTitle(\"Shortcut Selector\")\n        layout = QtWidgets.QVBoxLayout()\n        layout.addWidget(key_edit_label)\n        layout.addWidget(key_edit)\n        ok_button = QtWidgets.QPushButton(\"OK\")\n        ok_button.clicked.connect(dialog.accept)\n        null_hint_label = QtWidgets.QLabel(\"to remove shortcut, press 'Ctrl' only then click 'OK\") \n        layout.addWidget(ok_button)\n        layout.addWidget(null_hint_label)\n        dialog.setLayout(layout)\n\n        # If the user clicks OK, update the shortcut and table\n        if dialog.exec():\n            key = key_edit.keySequence().toString(QtGui.QKeySequence.SequenceFormat.NativeText)\n            if key in shortcuts.values() and list(shortcuts.keys())[list(shortcuts.values()).index(key)] != name:\n                conflicting_shortcut = list(shortcuts.keys())[list(shortcuts.values()).index(key)]\n                QtWidgets.QMessageBox.warning(None, \"Error\", f\"{key} is already assigned to {conflicting_shortcut}.\")\n            else:\n                if key == \"\":\n                    key = None\n                shortcuts[name] = key\n                shortcut_table.item(row, 1).setText(key)\n\n    def 
write_shortcuts_to_ui(config):\n        shortcuts = config.get(\"shortcuts\", {})\n        # Encode the shortcut names for display in the table\n        shortcuts_names_encode = {name: name.lower().capitalize().replace(\"_\", \" \").replace(\"Sam\", \"SAM\").replace(\"sam\", \"SAM\") for name in shortcuts.keys()}\n\n        # Change the keys of the shortcuts dictionary to use the encoded names\n        shortcuts = {shortcuts_names_encode[key]: value for key, value in shortcuts.items()}\n        \n        row = 0\n        for name, key in shortcuts.items():\n            name_item = QtWidgets.QTableWidgetItem(name)\n            shortcut_item = QtWidgets.QTableWidgetItem(key)\n            shortcut_table.setItem(row, 0, name_item)\n            shortcut_table.setItem(row, 1, shortcut_item)\n            row += 1\n\n    def on_reset_button_clicked():\n        \n        with open(\"labelme/config/default_config.yaml\", \"r\") as f:\n            config = yaml.load(f, Loader=yaml.FullLoader)\n        \n        write_shortcuts_to_ui(config)\n\n    def on_restore_button_clicked():\n        \n        with open(\"labelme/config/default_config_base.yaml\", \"r\") as f:\n            configBase = yaml.load(f, Loader=yaml.FullLoader)\n        with open(\"labelme/config/default_config.yaml\", \"r\") as f:\n            config = yaml.load(f, Loader=yaml.FullLoader)\n        config[\"shortcuts\"] = configBase[\"shortcuts\"]\n        \n        write_shortcuts_to_ui(config)\n\n    # Connect the on_shortcut_table_clicked function to the itemClicked signal of the shortcut table\n    shortcut_table.itemClicked.connect(on_shortcut_table_clicked)\n\n    # Create a dialog box to display the shortcut table\n    dialog = QtWidgets.QDialog()\n    dialog.setWindowFlags(dialog.windowFlags() & ~QtCore.Qt.WindowType.WindowContextHelpButtonHint)\n    dialog.setWindowTitle(\"Shortcuts\")\n    layout = QtWidgets.QVBoxLayout()\n    layout.addWidget(shortcut_table)\n    \n    ok_button = QtWidgets.QPushButton(\"OK\")\n    ok_button.clicked.connect(dialog.accept)\n    layout.addWidget(ok_button)\n    \n    reset_button = QtWidgets.QPushButton(\"Reset\")\n    reset_button.clicked.connect(on_reset_button_clicked)\n    layout.addWidget(reset_button)\n    \n    restore_button = QtWidgets.QPushButton(\"Restore Default Shortcuts\")\n    restore_button.clicked.connect(on_restore_button_clicked)\n    layout.addWidget(restore_button)\n    \n    note_label = QtWidgets.QLabel(\"Shortcuts will be updated after restarting the app.\")\n    layout.addWidget(note_label)\n    dialog.setLayout(layout)\n\n    # Set the size of the dialog box\n    dialog.setMinimumWidth(shortcut_table.sizeHintForColumn(0) + shortcut_table.sizeHintForColumn(1) + 55)\n    dialog.setMinimumHeight(shortcut_table.rowHeight(0) * 10 + 50)\n\n    # Set the size policy to allow vertical resizing\n    dialog.setSizePolicy(QtWidgets.QSizePolicy.Policy.Fixed, QtWidgets.QSizePolicy.Policy.Expanding)\n\n    # Display the dialog box\n    dialog.exec()\n    \n    # load shortcuts from shortcut table to be updated\n    shortcuts = {}\n    for row in range(shortcut_table.rowCount()):\n        name_item = shortcut_table.item(row, 0)\n        name = name_item.text()\n        shortcut_item = shortcut_table.item(row, 1)\n        shortcut = shortcut_item.text()\n        shortcuts[name] = shortcut if shortcut != \"\" else None\n\n    # Decode the shortcut names back to their original form\n    shortcuts = {shortcuts_names_decode[key]: value for key, value in shortcuts.items()}\n\n    # Write 
the updated shortcuts to the config file\n    with open(\"labelme/config/default_config.yaml\", \"w\") as f:\n        config[\"shortcuts\"] = shortcuts\n        yaml.dump(config, f)\n\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/tool_bar.py",
    "content": "from PyQt6 import QtCore\nfrom PyQt6 import QtWidgets\n\n\nclass ToolBar(QtWidgets.QToolBar):\n    def __init__(self, title):\n        super(ToolBar, self).__init__(title)\n        layout = self.layout()\n        m = (0, 0, 0, 0)\n        layout.setSpacing(0)\n        layout.setContentsMargins(*m)\n        self.setContentsMargins(*m)\n        self.setWindowFlags(self.windowFlags() | QtCore.Qt.WindowType.FramelessWindowHint)\n\n    def addAction(self, action):\n        if isinstance(action, QtWidgets.QWidgetAction):\n            return super(ToolBar, self).addAction(action)\n        btn = QtWidgets.QToolButton()\n        btn.setDefaultAction(action)\n        btn.setToolButtonStyle(self.toolButtonStyle())\n        self.addWidget(btn)\n\n        # center align\n        for i in range(self.layout().count()):\n            if isinstance(\n                self.layout().itemAt(i).widget(), QtWidgets.QToolButton\n            ):\n                self.layout().itemAt(i).setAlignment(QtCore.Qt.AlignmentFlag.AlignCenter)\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/unique_label_qlist_widget.py",
    "content": "# -*- encoding: utf-8 -*-\n\nfrom PyQt6.QtCore import Qt\nfrom PyQt6 import QtWidgets\n\nfrom .escapable_qlist_widget import EscapableQListWidget\n\n\nclass UniqueLabelQListWidget(EscapableQListWidget):\n    def mousePressEvent(self, event):\n        super(UniqueLabelQListWidget, self).mousePressEvent(event)\n        if not self.indexAt(event.pos()).isValid():\n            self.clearSelection()\n\n    def findItemsByLabel(self, label):\n        items = []\n        for row in range(self.count()):\n            item = self.item(row)\n            if item.data(Qt.ItemDataRole.UserRole) == label:\n                items.append(item)\n        return items\n\n    def createItemFromLabel(self, label):\n        item = QtWidgets.QListWidgetItem()\n        item.setData(Qt.ItemDataRole.UserRole, label)\n        return item\n\n    def setItemLabel(self, item, label, color=None):\n        qlabel = QtWidgets.QLabel()\n        if color is None:\n            qlabel.setText(\"{}\".format(label))\n        else:\n            qlabel.setText(\n                '{} <font color=\"#{:02x}{:02x}{:02x}\">●</font>'.format(\n                    label, *color\n                )\n            )\n        qlabel.setAlignment(Qt.AlignmentFlag.AlignBottom)\n\n        item.setSizeHint(qlabel.sizeHint())\n\n        self.setItemWidget(item, qlabel)\n"
  },
  {
    "path": "DLTA_AI_app/labelme/widgets/zoom_widget.py",
    "content": "from PyQt6 import QtCore\nfrom PyQt6 import QtGui\nfrom PyQt6 import QtWidgets\n\n\nclass ZoomWidget(QtWidgets.QSpinBox):\n    def __init__(self, value=100):\n        super(ZoomWidget, self).__init__()\n        self.setButtonSymbols(QtWidgets.QAbstractSpinBox.ButtonSymbols.NoButtons)\n        self.setRange(1, 1000)\n        self.setSuffix(\" %\")\n        self.setValue(value)\n        self.setToolTip(\"Zoom Level\")\n        self.setStatusTip(self.toolTip())\n        self.setAlignment(QtCore.Qt.AlignmentFlag.AlignCenter)\n\n    def minimumSizeHint(self):\n        height = super(ZoomWidget, self).minimumSizeHint().height()\n        fm = QtGui.QFontMetrics(self.font())\n        width = fm.width(str(self.maximum()))\n        return QtCore.QSize(width, height)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.circleci/config.yml",
    "content": "version: 2.1\n\njobs:\n  lint:\n    docker:\n      - image: cimg/python:3.7.4\n    steps:\n      - checkout\n      - run:\n          name: Install pre-commit hook\n          command: |\n            pip install pre-commit\n            pre-commit install\n      - run:\n          name: Linting\n          command: pre-commit run --all-files\n      - run:\n          name: Check docstring coverage\n          command: |\n            pip install interrogate\n            interrogate -v --ignore-init-method --ignore-module --ignore-nested-functions --ignore-regex \"__repr__\" --fail-under 50 mmdet\n\n  build_cpu:\n    parameters:\n      # The python version must match available image tags in\n      # https://circleci.com/developer/images/image/cimg/python\n      python:\n        type: string\n        default: \"3.7.4\"\n      torch:\n        type: string\n      torchvision:\n        type: string\n    docker:\n      - image: cimg/python:<< parameters.python >>\n    resource_class: large\n    steps:\n      - checkout\n      - run:\n          name: Install Libraries\n          command: |\n            sudo apt-get update\n            sudo apt-get install -y ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx libjpeg-dev zlib1g-dev libtinfo-dev libncurses5\n      - run:\n          name: Configure Python & pip\n          command: |\n            pip install --upgrade pip\n            pip install wheel\n      - run:\n          name: Install PyTorch\n          command: |\n            python -V\n            pip install torch==<< parameters.torch >>+cpu torchvision==<< parameters.torchvision >>+cpu -f https://download.pytorch.org/whl/torch_stable.html\n      - when:\n          condition:\n            equal: [ \"3.9.0\", << parameters.python >> ]\n          steps:\n            - run: pip install \"protobuf <= 3.20.1\" && sudo apt-get update && sudo apt-get -y install libprotobuf-dev protobuf-compiler cmake\n      - run:\n          name: Install mmdet dependencies\n          command: |\n            pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cpu/torch<< parameters.torch >>/index.html\n            pip install -r requirements/tests.txt -r requirements/optional.txt\n            pip install albumentations>=0.3.2 --no-binary imgaug,albumentations\n            pip install git+https://github.com/cocodataset/panopticapi.git\n      - run:\n          name: Build and install\n          command: |\n            pip install -e .\n      - run:\n          name: Run unittests\n          command: |\n            coverage run --branch --source mmdet -m pytest tests/\n            coverage xml\n            coverage report -m\n\n  build_cu101:\n    machine:\n      image: ubuntu-1604-cuda-10.1:201909-23\n    resource_class: gpu.nvidia.small\n    steps:\n      - checkout\n      - run:\n          name: Install Libraries\n          command: |\n            sudo apt-get update\n            sudo apt-get install -y git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 libgl1-mesa-glx\n      - run:\n          name: Configure Python & pip\n          command: |\n            pyenv global 3.7.0\n            pip install --upgrade pip\n            pip install wheel\n      - run:\n          name: Install PyTorch\n          command: |\n            python -V\n            pip install torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html\n      - run:\n          name: Install mmdet dependencies\n          # pip install mmcv-full -f 
https://download.openmmlab.com/mmcv/dist/cu101/torch${{matrix.torch_version}}/index.html\n          command: |\n            pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu101/torch1.6.0/index.html\n            pip install -r requirements/tests.txt -r requirements/optional.txt\n            pip install pycocotools\n            pip install albumentations>=0.3.2 --no-binary imgaug,albumentations\n            pip install git+https://github.com/cocodataset/panopticapi.git\n            python -c 'import mmcv; print(mmcv.__version__)'\n      - run:\n          name: Build and install\n          command: |\n            python setup.py check -m -s\n            TORCH_CUDA_ARCH_LIST=7.0 pip install -e .\n      - run:\n          name: Run unittests\n          command: |\n            pytest tests/\n\nworkflows:\n  unit_tests:\n    jobs:\n      - lint\n      - build_cpu:\n          name: build_cpu_th1.6\n          torch: 1.6.0\n          torchvision: 0.7.0\n          requires:\n            - lint\n      - build_cpu:\n          name: build_cpu_th1.7\n          torch: 1.7.0\n          torchvision: 0.8.1\n          requires:\n            - lint\n      - build_cpu:\n          name: build_cpu_th1.8_py3.9\n          torch: 1.8.0\n          torchvision: 0.9.0\n          python: \"3.9.0\"\n          requires:\n            - lint\n      - build_cpu:\n          name: build_cpu_th1.9_py3.8\n          torch: 1.9.0\n          torchvision: 0.10.0\n          python: \"3.8.12\"\n          requires:\n            - lint\n      - build_cpu:\n          name: build_cpu_th1.9_py3.9\n          torch: 1.9.0\n          torchvision: 0.10.0\n          python: \"3.9.0\"\n          requires:\n            - lint\n      - build_cu101:\n          requires:\n            - build_cpu_th1.6\n            - build_cpu_th1.7\n            - build_cpu_th1.8_py3.9\n            - build_cpu_th1.9_py3.8\n            - build_cpu_th1.9_py3.9\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/batch_test_list.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# yapf: disable\natss = dict(\n    config='configs/atss/atss_r50_fpn_1x_coco.py',\n    checkpoint='atss_r50_fpn_1x_coco_20200209-985f7bd0.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=39.4),\n)\nautoassign = dict(\n    config='configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py',\n    checkpoint='auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=40.4),\n)\ncarafe = dict(\n    config='configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py',\n    checkpoint='faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=38.6),\n)\ncascade_rcnn = [\n    dict(\n        config='configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py',\n        checkpoint='cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth',\n        eval='bbox',\n        metric=dict(bbox_mAP=40.3),\n    ),\n    dict(\n        config='configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',\n        checkpoint='cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth',\n        eval=['bbox', 'segm'],\n        metric=dict(bbox_mAP=41.2, segm_mAP=35.9),\n    ),\n]\ncascade_rpn = dict(\n    config='configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py',\n    checkpoint='crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=40.4),\n)\ncentripetalnet = dict(\n    config='configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py',  # noqa\n    checkpoint='centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=44.7),\n)\ncornernet = dict(\n    config='configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py',\n    checkpoint='cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=41.2),\n)\ndcn = dict(\n    config='configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py',\n    checkpoint='faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=41.3),\n)\ndeformable_detr = dict(\n    config='configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py',\n    checkpoint='deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=44.5),\n)\ndetectors = dict(\n    config='configs/detectors/detectors_htc_r50_1x_coco.py',\n    checkpoint='detectors_htc_r50_1x_coco-329b1453.pth',\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=49.1, segm_mAP=42.6),\n)\ndetr = dict(\n    config='configs/detr/detr_r50_8x2_150e_coco.py',\n    checkpoint='detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=40.1),\n)\ndouble_heads = dict(\n    config='configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py',\n    checkpoint='dh_faster_rcnn_r50_fpn_1x_coco_20200130-586b67df.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=40.0),\n)\ndynamic_rcnn = dict(\n    config='configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py',\n    checkpoint='dynamic_rcnn_r50_fpn_1x-62a3f276.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=38.9),\n)\nempirical_attention = dict(\n    config='configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py',  # noqa\n    checkpoint='faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130-403cccba.pth',  # noqa\n    eval='bbox',\n    
metric=dict(bbox_mAP=40.0),\n)\nfaster_rcnn = dict(\n    config='configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',\n    checkpoint='faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=37.4),\n)\nfcos = dict(\n    config='configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py',  # noqa\n    checkpoint='fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=38.7),\n)\nfoveabox = dict(\n    config='configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py',\n    checkpoint='fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203-8987880d.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=37.9),\n)\nfree_anchor = dict(\n    config='configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py',\n    checkpoint='retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=38.7),\n)\nfsaf = dict(\n    config='configs/fsaf/fsaf_r50_fpn_1x_coco.py',\n    checkpoint='fsaf_r50_fpn_1x_coco-94ccc51f.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=37.4),\n)\ngcnet = dict(\n    config='configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py',  # noqa\n    checkpoint='mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202-587b99aa.pth',  # noqa\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=40.4, segm_mAP=36.2),\n)\ngfl = dict(\n    config='configs/gfl/gfl_r50_fpn_1x_coco.py',\n    checkpoint='gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=40.2),\n)\ngn = dict(\n    config='configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py',\n    checkpoint='mask_rcnn_r50_fpn_gn-all_2x_coco_20200206-8eee02a6.pth',\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=40.1, segm_mAP=36.4),\n)\ngn_ws = dict(\n    config='configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py',\n    checkpoint='faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130-613d9fe2.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=39.7),\n)\ngrid_rcnn = dict(\n    config='configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py',\n    checkpoint='grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=40.4),\n)\ngroie = dict(\n    config='configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py',\n    checkpoint='faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=38.3),\n)\nguided_anchoring = [\n    dict(\n        config='configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py',  # noqa\n        checkpoint='ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth',\n        eval='bbox',\n        metric=dict(bbox_mAP=36.9),\n    ),\n    dict(\n        config='configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py',\n        checkpoint='ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth',  # noqa\n        eval='bbox',\n        metric=dict(bbox_mAP=39.6),\n    ),\n]\nhrnet = dict(\n    config='configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py',\n    checkpoint='faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=36.9),\n)\nhtc = dict(\n    config='configs/htc/htc_r50_fpn_1x_coco.py',\n    checkpoint='htc_r50_fpn_1x_coco_20200317-7332cf16.pth',\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=42.3, segm_mAP=37.4),\n)\nlibra_rcnn = dict(\n    config='configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py',\n    
checkpoint='libra_faster_rcnn_r50_fpn_1x_coco_20200130-3afee3a9.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=38.3),\n)\nmask_rcnn = dict(\n    config='configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py',\n    checkpoint='mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth',\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=38.2, segm_mAP=34.7),\n)\nms_rcnn = dict(\n    config='configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py',\n    checkpoint='ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth',\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=38.2, segm_mAP=36.0),\n)\nnas_fcos = dict(\n    config='configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py',  # noqa\n    checkpoint='nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520-1bdba3ce.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=39.4),\n)\nnas_fpn = dict(\n    config='configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py',\n    checkpoint='retinanet_r50_nasfpn_crop640_50e_coco-0ad1f644.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=40.5),\n)\npaa = dict(\n    config='configs/paa/paa_r50_fpn_1x_coco.py',\n    checkpoint='paa_r50_fpn_1x_coco_20200821-936edec3.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=40.4),\n)\npafpn = dict(\n    config='configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py',\n    checkpoint='faster_rcnn_r50_pafpn_1x_coco_bbox_mAP-0.375_20200503_105836-b7b4b9bd.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=37.5),\n)\npisa = dict(\n    config='configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py',\n    checkpoint='pisa_faster_rcnn_r50_fpn_1x_coco-dea93523.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=38.4),\n)\npoint_rend = dict(\n    config='configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py',\n    checkpoint='point_rend_r50_caffe_fpn_mstrain_1x_coco-1bcb5fb4.pth',\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=38.4, segm_mAP=36.3),\n)\nregnet = dict(\n    config='configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py',\n    checkpoint='mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth',  # noqa\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=40.4, segm_mAP=36.7),\n)\nreppoints = dict(\n    config='configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py',\n    checkpoint='reppoints_moment_r50_fpn_1x_coco_20200330-b73db8d1.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=37.0),\n)\nres2net = dict(\n    config='configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py',\n    checkpoint='faster_rcnn_r2_101_fpn_2x_coco-175f1da6.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=43.0),\n)\nresnest = dict(\n    config='configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py',  # noqa\n    checkpoint='faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco_20200926_125502-20289c16.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=42.0),\n)\nretinanet = dict(\n    config='configs/retinanet/retinanet_r50_fpn_1x_coco.py',\n    checkpoint='retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=36.5),\n)\nrpn = dict(\n    config='configs/rpn/rpn_r50_fpn_1x_coco.py',\n    checkpoint='rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth',\n    eval='proposal_fast',\n    metric=dict(AR_1000=58.2),\n)\nsabl = [\n    dict(\n        config='configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py',\n        checkpoint='sabl_retinanet_r50_fpn_1x_coco-6c54fd4f.pth',\n        eval='bbox',\n        metric=dict(bbox_mAP=37.7),\n    ),\n    dict(\n        
config='configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py',\n        checkpoint='sabl_faster_rcnn_r50_fpn_1x_coco-e867595b.pth',\n        eval='bbox',\n        metric=dict(bbox_mAP=39.9),\n    ),\n]\nscnet = dict(\n    config='configs/scnet/scnet_r50_fpn_1x_coco.py',\n    checkpoint='scnet_r50_fpn_1x_coco-c3f09857.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=43.5),\n)\nsparse_rcnn = dict(\n    config='configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py',\n    checkpoint='sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=37.9),\n)\nssd = [\n    dict(\n        config='configs/ssd/ssd300_coco.py',\n        checkpoint='ssd300_coco_20210803_015428-d231a06e.pth',\n        eval='bbox',\n        metric=dict(bbox_mAP=25.5),\n    ),\n    dict(\n        config='configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py',\n        checkpoint='ssdlite_mobilenetv2_scratch_600e_coco_20210629_110627-974d9307.pth',# noqa\n        eval='bbox',\n        metric=dict(bbox_mAP=21.3),\n    ),\n]\ntridentnet = dict(\n    config='configs/tridentnet/tridentnet_r50_caffe_1x_coco.py',\n    checkpoint='tridentnet_r50_caffe_1x_coco_20201230_141838-2ec0b530.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=37.6),\n)\nvfnet = dict(\n    config='configs/vfnet/vfnet_r50_fpn_1x_coco.py',\n    checkpoint='vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=41.6),\n)\nyolact = dict(\n    config='configs/yolact/yolact_r50_1x8_coco.py',\n    checkpoint='yolact_r50_1x8_coco_20200908-f38d58df.pth',\n    eval=['bbox', 'segm'],\n    metric=dict(bbox_mAP=31.2, segm_mAP=29.0),\n)\nyolo = dict(\n    config='configs/yolo/yolov3_d53_320_273e_coco.py',\n    checkpoint='yolov3_d53_320_273e_coco-421362b6.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=27.9),\n)\nyolof = dict(\n    config='configs/yolof/yolof_r50_c5_8x8_1x_coco.py',\n    checkpoint='yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth',\n    eval='bbox',\n    metric=dict(bbox_mAP=37.5),\n)\ncenternet = dict(\n    config='configs/centernet/centernet_resnet18_dcnv2_140e_coco.py',\n    checkpoint='centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=29.5),\n)\nyolox = dict(\n    config='configs/yolox/yolox_tiny_8x8_300e_coco.py',\n    checkpoint='yolox_tiny_8x8_300e_coco_20210806_234250-4ff3b67e.pth',  # noqa\n    eval='bbox',\n    metric=dict(bbox_mAP=31.5),\n)\n# yapf: enable\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/batch_train_list.txt",
    "content": "configs/atss/atss_r50_fpn_1x_coco.py\nconfigs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py\nconfigs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py\nconfigs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py\nconfigs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py\nconfigs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py\nconfigs/detectors/detectors_htc_r50_1x_coco.py\nconfigs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py\nconfigs/detr/detr_r50_8x2_150e_coco.py\nconfigs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py\nconfigs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py\nconfigs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py\nconfigs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py\nconfigs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py\nconfigs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py\nconfigs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py\nconfigs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py\nconfigs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py\nconfigs/retinanet/retinanet_r50_fpn_fp16_1x_coco.py\nconfigs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py\nconfigs/fsaf/fsaf_r50_fpn_1x_coco.py\nconfigs/gfl/gfl_r50_fpn_1x_coco.py\nconfigs/ghm/retinanet_ghm_r50_fpn_1x_coco.py\nconfigs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py\nconfigs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py\nconfigs/htc/htc_r50_fpn_1x_coco.py\nconfigs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py\nconfigs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py\nconfigs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py\nconfigs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py\nconfigs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py\nconfigs/paa/paa_r50_fpn_1x_coco.py\nconfigs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py\nconfigs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py\nconfigs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py\nconfigs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py\nconfigs/rpn/rpn_r50_fpn_1x_coco.py\nconfigs/sabl/sabl_retinanet_r50_fpn_1x_coco.py\nconfigs/ssd/ssd300_coco.py\nconfigs/tridentnet/tridentnet_r50_caffe_1x_coco.py\nconfigs/vfnet/vfnet_r50_fpn_1x_coco.py\nconfigs/yolact/yolact_r50_8x8_coco.py\nconfigs/yolo/yolov3_d53_320_273e_coco.py\nconfigs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py\nconfigs/scnet/scnet_r50_fpn_1x_coco.py\nconfigs/yolof/yolof_r50_c5_8x8_1x_coco.py\nconfigs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py\nconfigs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py\nconfigs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py\nconfigs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py\nconfigs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py\nconfigs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py\nconfigs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py\nconfigs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py\nconfigs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py\nconfigs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py\nconfigs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py\nconfigs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py\nconfigs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py\nconfigs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py\nconfigs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py\nconfigs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py\nconfigs/centernet/centernet_resnet18_dcnv2_140e_coco.py\nconfigs/yolox/yolox_tiny_8x8_300e_coco.py\nconfigs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py\nconfigs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/benchmark_filter.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\nimport os.path as osp\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Filter configs to train')\n    parser.add_argument(\n        '--basic-arch',\n        action='store_true',\n        help='to train models in basic arch')\n    parser.add_argument(\n        '--datasets', action='store_true', help='to train models in dataset')\n    parser.add_argument(\n        '--data-pipeline',\n        action='store_true',\n        help='to train models related to data pipeline, e.g. augmentations')\n    parser.add_argument(\n        '--nn-module',\n        action='store_true',\n        help='to train models related to neural network modules')\n    parser.add_argument(\n        '--model-options',\n        nargs='+',\n        help='custom options to special model benchmark')\n    parser.add_argument(\n        '--out',\n        type=str,\n        default='batch_train_list.txt',\n        help='output path of gathered metrics to be stored')\n    args = parser.parse_args()\n    return args\n\n\nbasic_arch_root = [\n    'atss', 'autoassign', 'cascade_rcnn', 'cascade_rpn', 'centripetalnet',\n    'cornernet', 'detectors', 'deformable_detr', 'detr', 'double_heads',\n    'dynamic_rcnn', 'faster_rcnn', 'fcos', 'foveabox', 'fp16', 'free_anchor',\n    'fsaf', 'gfl', 'ghm', 'grid_rcnn', 'guided_anchoring', 'htc', 'ld',\n    'libra_rcnn', 'mask_rcnn', 'ms_rcnn', 'nas_fcos', 'paa', 'pisa',\n    'point_rend', 'reppoints', 'retinanet', 'rpn', 'sabl', 'ssd', 'tridentnet',\n    'vfnet', 'yolact', 'yolo', 'sparse_rcnn', 'scnet', 'yolof', 'centernet'\n]\n\ndatasets_root = [\n    'wider_face', 'pascal_voc', 'cityscapes', 'lvis', 'deepfashion'\n]\n\ndata_pipeline_root = ['albu_example', 'instaboost']\n\nnn_module_root = [\n    'carafe', 'dcn', 'empirical_attention', 'gcnet', 'gn', 'gn+ws', 'hrnet',\n    'pafpn', 'nas_fpn', 'regnet', 'resnest', 'res2net', 'groie'\n]\n\nbenchmark_pool = [\n    'configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py',\n    'configs/atss/atss_r50_fpn_1x_coco.py',\n    'configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py',\n    'configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py',\n    'configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',\n    'configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py',\n    'configs/centernet/centernet_resnet18_dcnv2_140e_coco.py',\n    'configs/centripetalnet/'\n    'centripetalnet_hourglass104_mstest_16x6_210e_coco.py',\n    'configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py',\n    'configs/cornernet/'\n    'cornernet_hourglass104_mstest_8x6_210e_coco.py',\n    'configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py',\n    'configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py',\n    'configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py',\n    'configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py',\n    'configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py',\n    'configs/detectors/detectors_htc_r50_1x_coco.py',\n    'configs/detr/detr_r50_8x2_150e_coco.py',\n    'configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py',\n    'configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py',\n    'configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py',  # noqa\n    'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',\n    'configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py',\n    'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py',\n    'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py',\n  
  'configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py',\n    'configs/fcos/fcos_center_r50_caffe_fpn_gn-head_4x4_1x_coco.py',\n    'configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py',\n    'configs/retinanet/retinanet_r50_fpn_fp16_1x_coco.py',\n    'configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py',\n    'configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py',\n    'configs/fsaf/fsaf_r50_fpn_1x_coco.py',\n    'configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py',\n    'configs/gfl/gfl_r50_fpn_1x_coco.py',\n    'configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py',\n    'configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py',\n    'configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py',\n    'configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py',\n    'configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py',\n    'configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py',\n    'configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py',\n    'configs/htc/htc_r50_fpn_1x_coco.py',\n    'configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py',\n    'configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py',\n    'configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py',\n    'configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py',\n    'configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py',\n    'configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py',\n    'configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py',\n    'configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py',\n    'configs/paa/paa_r50_fpn_1x_coco.py',\n    'configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py',\n    'configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py',\n    'configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py',\n    'configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py',\n    'configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py',\n    'configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py',\n    'configs/resnest/'\n    'mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py',\n    'configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py',\n    'configs/rpn/rpn_r50_fpn_1x_coco.py',\n    'configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py',\n    'configs/ssd/ssd300_coco.py',\n    'configs/tridentnet/tridentnet_r50_caffe_1x_coco.py',\n    'configs/vfnet/vfnet_r50_fpn_1x_coco.py',\n    'configs/yolact/yolact_r50_1x8_coco.py',\n    'configs/yolo/yolov3_d53_320_273e_coco.py',\n    'configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py',\n    'configs/scnet/scnet_r50_fpn_1x_coco.py',\n    'configs/yolof/yolof_r50_c5_8x8_1x_coco.py',\n]\n\n\ndef main():\n    args = parse_args()\n\n    benchmark_type = []\n    if args.basic_arch:\n        benchmark_type += basic_arch_root\n    if args.datasets:\n        benchmark_type += datasets_root\n    if args.data_pipeline:\n        benchmark_type += data_pipeline_root\n    if args.nn_module:\n        benchmark_type += nn_module_root\n\n    special_model = args.model_options\n    if special_model is not None:\n        benchmark_type += special_model\n\n    config_dpath = 'configs/'\n    benchmark_configs = []\n    for cfg_root in benchmark_type:\n        cfg_dir = osp.join(config_dpath, cfg_root)\n        configs = os.scandir(cfg_dir)\n        for cfg in configs:\n            config_path = osp.join(cfg_dir, cfg.name)\n            if (config_path in benchmark_pool\n                    and config_path not in benchmark_configs):\n                benchmark_configs.append(config_path)\n\n    print(f'Totally found {len(benchmark_configs)} 
configs to benchmark')\n    with open(args.out, 'w') as f:\n        for config in benchmark_configs:\n            f.write(config + '\\n')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/benchmark_inference_fps.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\nimport os.path as osp\n\nimport mmcv\nfrom mmcv import Config, DictAction\nfrom mmcv.runner import init_dist\nfrom terminaltables import GithubFlavoredMarkdownTable\n\nfrom tools.analysis_tools.benchmark import repeat_measure_inference_speed\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet benchmark a model of FPS')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint_root', help='Checkpoint file root path')\n    parser.add_argument(\n        '--round-num',\n        type=int,\n        default=1,\n        help='round a number to a given precision in decimal digits')\n    parser.add_argument(\n        '--repeat-num',\n        type=int,\n        default=1,\n        help='number of repeat times of measurement for averaging the results')\n    parser.add_argument(\n        '--out', type=str, help='output path of gathered fps to be stored')\n    parser.add_argument(\n        '--max-iter', type=int, default=2000, help='num of max iter')\n    parser.add_argument(\n        '--log-interval', type=int, default=50, help='interval of logging')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n    return args\n\n\ndef results2markdown(result_dict):\n    table_data = []\n    is_multiple_results = False\n    for cfg_name, value in result_dict.items():\n        name = cfg_name.replace('configs/', '')\n        fps = value['fps']\n        ms_times_pre_image = value['ms_times_pre_image']\n        if isinstance(fps, list):\n            is_multiple_results = True\n            mean_fps = value['mean_fps']\n            mean_times_pre_image = value['mean_times_pre_image']\n            fps_str = ','.join([str(s) for s in fps])\n            ms_times_pre_image_str = ','.join(\n                [str(s) for s in ms_times_pre_image])\n            table_data.append([\n                name, fps_str, mean_fps, ms_times_pre_image_str,\n                mean_times_pre_image\n            ])\n        else:\n            table_data.append([name, fps, ms_times_pre_image])\n\n    if is_multiple_results:\n        table_data.insert(0, [\n            'model', 'fps', 'mean_fps', 'times_pre_image(ms)',\n            'mean_times_pre_image(ms)'\n        ])\n\n    else:\n        table_data.insert(0, ['model', 'fps', 'times_pre_image(ms)'])\n    table = GithubFlavoredMarkdownTable(table_data)\n    print(table.table, flush=True)\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    assert args.round_num >= 0\n    assert args.repeat_num >= 1\n\n    config = Config.fromfile(args.config)\n\n    if args.launcher == 'none':\n        raise NotImplementedError('Only supports distributed mode')\n    else:\n        init_dist(args.launcher)\n\n    result_dict = {}\n    for model_key in config:\n        model_infos = config[model_key]\n        if not isinstance(model_infos, list):\n            model_infos = [model_infos]\n        for model_info in model_infos:\n            record_metrics = model_info['metric']\n            cfg_path = model_info['config'].strip()\n            cfg = Config.fromfile(cfg_path)\n            checkpoint = osp.join(args.checkpoint_root,\n                                  model_info['checkpoint'].strip())\n            try:\n                fps = repeat_measure_inference_speed(cfg, checkpoint,\n                                                     args.max_iter,\n                                                     args.log_interval,\n                                                     args.fuse_conv_bn,\n                                                     args.repeat_num)\n                if args.repeat_num > 1:\n                    fps_list = [round(fps_, args.round_num) for fps_ in fps]\n                    times_pre_image_list = [\n                        round(1000 / fps_, args.round_num) for fps_ in fps\n                    ]\n                    mean_fps = round(\n                        sum(fps_list) / len(fps_list), args.round_num)\n                    mean_times_pre_image = round(\n                        sum(times_pre_image_list) / len(times_pre_image_list),\n                        args.round_num)\n                    print(\n                        f'{cfg_path} '\n                        f'Overall fps: 
{fps_list}[{mean_fps}] img / s, '\n                        f'times per image: '\n                        f'{times_pre_image_list}[{mean_times_pre_image}] '\n                        f'ms / img',\n                        flush=True)\n                    result_dict[cfg_path] = dict(\n                        fps=fps_list,\n                        mean_fps=mean_fps,\n                        ms_times_pre_image=times_pre_image_list,\n                        mean_times_pre_image=mean_times_pre_image)\n                else:\n                    print(\n                        f'{cfg_path} fps : {fps:.{args.round_num}f} img / s, '\n                        f'times per image: {1000 / fps:.{args.round_num}f} '\n                        f'ms / img',\n                        flush=True)\n                    result_dict[cfg_path] = dict(\n                        fps=round(fps, args.round_num),\n                        ms_times_pre_image=round(1000 / fps, args.round_num))\n            except Exception as e:\n                print(f'{cfg_path} error: {repr(e)}')\n                if args.repeat_num > 1:\n                    result_dict[cfg_path] = dict(\n                        fps=[0],\n                        mean_fps=0,\n                        ms_times_pre_image=[0],\n                        mean_times_pre_image=0)\n                else:\n                    result_dict[cfg_path] = dict(fps=0, ms_times_pre_image=0)\n\n    if args.out:\n        mmcv.mkdir_or_exist(args.out)\n        mmcv.dump(result_dict, osp.join(args.out, 'batch_inference_fps.json'))\n\n    results2markdown(result_dict)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/benchmark_test_image.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport logging\nimport os.path as osp\nfrom argparse import ArgumentParser\n\nfrom mmcv import Config\n\nfrom mmdet.apis import inference_detector, init_detector, show_result_pyplot\nfrom mmdet.utils import get_root_logger\n\n\ndef parse_args():\n    parser = ArgumentParser()\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint_root', help='Checkpoint file root path')\n    parser.add_argument('--img', default='demo/demo.jpg', help='Image file')\n    parser.add_argument('--aug', action='store_true', help='aug test')\n    parser.add_argument('--model-name', help='model name to inference')\n    parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument(\n        '--wait-time',\n        type=float,\n        default=1,\n        help='the interval of show (s), 0 is block')\n    parser.add_argument(\n        '--device', default='cuda:0', help='Device used for inference')\n    parser.add_argument(\n        '--score-thr', type=float, default=0.3, help='bbox score threshold')\n    args = parser.parse_args()\n    return args\n\n\ndef inference_model(config_name, checkpoint, args, logger=None):\n    cfg = Config.fromfile(config_name)\n    if args.aug:\n        if 'flip' in cfg.data.test.pipeline[1]:\n            cfg.data.test.pipeline[1].flip = True\n        else:\n            if logger is not None:\n                logger.error(f'{config_name}: unable to start aug test')\n            else:\n                print(f'{config_name}: unable to start aug test', flush=True)\n\n    model = init_detector(cfg, checkpoint, device=args.device)\n    # test a single image\n    result = inference_detector(model, args.img)\n\n    # show the results\n    if args.show:\n        show_result_pyplot(\n            model,\n            args.img,\n            result,\n            score_thr=args.score_thr,\n            wait_time=args.wait_time)\n    return result\n\n\n# Sample test whether the inference code is correct\ndef main(args):\n    config = Config.fromfile(args.config)\n\n    # test single model\n    if args.model_name:\n        if args.model_name in config:\n            model_infos = config[args.model_name]\n            if not isinstance(model_infos, list):\n                model_infos = [model_infos]\n            model_info = model_infos[0]\n            config_name = model_info['config'].strip()\n            print(f'processing: {config_name}', flush=True)\n            checkpoint = osp.join(args.checkpoint_root,\n                                  model_info['checkpoint'].strip())\n            # build the model from a config file and a checkpoint file\n            inference_model(config_name, checkpoint, args)\n            return\n        else:\n            raise RuntimeError('model name input error.')\n\n    # test all model\n    logger = get_root_logger(\n        log_file='benchmark_test_image.log', log_level=logging.ERROR)\n\n    for model_key in config:\n        model_infos = config[model_key]\n        if not isinstance(model_infos, list):\n            model_infos = [model_infos]\n        for model_info in model_infos:\n            print('processing: ', model_info['config'], flush=True)\n            config_name = model_info['config'].strip()\n            checkpoint = osp.join(args.checkpoint_root,\n                                  model_info['checkpoint'].strip())\n            try:\n                # build the model from a config file and a checkpoint file\n      
          inference_model(config_name, checkpoint, args, logger)\n            except Exception as e:\n                logger.error(f'{config_name}: {repr(e)}')\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    main(args)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/check_links.py",
    "content": "# Modified from:\n# https://github.com/allenai/allennlp/blob/main/scripts/check_links.py\n\nimport argparse\nimport logging\nimport os\nimport pathlib\nimport re\nimport sys\nfrom multiprocessing.dummy import Pool\nfrom typing import NamedTuple, Optional, Tuple\n\nimport requests\nfrom mmcv.utils import get_logger\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Goes through all the inline-links '\n        'in markdown files and reports the breakages')\n    parser.add_argument(\n        '--num-threads',\n        type=int,\n        default=100,\n        help='Number of processes to confirm the link')\n    parser.add_argument('--https-proxy', type=str, help='https proxy')\n    parser.add_argument(\n        '--out',\n        type=str,\n        default='link_reports.txt',\n        help='output path of reports')\n    args = parser.parse_args()\n    return args\n\n\nOK_STATUS_CODES = (\n    200,\n    401,  # the resource exists but may require some sort of login.\n    403,  # ^ same\n    405,  # HEAD method not allowed.\n    # the resource exists, but our default 'Accept-' header may not\n    # match what the server can provide.\n    406,\n)\n\n\nclass MatchTuple(NamedTuple):\n    source: str\n    name: str\n    link: str\n\n\ndef check_link(\n        match_tuple: MatchTuple,\n        http_session: requests.Session,\n        logger: logging = None) -> Tuple[MatchTuple, bool, Optional[str]]:\n    reason: Optional[str] = None\n    if match_tuple.link.startswith('http'):\n        result_ok, reason = check_url(match_tuple, http_session)\n    else:\n        result_ok = check_path(match_tuple)\n    if logger is None:\n        print(f\"  {'✓' if result_ok else '✗'} {match_tuple.link}\")\n    else:\n        logger.info(f\"  {'✓' if result_ok else '✗'} {match_tuple.link}\")\n    return match_tuple, result_ok, reason\n\n\ndef check_url(match_tuple: MatchTuple,\n              http_session: requests.Session) -> Tuple[bool, str]:\n    \"\"\"Check if a URL is reachable.\"\"\"\n    try:\n        result = http_session.head(\n            match_tuple.link, timeout=5, allow_redirects=True)\n        return (\n            result.ok or result.status_code in OK_STATUS_CODES,\n            f'status code = {result.status_code}',\n        )\n    except (requests.ConnectionError, requests.Timeout):\n        return False, 'connection error'\n\n\ndef check_path(match_tuple: MatchTuple) -> bool:\n    \"\"\"Check if a file in this repository exists.\"\"\"\n    relative_path = match_tuple.link.split('#')[0]\n    full_path = os.path.join(\n        os.path.dirname(str(match_tuple.source)), relative_path)\n    return os.path.exists(full_path)\n\n\ndef main():\n    args = parse_args()\n\n    # setup logger\n    logger = get_logger(name='mmdet', log_file=args.out)\n\n    # setup https_proxy\n    if args.https_proxy:\n        os.environ['https_proxy'] = args.https_proxy\n\n    # setup http_session\n    http_session = requests.Session()\n    for resource_prefix in ('http://', 'https://'):\n        http_session.mount(\n            resource_prefix,\n            requests.adapters.HTTPAdapter(\n                max_retries=5,\n                pool_connections=20,\n                pool_maxsize=args.num_threads),\n        )\n\n    logger.info('Finding all markdown files in the current directory...')\n\n    project_root = (pathlib.Path(__file__).parent / '..').resolve()\n    markdown_files = project_root.glob('**/*.md')\n\n    all_matches = set()\n    url_regex = 
re.compile(r'\\[([^!][^\\]]+)\\]\\(([^)(]+)\\)')\n    for markdown_file in markdown_files:\n        with open(markdown_file) as handle:\n            for line in handle.readlines():\n                matches = url_regex.findall(line)\n                for name, link in matches:\n                    if 'localhost' not in link:\n                        all_matches.add(\n                            MatchTuple(\n                                source=str(markdown_file),\n                                name=name,\n                                link=link))\n\n    logger.info(f'  {len(all_matches)} markdown files found')\n    logger.info('Checking to make sure we can retrieve each link...')\n\n    with Pool(processes=args.num_threads) as pool:\n        results = pool.starmap(check_link, [(match, http_session, logger)\n                                            for match in list(all_matches)])\n\n    # collect unreachable results\n    unreachable_results = [(match_tuple, reason)\n                           for match_tuple, success, reason in results\n                           if not success]\n\n    if unreachable_results:\n        logger.info('================================================')\n        logger.info(f'Unreachable links ({len(unreachable_results)}):')\n        for match_tuple, reason in unreachable_results:\n            logger.info('  > Source: ' + match_tuple.source)\n            logger.info('    Name: ' + match_tuple.name)\n            logger.info('    Link: ' + match_tuple.link)\n            if reason is not None:\n                logger.info('    Reason: ' + reason)\n        sys.exit(1)\n    logger.info('No Unreachable link found.')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/convert_test_benchmark_script.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\nimport os.path as osp\n\nfrom mmcv import Config\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert benchmark model list to script')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('--port', type=int, default=29666, help='dist port')\n    parser.add_argument(\n        '--work-dir',\n        default='tools/batch_test',\n        help='the dir to save metric')\n    parser.add_argument(\n        '--run', action='store_true', help='run script directly')\n    parser.add_argument(\n        '--out', type=str, help='path to save model benchmark script')\n\n    args = parser.parse_args()\n    return args\n\n\ndef process_model_info(model_info, work_dir):\n    config = model_info['config'].strip()\n    fname, _ = osp.splitext(osp.basename(config))\n    job_name = fname\n    work_dir = osp.join(work_dir, fname)\n    checkpoint = model_info['checkpoint'].strip()\n    if not isinstance(model_info['eval'], list):\n        evals = [model_info['eval']]\n    else:\n        evals = model_info['eval']\n    eval = ' '.join(evals)\n    return dict(\n        config=config,\n        job_name=job_name,\n        work_dir=work_dir,\n        checkpoint=checkpoint,\n        eval=eval)\n\n\ndef create_test_bash_info(commands, model_test_dict, port, script_name,\n                          partition):\n    config = model_test_dict['config']\n    job_name = model_test_dict['job_name']\n    checkpoint = model_test_dict['checkpoint']\n    work_dir = model_test_dict['work_dir']\n    eval = model_test_dict['eval']\n\n    echo_info = f' \\necho \\'{config}\\' &'\n    commands.append(echo_info)\n    commands.append('\\n')\n\n    command_info = f'GPUS=8  GPUS_PER_NODE=8  ' \\\n                   f'CPUS_PER_TASK=2 {script_name} '\n\n    command_info += f'{partition} '\n    command_info += f'{job_name} '\n    command_info += f'{config} '\n    command_info += f'$CHECKPOINT_DIR/{checkpoint} '\n    command_info += f'--work-dir {work_dir} '\n\n    command_info += f'--eval {eval} '\n    command_info += f'--cfg-option dist_params.port={port} '\n    command_info += ' &'\n\n    commands.append(command_info)\n\n\ndef main():\n    args = parse_args()\n    if args.out:\n        out_suffix = args.out.split('.')[-1]\n        assert args.out.endswith('.sh'), \\\n            f'Expected out file path suffix is .sh, but get .{out_suffix}'\n    assert args.out or args.run, \\\n        ('Please specify at least one operation (save/run/ the '\n         'script) with the argument \"--out\" or \"--run\"')\n\n    commands = []\n    partition_name = 'PARTITION=$1 '\n    commands.append(partition_name)\n    commands.append('\\n')\n\n    checkpoint_root = 'CHECKPOINT_DIR=$2 '\n    commands.append(checkpoint_root)\n    commands.append('\\n')\n\n    script_name = osp.join('tools', 'slurm_test.sh')\n    port = args.port\n    work_dir = args.work_dir\n\n    cfg = Config.fromfile(args.config)\n\n    for model_key in cfg:\n        model_infos = cfg[model_key]\n        if not isinstance(model_infos, list):\n            model_infos = [model_infos]\n        for model_info in model_infos:\n            print('processing: ', model_info['config'])\n            model_test_dict = process_model_info(model_info, work_dir)\n            create_test_bash_info(commands, model_test_dict, port, script_name,\n                                  '$PARTITION')\n            port += 1\n\n    command_str = 
''.join(commands)\n    if args.out:\n        with open(args.out, 'w') as f:\n            f.write(command_str)\n    if args.run:\n        os.system(command_str)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/convert_train_benchmark_script.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\nimport os.path as osp\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert benchmark model json to script')\n    parser.add_argument(\n        'txt_path', type=str, help='txt path output by benchmark_filter')\n    parser.add_argument(\n        '--partition',\n        type=str,\n        default='openmmlab',\n        help='slurm partition name')\n    parser.add_argument(\n        '--max-keep-ckpts',\n        type=int,\n        default=1,\n        help='The maximum checkpoints to keep')\n    parser.add_argument(\n        '--run', action='store_true', help='run script directly')\n    parser.add_argument(\n        '--out', type=str, help='path to save model benchmark script')\n\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n    if args.out:\n        out_suffix = args.out.split('.')[-1]\n        assert args.out.endswith('.sh'), \\\n            f'Expected out file path suffix is .sh, but get .{out_suffix}'\n    assert args.out or args.run, \\\n        ('Please specify at least one operation (save/run/ the '\n         'script) with the argument \"--out\" or \"--run\"')\n\n    partition = args.partition  # cluster name\n\n    root_name = './tools'\n    train_script_name = osp.join(root_name, 'slurm_train.sh')\n    # stdout is no output\n    stdout_cfg = '>/dev/null'\n\n    max_keep_ckpts = args.max_keep_ckpts\n\n    commands = []\n    with open(args.txt_path, 'r') as f:\n        model_cfgs = f.readlines()\n        for i, cfg in enumerate(model_cfgs):\n            cfg = cfg.strip()\n            if len(cfg) == 0:\n                continue\n            # print cfg name\n            echo_info = f'echo \\'{cfg}\\' &'\n            commands.append(echo_info)\n            commands.append('\\n')\n\n            fname, _ = osp.splitext(osp.basename(cfg))\n            out_fname = osp.join(root_name, 'work_dir', fname)\n            # default setting\n            if cfg.find('16x') >= 0:\n                command_info = f'GPUS=16  GPUS_PER_NODE=8  ' \\\n                               f'CPUS_PER_TASK=2 {train_script_name} '\n            elif cfg.find('gn-head_4x4_1x_coco.py') >= 0 or \\\n                    cfg.find('gn-head_4x4_2x_coco.py') >= 0:\n                command_info = f'GPUS=4  GPUS_PER_NODE=4  ' \\\n                               f'CPUS_PER_TASK=2 {train_script_name} '\n            else:\n                command_info = f'GPUS=8  GPUS_PER_NODE=8  ' \\\n                               f'CPUS_PER_TASK=2 {train_script_name} '\n            command_info += f'{partition} '\n            command_info += f'{fname} '\n            command_info += f'{cfg} '\n            command_info += f'{out_fname} '\n            if max_keep_ckpts:\n                command_info += f'--cfg-options ' \\\n                                f'checkpoint_config.max_keep_ckpts=' \\\n                                f'{max_keep_ckpts}' + ' '\n            command_info += f'{stdout_cfg} &'\n\n            commands.append(command_info)\n\n            if i < len(model_cfgs):\n                commands.append('\\n')\n\n        command_str = ''.join(commands)\n        if args.out:\n            with open(args.out, 'w') as f:\n                f.write(command_str)\n        if args.run:\n            os.system(command_str)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/gather_models.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport glob\nimport json\nimport os.path as osp\nimport shutil\nimport subprocess\nfrom collections import OrderedDict\n\nimport mmcv\nimport torch\nimport yaml\n\n\ndef ordered_yaml_dump(data, stream=None, Dumper=yaml.SafeDumper, **kwds):\n\n    class OrderedDumper(Dumper):\n        pass\n\n    def _dict_representer(dumper, data):\n        return dumper.represent_mapping(\n            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items())\n\n    OrderedDumper.add_representer(OrderedDict, _dict_representer)\n    return yaml.dump(data, stream, OrderedDumper, **kwds)\n\n\ndef process_checkpoint(in_file, out_file):\n    checkpoint = torch.load(in_file, map_location='cpu')\n    # remove optimizer for smaller file size\n    if 'optimizer' in checkpoint:\n        del checkpoint['optimizer']\n\n    # remove ema state_dict\n    for key in list(checkpoint['state_dict']):\n        if key.startswith('ema_'):\n            checkpoint['state_dict'].pop(key)\n\n    # if it is necessary to remove some sensitive data in checkpoint['meta'],\n    # add the code here.\n    if torch.__version__ >= '1.6':\n        torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False)\n    else:\n        torch.save(checkpoint, out_file)\n    sha = subprocess.check_output(['sha256sum', out_file]).decode()\n    final_file = out_file.rstrip('.pth') + '-{}.pth'.format(sha[:8])\n    subprocess.Popen(['mv', out_file, final_file])\n    return final_file\n\n\ndef is_by_epoch(config):\n    cfg = mmcv.Config.fromfile('./configs/' + config)\n    return cfg.runner.type == 'EpochBasedRunner'\n\n\ndef get_final_epoch_or_iter(config):\n    cfg = mmcv.Config.fromfile('./configs/' + config)\n    if cfg.runner.type == 'EpochBasedRunner':\n        return cfg.runner.max_epochs\n    else:\n        return cfg.runner.max_iters\n\n\ndef get_best_epoch_or_iter(exp_dir):\n    best_epoch_iter_full_path = list(\n        sorted(glob.glob(osp.join(exp_dir, 'best_*.pth'))))[-1]\n    best_epoch_or_iter_model_path = best_epoch_iter_full_path.split('/')[-1]\n    best_epoch_or_iter = best_epoch_or_iter_model_path.\\\n        split('_')[-1].split('.')[0]\n    return best_epoch_or_iter_model_path, int(best_epoch_or_iter)\n\n\ndef get_real_epoch_or_iter(config):\n    cfg = mmcv.Config.fromfile('./configs/' + config)\n    if cfg.runner.type == 'EpochBasedRunner':\n        epoch = cfg.runner.max_epochs\n        if cfg.data.train.type == 'RepeatDataset':\n            epoch *= cfg.data.train.times\n        return epoch\n    else:\n        return cfg.runner.max_iters\n\n\ndef get_final_results(log_json_path,\n                      epoch_or_iter,\n                      results_lut,\n                      by_epoch=True):\n    result_dict = dict()\n    last_val_line = None\n    last_train_line = None\n    last_val_line_idx = -1\n    last_train_line_idx = -1\n    with open(log_json_path, 'r') as f:\n        for i, line in enumerate(f.readlines()):\n            log_line = json.loads(line)\n            if 'mode' not in log_line.keys():\n                continue\n\n            if by_epoch:\n                if (log_line['mode'] == 'train'\n                        and log_line['epoch'] == epoch_or_iter):\n                    result_dict['memory'] = log_line['memory']\n\n                if (log_line['mode'] == 'val'\n                        and log_line['epoch'] == epoch_or_iter):\n                    result_dict.update({\n                        key: log_line[key]\n           
             for key in results_lut if key in log_line\n                    })\n                    return result_dict\n            else:\n                if log_line['mode'] == 'train':\n                    last_train_line_idx = i\n                    last_train_line = log_line\n\n                if log_line and log_line['mode'] == 'val':\n                    last_val_line_idx = i\n                    last_val_line = log_line\n\n    # bug: max_iters = 768, last_train_line['iter'] = 750\n    assert last_val_line_idx == last_train_line_idx + 1, \\\n        'Log file is incomplete'\n    result_dict['memory'] = last_train_line['memory']\n    result_dict.update({\n        key: last_val_line[key]\n        for key in results_lut if key in last_val_line\n    })\n\n    return result_dict\n\n\ndef get_dataset_name(config):\n    # If there are more dataset, add here.\n    name_map = dict(\n        CityscapesDataset='Cityscapes',\n        CocoDataset='COCO',\n        CocoPanopticDataset='COCO',\n        DeepFashionDataset='Deep Fashion',\n        LVISV05Dataset='LVIS v0.5',\n        LVISV1Dataset='LVIS v1',\n        VOCDataset='Pascal VOC',\n        WIDERFaceDataset='WIDER Face',\n        OpenImagesDataset='OpenImagesDataset',\n        OpenImagesChallengeDataset='OpenImagesChallengeDataset')\n    cfg = mmcv.Config.fromfile('./configs/' + config)\n    return name_map[cfg.dataset_type]\n\n\ndef convert_model_info_to_pwc(model_infos):\n    pwc_files = {}\n    for model in model_infos:\n        cfg_folder_name = osp.split(model['config'])[-2]\n        pwc_model_info = OrderedDict()\n        pwc_model_info['Name'] = osp.split(model['config'])[-1].split('.')[0]\n        pwc_model_info['In Collection'] = 'Please fill in Collection name'\n        pwc_model_info['Config'] = osp.join('configs', model['config'])\n\n        # get metadata\n        memory = round(model['results']['memory'] / 1024, 1)\n        meta_data = OrderedDict()\n        meta_data['Training Memory (GB)'] = memory\n        if 'epochs' in model:\n            meta_data['Epochs'] = get_real_epoch_or_iter(model['config'])\n        else:\n            meta_data['Iterations'] = get_real_epoch_or_iter(model['config'])\n        pwc_model_info['Metadata'] = meta_data\n\n        # get dataset name\n        dataset_name = get_dataset_name(model['config'])\n\n        # get results\n        results = []\n        # if there are more metrics, add here.\n        if 'bbox_mAP' in model['results']:\n            metric = round(model['results']['bbox_mAP'] * 100, 1)\n            results.append(\n                OrderedDict(\n                    Task='Object Detection',\n                    Dataset=dataset_name,\n                    Metrics={'box AP': metric}))\n        if 'segm_mAP' in model['results']:\n            metric = round(model['results']['segm_mAP'] * 100, 1)\n            results.append(\n                OrderedDict(\n                    Task='Instance Segmentation',\n                    Dataset=dataset_name,\n                    Metrics={'mask AP': metric}))\n        if 'PQ' in model['results']:\n            metric = round(model['results']['PQ'], 1)\n            results.append(\n                OrderedDict(\n                    Task='Panoptic Segmentation',\n                    Dataset=dataset_name,\n                    Metrics={'PQ': metric}))\n        pwc_model_info['Results'] = results\n\n        link_string = 'https://download.openmmlab.com/mmdetection/v2.0/'\n        link_string += '{}/{}'.format(model['config'].rstrip('.py'),\n                   
                   osp.split(model['model_path'])[-1])\n        pwc_model_info['Weights'] = link_string\n        if cfg_folder_name in pwc_files:\n            pwc_files[cfg_folder_name].append(pwc_model_info)\n        else:\n            pwc_files[cfg_folder_name] = [pwc_model_info]\n    return pwc_files\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Gather benchmarked models')\n    parser.add_argument(\n        'root',\n        type=str,\n        help='root path of benchmarked models to be gathered')\n    parser.add_argument(\n        'out', type=str, help='output path of gathered models to be stored')\n    parser.add_argument(\n        '--best',\n        action='store_true',\n        help='whether to gather the best model.')\n\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n    models_root = args.root\n    models_out = args.out\n    mmcv.mkdir_or_exist(models_out)\n\n    # find all models in the root directory to be gathered\n    raw_configs = list(mmcv.scandir('./configs', '.py', recursive=True))\n\n    # filter configs that is not trained in the experiments dir\n    used_configs = []\n    for raw_config in raw_configs:\n        if osp.exists(osp.join(models_root, raw_config)):\n            used_configs.append(raw_config)\n    print(f'Find {len(used_configs)} models to be gathered')\n\n    # find final_ckpt and log file for trained each config\n    # and parse the best performance\n    model_infos = []\n    for used_config in used_configs:\n        exp_dir = osp.join(models_root, used_config)\n        by_epoch = is_by_epoch(used_config)\n        # check whether the exps is finished\n        if args.best is True:\n            final_model, final_epoch_or_iter = get_best_epoch_or_iter(exp_dir)\n        else:\n            final_epoch_or_iter = get_final_epoch_or_iter(used_config)\n            final_model = '{}_{}.pth'.format('epoch' if by_epoch else 'iter',\n                                             final_epoch_or_iter)\n\n        model_path = osp.join(exp_dir, final_model)\n        # skip if the model is still training\n        if not osp.exists(model_path):\n            continue\n\n        # get the latest logs\n        log_json_path = list(\n            sorted(glob.glob(osp.join(exp_dir, '*.log.json'))))[-1]\n        log_txt_path = list(sorted(glob.glob(osp.join(exp_dir, '*.log'))))[-1]\n        cfg = mmcv.Config.fromfile('./configs/' + used_config)\n        results_lut = cfg.evaluation.metric\n        if not isinstance(results_lut, list):\n            results_lut = [results_lut]\n        # case when using VOC, the evaluation key is only 'mAP'\n        # when using Panoptic Dataset, the evaluation key is 'PQ'.\n        for i, key in enumerate(results_lut):\n            if 'mAP' not in key and 'PQ' not in key:\n                results_lut[i] = key + '_mAP'\n        model_performance = get_final_results(log_json_path,\n                                              final_epoch_or_iter, results_lut,\n                                              by_epoch)\n\n        if model_performance is None:\n            continue\n\n        model_time = osp.split(log_txt_path)[-1].split('.')[0]\n        model_info = dict(\n            config=used_config,\n            results=model_performance,\n            model_time=model_time,\n            final_model=final_model,\n            log_json_path=osp.split(log_json_path)[-1])\n        model_info['epochs' if by_epoch else 'iterations'] =\\\n            final_epoch_or_iter\n        
model_infos.append(model_info)\n\n    # publish model for each checkpoint\n    publish_model_infos = []\n    for model in model_infos:\n        model_publish_dir = osp.join(models_out, model['config'].rstrip('.py'))\n        mmcv.mkdir_or_exist(model_publish_dir)\n\n        model_name = osp.split(model['config'])[-1].split('.')[0]\n\n        model_name += '_' + model['model_time']\n        publish_model_path = osp.join(model_publish_dir, model_name)\n        trained_model_path = osp.join(models_root, model['config'],\n                                      model['final_model'])\n\n        # convert model\n        final_model_path = process_checkpoint(trained_model_path,\n                                              publish_model_path)\n\n        # copy log\n        shutil.copy(\n            osp.join(models_root, model['config'], model['log_json_path']),\n            osp.join(model_publish_dir, f'{model_name}.log.json'))\n        shutil.copy(\n            osp.join(models_root, model['config'],\n                     model['log_json_path'].rstrip('.json')),\n            osp.join(model_publish_dir, f'{model_name}.log'))\n\n        # copy config to guarantee reproducibility\n        config_path = model['config']\n        config_path = osp.join(\n            'configs',\n            config_path) if 'configs' not in config_path else config_path\n        target_config_path = osp.split(config_path)[-1]\n        shutil.copy(config_path, osp.join(model_publish_dir,\n                                          target_config_path))\n\n        model['model_path'] = final_model_path\n        publish_model_infos.append(model)\n\n    models = dict(models=publish_model_infos)\n    print(f'Totally gathered {len(publish_model_infos)} models')\n    mmcv.dump(models, osp.join(models_out, 'model_info.json'))\n\n    pwc_files = convert_model_info_to_pwc(publish_model_infos)\n    for name in pwc_files:\n        with open(osp.join(models_out, name + '_metafile.yml'), 'w') as f:\n            ordered_yaml_dump(pwc_files[name], f, encoding='utf-8')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/gather_test_benchmark_metric.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport glob\nimport os.path as osp\n\nimport mmcv\nfrom mmcv import Config\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Gather benchmarked models metric')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument(\n        'root',\n        type=str,\n        help='root path of benchmarked models to be gathered')\n    parser.add_argument(\n        '--out', type=str, help='output path of gathered metrics to be stored')\n    parser.add_argument(\n        '--not-show', action='store_true', help='not show metrics')\n    parser.add_argument(\n        '--show-all', action='store_true', help='show all model metrics')\n\n    args = parser.parse_args()\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    root_path = args.root\n    metrics_out = args.out\n    result_dict = {}\n\n    cfg = Config.fromfile(args.config)\n\n    for model_key in cfg:\n        model_infos = cfg[model_key]\n        if not isinstance(model_infos, list):\n            model_infos = [model_infos]\n        for model_info in model_infos:\n            record_metrics = model_info['metric']\n            config = model_info['config'].strip()\n            fname, _ = osp.splitext(osp.basename(config))\n            metric_json_dir = osp.join(root_path, fname)\n            if osp.exists(metric_json_dir):\n                json_list = glob.glob(osp.join(metric_json_dir, '*.json'))\n                if len(json_list) > 0:\n                    log_json_path = list(sorted(json_list))[-1]\n\n                    metric = mmcv.load(log_json_path)\n                    if config in metric.get('config', {}):\n\n                        new_metrics = dict()\n                        for record_metric_key in record_metrics:\n                            record_metric_key_bk = record_metric_key\n                            old_metric = record_metrics[record_metric_key]\n                            if record_metric_key == 'AR_1000':\n                                record_metric_key = 'AR@1000'\n                            if record_metric_key not in metric['metric']:\n                                raise KeyError(\n                                    'record_metric_key not exist, please '\n                                    'check your config')\n                            new_metric = round(\n                                metric['metric'][record_metric_key] * 100, 1)\n                            new_metrics[record_metric_key_bk] = new_metric\n\n                        if args.show_all:\n                            result_dict[config] = dict(\n                                before=record_metrics, after=new_metrics)\n                        else:\n                            for record_metric_key in record_metrics:\n                                old_metric = record_metrics[record_metric_key]\n                                new_metric = new_metrics[record_metric_key]\n                                if old_metric != new_metric:\n                                    result_dict[config] = dict(\n                                        before=record_metrics,\n                                        after=new_metrics)\n                                    break\n                    else:\n                        print(f'{config} not included in: {log_json_path}')\n                else:\n                    print(f'{config} not exist file: {metric_json_dir}')\n            
else:\n                print(f'{config} directory does not exist: {metric_json_dir}')\n\n    if metrics_out:\n        mmcv.mkdir_or_exist(metrics_out)\n        mmcv.dump(result_dict,\n                  osp.join(metrics_out, 'batch_test_metric_info.json'))\n    if not args.not_show:\n        print('===================================')\n        for config_name, metrics in result_dict.items():\n            print(config_name, metrics)\n        print('===================================')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/gather_train_benchmark_metric.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport glob\nimport os.path as osp\n\nimport mmcv\nfrom gather_models import get_final_results\n\ntry:\n    import xlrd\nexcept ImportError:\n    xlrd = None\ntry:\n    import xlutils\n    from xlutils.copy import copy\nexcept ImportError:\n    xlutils = None\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Gather benchmarked models metric')\n    parser.add_argument(\n        'root',\n        type=str,\n        help='root path of benchmarked models to be gathered')\n    parser.add_argument(\n        'txt_path', type=str, help='txt path output by benchmark_filter')\n    parser.add_argument(\n        '--out', type=str, help='output path of gathered metrics to be stored')\n    parser.add_argument(\n        '--not-show', action='store_true', help='not show metrics')\n    parser.add_argument(\n        '--excel', type=str, help='input path of excel to be recorded')\n    parser.add_argument(\n        '--ncol', type=int, help='Number of column to be modified or appended')\n\n    args = parser.parse_args()\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if args.excel:\n        assert args.ncol, 'Please specify \"--excel\" and \"--ncol\" ' \\\n                          'at the same time'\n        if xlrd is None:\n            raise RuntimeError(\n                'xlrd is not installed,'\n                'Please use “pip install xlrd==1.2.0” to install')\n        if xlutils is None:\n            raise RuntimeError(\n                'xlutils is not installed,'\n                'Please use “pip install xlutils==2.0.0” to install')\n        readbook = xlrd.open_workbook(args.excel)\n        sheet = readbook.sheet_by_name('Sheet1')\n        sheet_info = {}\n        total_nrows = sheet.nrows\n        for i in range(3, sheet.nrows):\n            sheet_info[sheet.row_values(i)[0]] = i\n        xlrw = copy(readbook)\n        table = xlrw.get_sheet(0)\n\n    root_path = args.root\n    metrics_out = args.out\n\n    result_dict = {}\n    with open(args.txt_path, 'r') as f:\n        model_cfgs = f.readlines()\n        for i, config in enumerate(model_cfgs):\n            config = config.strip()\n            if len(config) == 0:\n                continue\n\n            config_name = osp.split(config)[-1]\n            config_name = osp.splitext(config_name)[0]\n            result_path = osp.join(root_path, config_name)\n            if osp.exists(result_path):\n                # 1 read config\n                cfg = mmcv.Config.fromfile(config)\n                total_epochs = cfg.runner.max_epochs\n                final_results = cfg.evaluation.metric\n                if not isinstance(final_results, list):\n                    final_results = [final_results]\n                final_results_out = []\n                for key in final_results:\n                    if 'proposal_fast' in key:\n                        final_results_out.append('AR@1000')  # RPN\n                    elif 'mAP' not in key:\n                        final_results_out.append(key + '_mAP')\n\n                # 2 determine whether total_epochs ckpt exists\n                ckpt_path = f'epoch_{total_epochs}.pth'\n                if osp.exists(osp.join(result_path, ckpt_path)):\n                    log_json_path = list(\n                        sorted(glob.glob(osp.join(result_path,\n                                                  '*.log.json'))))[-1]\n\n                    # 3 read metric\n          
          model_performance = get_final_results(\n                        log_json_path, total_epochs, final_results_out)\n                    if model_performance is None:\n                        print(f'log file error: {log_json_path}')\n                        continue\n                    for performance in model_performance:\n                        if performance in ['AR@1000', 'bbox_mAP', 'segm_mAP']:\n                            metric = round(\n                                model_performance[performance] * 100, 1)\n                            model_performance[performance] = metric\n                    result_dict[config] = model_performance\n\n                    # update and append excel content\n                    if args.excel:\n                        if 'AR@1000' in model_performance:\n                            metrics = f'{model_performance[\"AR@1000\"]}' \\\n                                      f'(AR@1000)'\n                        elif 'segm_mAP' in model_performance:\n                            metrics = f'{model_performance[\"bbox_mAP\"]}/' \\\n                                      f'{model_performance[\"segm_mAP\"]}'\n                        else:\n                            metrics = f'{model_performance[\"bbox_mAP\"]}'\n\n                        row_num = sheet_info.get(config, None)\n                        if row_num:\n                            table.write(row_num, args.ncol, metrics)\n                        else:\n                            table.write(total_nrows, 0, config)\n                            table.write(total_nrows, args.ncol, metrics)\n                            total_nrows += 1\n\n                else:\n                    print(f'{config} not exist: {ckpt_path}')\n            else:\n                print(f'not exist: {config}')\n\n        # 4 save or print results\n        if metrics_out:\n            mmcv.mkdir_or_exist(metrics_out)\n            mmcv.dump(result_dict,\n                      osp.join(metrics_out, 'model_metric_info.json'))\n        if not args.not_show:\n            print('===================================')\n            for config_name, metrics in result_dict.items():\n                print(config_name, metrics)\n            print('===================================')\n        if args.excel:\n            filename, sufflx = osp.splitext(args.excel)\n            xlrw.save(f'{filename}_o{sufflx}')\n            print(f'>>> Output {filename}_o{sufflx}')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/linter.sh",
    "content": "yapf -r -i mmdet/ configs/ tests/ tools/\nisort -rc mmdet/ configs/ tests/ tools/\nflake8 .\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/test_benchmark.sh",
    "content": "PARTITION=$1\nCHECKPOINT_DIR=$2\n\necho 'configs/atss/atss_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION atss_r50_fpn_1x_coco configs/atss/atss_r50_fpn_1x_coco.py $CHECKPOINT_DIR/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth --work-dir tools/batch_test/atss_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29666  &\necho 'configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION autoassign_r50_fpn_8x2_1x_coco configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py $CHECKPOINT_DIR/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth --work-dir tools/batch_test/autoassign_r50_fpn_8x2_1x_coco --eval bbox --cfg-option dist_params.port=29667  &\necho 'configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_r50_fpn_carafe_1x_coco configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth --work-dir tools/batch_test/faster_rcnn_r50_fpn_carafe_1x_coco --eval bbox --cfg-option dist_params.port=29668  &\necho 'configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION cascade_rcnn_r50_fpn_1x_coco configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth --work-dir tools/batch_test/cascade_rcnn_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29669  &\necho 'configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION cascade_mask_rcnn_r50_fpn_1x_coco configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth --work-dir tools/batch_test/cascade_mask_rcnn_r50_fpn_1x_coco --eval bbox segm --cfg-option dist_params.port=29670  &\necho 'configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION crpn_faster_rcnn_r50_caffe_fpn_1x_coco configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py $CHECKPOINT_DIR/crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth --work-dir tools/batch_test/crpn_faster_rcnn_r50_caffe_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29671  &\necho 'configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION centripetalnet_hourglass104_mstest_16x6_210e_coco configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py $CHECKPOINT_DIR/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth --work-dir tools/batch_test/centripetalnet_hourglass104_mstest_16x6_210e_coco --eval bbox --cfg-option dist_params.port=29672  &\necho 'configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION cornernet_hourglass104_mstest_8x6_210e_coco configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py $CHECKPOINT_DIR/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth --work-dir tools/batch_test/cornernet_hourglass104_mstest_8x6_210e_coco --eval bbox --cfg-option dist_params.port=29673  &\necho 'configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  
CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth --work-dir tools/batch_test/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco --eval bbox --cfg-option dist_params.port=29674  &\necho 'configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION deformable_detr_r50_16x2_50e_coco configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py $CHECKPOINT_DIR/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.pth --work-dir tools/batch_test/deformable_detr_r50_16x2_50e_coco --eval bbox --cfg-option dist_params.port=29675  &\necho 'configs/detectors/detectors_htc_r50_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION detectors_htc_r50_1x_coco configs/detectors/detectors_htc_r50_1x_coco.py $CHECKPOINT_DIR/detectors_htc_r50_1x_coco-329b1453.pth --work-dir tools/batch_test/detectors_htc_r50_1x_coco --eval bbox segm --cfg-option dist_params.port=29676  &\necho 'configs/detr/detr_r50_8x2_150e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION detr_r50_8x2_150e_coco configs/detr/detr_r50_8x2_150e_coco.py $CHECKPOINT_DIR/detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth --work-dir tools/batch_test/detr_r50_8x2_150e_coco --eval bbox --cfg-option dist_params.port=29677  &\necho 'configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION dh_faster_rcnn_r50_fpn_1x_coco configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/dh_faster_rcnn_r50_fpn_1x_coco_20200130-586b67df.pth --work-dir tools/batch_test/dh_faster_rcnn_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29678  &\necho 'configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION dynamic_rcnn_r50_fpn_1x_coco configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/dynamic_rcnn_r50_fpn_1x-62a3f276.pth --work-dir tools/batch_test/dynamic_rcnn_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29679  &\necho 'configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_r50_fpn_attention_1111_1x_coco configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130-403cccba.pth --work-dir tools/batch_test/faster_rcnn_r50_fpn_attention_1111_1x_coco --eval bbox --cfg-option dist_params.port=29680  &\necho 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_r50_fpn_1x_coco configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth --work-dir tools/batch_test/faster_rcnn_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29681  &\necho 'configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py $CHECKPOINT_DIR/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth 
--work-dir tools/batch_test/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco --eval bbox --cfg-option dist_params.port=29682  &\necho 'configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION fovea_align_r50_fpn_gn-head_4x4_2x_coco configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py $CHECKPOINT_DIR/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203-8987880d.pth --work-dir tools/batch_test/fovea_align_r50_fpn_gn-head_4x4_2x_coco --eval bbox --cfg-option dist_params.port=29683  &\necho 'configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION retinanet_free_anchor_r50_fpn_1x_coco configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py $CHECKPOINT_DIR/retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth --work-dir tools/batch_test/retinanet_free_anchor_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29684  &\necho 'configs/fsaf/fsaf_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION fsaf_r50_fpn_1x_coco configs/fsaf/fsaf_r50_fpn_1x_coco.py $CHECKPOINT_DIR/fsaf_r50_fpn_1x_coco-94ccc51f.pth --work-dir tools/batch_test/fsaf_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29685  &\necho 'configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py $CHECKPOINT_DIR/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202-587b99aa.pth --work-dir tools/batch_test/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco --eval bbox segm --cfg-option dist_params.port=29686  &\necho 'configs/gfl/gfl_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION gfl_r50_fpn_1x_coco configs/gfl/gfl_r50_fpn_1x_coco.py $CHECKPOINT_DIR/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth --work-dir tools/batch_test/gfl_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29687  &\necho 'configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION mask_rcnn_r50_fpn_gn-all_2x_coco configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py $CHECKPOINT_DIR/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206-8eee02a6.pth --work-dir tools/batch_test/mask_rcnn_r50_fpn_gn-all_2x_coco --eval bbox segm --cfg-option dist_params.port=29688  &\necho 'configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_r50_fpn_gn_ws-all_1x_coco configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130-613d9fe2.pth --work-dir tools/batch_test/faster_rcnn_r50_fpn_gn_ws-all_1x_coco --eval bbox --cfg-option dist_params.port=29689  &\necho 'configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION grid_rcnn_r50_fpn_gn-head_2x_coco configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py $CHECKPOINT_DIR/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth --work-dir tools/batch_test/grid_rcnn_r50_fpn_gn-head_2x_coco --eval bbox --cfg-option dist_params.port=29690  &\necho 'configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 
tools/slurm_test.sh $PARTITION faster_rcnn_r50_fpn_groie_1x_coco configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth --work-dir tools/batch_test/faster_rcnn_r50_fpn_groie_1x_coco --eval bbox --cfg-option dist_params.port=29691  &\necho 'configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION ga_retinanet_r50_caffe_fpn_1x_coco configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py $CHECKPOINT_DIR/ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth --work-dir tools/batch_test/ga_retinanet_r50_caffe_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29692  &\necho 'configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION ga_faster_r50_caffe_fpn_1x_coco configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py $CHECKPOINT_DIR/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth --work-dir tools/batch_test/ga_faster_r50_caffe_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29693  &\necho 'configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_hrnetv2p_w18_1x_coco configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth --work-dir tools/batch_test/faster_rcnn_hrnetv2p_w18_1x_coco --eval bbox --cfg-option dist_params.port=29694  &\necho 'configs/htc/htc_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION htc_r50_fpn_1x_coco configs/htc/htc_r50_fpn_1x_coco.py $CHECKPOINT_DIR/htc_r50_fpn_1x_coco_20200317-7332cf16.pth --work-dir tools/batch_test/htc_r50_fpn_1x_coco --eval bbox segm --cfg-option dist_params.port=29695  &\necho 'configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION libra_faster_rcnn_r50_fpn_1x_coco configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/libra_faster_rcnn_r50_fpn_1x_coco_20200130-3afee3a9.pth --work-dir tools/batch_test/libra_faster_rcnn_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29696  &\necho 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION mask_rcnn_r50_fpn_1x_coco configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth --work-dir tools/batch_test/mask_rcnn_r50_fpn_1x_coco --eval bbox segm --cfg-option dist_params.port=29697  &\necho 'configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION ms_rcnn_r50_caffe_fpn_1x_coco configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py $CHECKPOINT_DIR/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth --work-dir tools/batch_test/ms_rcnn_r50_caffe_fpn_1x_coco --eval bbox segm --cfg-option dist_params.port=29698  &\necho 'configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py $CHECKPOINT_DIR/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520-1bdba3ce.pth --work-dir tools/batch_test/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco --eval bbox 
--cfg-option dist_params.port=29699  &\necho 'configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION retinanet_r50_nasfpn_crop640_50e_coco configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py $CHECKPOINT_DIR/retinanet_r50_nasfpn_crop640_50e_coco-0ad1f644.pth --work-dir tools/batch_test/retinanet_r50_nasfpn_crop640_50e_coco --eval bbox --cfg-option dist_params.port=29700  &\necho 'configs/paa/paa_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION paa_r50_fpn_1x_coco configs/paa/paa_r50_fpn_1x_coco.py $CHECKPOINT_DIR/paa_r50_fpn_1x_coco_20200821-936edec3.pth --work-dir tools/batch_test/paa_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29701  &\necho 'configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_r50_pafpn_1x_coco configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_r50_pafpn_1x_coco_bbox_mAP-0.375_20200503_105836-b7b4b9bd.pth --work-dir tools/batch_test/faster_rcnn_r50_pafpn_1x_coco --eval bbox --cfg-option dist_params.port=29702  &\necho 'configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION pisa_faster_rcnn_r50_fpn_1x_coco configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/pisa_faster_rcnn_r50_fpn_1x_coco-dea93523.pth --work-dir tools/batch_test/pisa_faster_rcnn_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29703  &\necho 'configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION point_rend_r50_caffe_fpn_mstrain_1x_coco configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py $CHECKPOINT_DIR/point_rend_r50_caffe_fpn_mstrain_1x_coco-1bcb5fb4.pth --work-dir tools/batch_test/point_rend_r50_caffe_fpn_mstrain_1x_coco --eval bbox segm --cfg-option dist_params.port=29704  &\necho 'configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION mask_rcnn_regnetx-3.2GF_fpn_1x_coco configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py $CHECKPOINT_DIR/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth --work-dir tools/batch_test/mask_rcnn_regnetx-3.2GF_fpn_1x_coco --eval bbox segm --cfg-option dist_params.port=29705  &\necho 'configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION reppoints_moment_r50_fpn_1x_coco configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py $CHECKPOINT_DIR/reppoints_moment_r50_fpn_1x_coco_20200330-b73db8d1.pth --work-dir tools/batch_test/reppoints_moment_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29706  &\necho 'configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_r2_101_fpn_2x_coco configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py $CHECKPOINT_DIR/faster_rcnn_r2_101_fpn_2x_coco-175f1da6.pth --work-dir tools/batch_test/faster_rcnn_r2_101_fpn_2x_coco --eval bbox --cfg-option dist_params.port=29707  &\necho 'configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco 
configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py $CHECKPOINT_DIR/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco_20200926_125502-20289c16.pth --work-dir tools/batch_test/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco --eval bbox --cfg-option dist_params.port=29708  &\necho 'configs/retinanet/retinanet_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION retinanet_r50_fpn_1x_coco configs/retinanet/retinanet_r50_fpn_1x_coco.py $CHECKPOINT_DIR/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth --work-dir tools/batch_test/retinanet_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29709  &\necho 'configs/rpn/rpn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION rpn_r50_fpn_1x_coco configs/rpn/rpn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/rpn_r50_fpn_1x_coco_20200218-5525fa2e.pth --work-dir tools/batch_test/rpn_r50_fpn_1x_coco --eval proposal_fast --cfg-option dist_params.port=29710  &\necho 'configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION sabl_retinanet_r50_fpn_1x_coco configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py $CHECKPOINT_DIR/sabl_retinanet_r50_fpn_1x_coco-6c54fd4f.pth --work-dir tools/batch_test/sabl_retinanet_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29711  &\necho 'configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION sabl_faster_rcnn_r50_fpn_1x_coco configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/sabl_faster_rcnn_r50_fpn_1x_coco-e867595b.pth --work-dir tools/batch_test/sabl_faster_rcnn_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29712  &\necho 'configs/scnet/scnet_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION scnet_r50_fpn_1x_coco configs/scnet/scnet_r50_fpn_1x_coco.py $CHECKPOINT_DIR/scnet_r50_fpn_1x_coco-c3f09857.pth --work-dir tools/batch_test/scnet_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29713  &\necho 'configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION sparse_rcnn_r50_fpn_1x_coco configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py $CHECKPOINT_DIR/sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.pth --work-dir tools/batch_test/sparse_rcnn_r50_fpn_1x_coco --eval bbox --cfg-option dist_params.port=29714  &\necho 'configs/ssd/ssd300_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION ssd300_coco configs/ssd/ssd300_coco.py $CHECKPOINT_DIR/ssd300_coco_20210803_015428-d231a06e.pth --work-dir tools/batch_test/ssd300_coco --eval bbox --cfg-option dist_params.port=29715  &\necho 'configs/tridentnet/tridentnet_r50_caffe_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION tridentnet_r50_caffe_1x_coco configs/tridentnet/tridentnet_r50_caffe_1x_coco.py $CHECKPOINT_DIR/tridentnet_r50_caffe_1x_coco_20201230_141838-2ec0b530.pth --work-dir tools/batch_test/tridentnet_r50_caffe_1x_coco --eval bbox --cfg-option dist_params.port=29716  &\necho 'configs/vfnet/vfnet_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION vfnet_r50_fpn_1x_coco configs/vfnet/vfnet_r50_fpn_1x_coco.py $CHECKPOINT_DIR/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth --work-dir tools/batch_test/vfnet_r50_fpn_1x_coco --eval bbox --cfg-option 
dist_params.port=29717  &\necho 'configs/yolact/yolact_r50_1x8_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION yolact_r50_1x8_coco configs/yolact/yolact_r50_1x8_coco.py $CHECKPOINT_DIR/yolact_r50_1x8_coco_20200908-f38d58df.pth --work-dir tools/batch_test/yolact_r50_1x8_coco --eval bbox segm --cfg-option dist_params.port=29718  &\necho 'configs/yolo/yolov3_d53_320_273e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION yolov3_d53_320_273e_coco configs/yolo/yolov3_d53_320_273e_coco.py $CHECKPOINT_DIR/yolov3_d53_320_273e_coco-421362b6.pth --work-dir tools/batch_test/yolov3_d53_320_273e_coco --eval bbox --cfg-option dist_params.port=29719  &\necho 'configs/yolof/yolof_r50_c5_8x8_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION yolof_r50_c5_8x8_1x_coco configs/yolof/yolof_r50_c5_8x8_1x_coco.py $CHECKPOINT_DIR/yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth --work-dir tools/batch_test/yolof_r50_c5_8x8_1x_coco --eval bbox --cfg-option dist_params.port=29720  &\necho 'configs/centernet/centernet_resnet18_dcnv2_140e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION centernet_resnet18_dcnv2_140e_coco configs/centernet/centernet_resnet18_dcnv2_140e_coco.py $CHECKPOINT_DIR/centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth --work-dir tools/batch_test/centernet_resnet18_dcnv2_140e_coco --eval bbox --cfg-option dist_params.port=29721  &\necho 'configs/yolox/yolox_tiny_8x8_300e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION yolox_tiny_8x8_300e_coco configs/yolox/yolox_tiny_8x8_300e_coco.py $CHECKPOINT_DIR/yolox_tiny_8x8_300e_coco_20210806_234250-4ff3b67e.pth --work-dir tools/batch_test/yolox_tiny_8x8_300e_coco --eval bbox --cfg-option dist_params.port=29722  &\necho 'configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 tools/slurm_test.sh $PARTITION ssdlite_mobilenetv2_scratch_600e_coco configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py $CHECKPOINT_DIR/ssdlite_mobilenetv2_scratch_600e_coco_20210629_110627-974d9307.pth --work-dir tools/batch_test/ssdlite_mobilenetv2_scratch_600e_coco --eval bbox --cfg-option dist_params.port=29723  &\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/test_init_backbone.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"Check out backbone whether successfully load pretrained checkpoint.\"\"\"\nimport copy\nimport os\nfrom os.path import dirname, exists, join\n\nimport pytest\nfrom mmcv import Config, ProgressBar\nfrom mmcv.runner import _load_checkpoint\n\nfrom mmdet.models import build_detector\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection repo\n        repo_dpath = dirname(dirname(__file__))\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet\n        repo_dpath = dirname(dirname(mmdet.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef _get_config_module(fname):\n    \"\"\"Load a configuration as a python module.\"\"\"\n    from mmcv import Config\n    config_dpath = _get_config_directory()\n    config_fpath = join(config_dpath, fname)\n    config_mod = Config.fromfile(config_fpath)\n    return config_mod\n\n\ndef _get_detector_cfg(fname):\n    \"\"\"Grab configs necessary to create a detector.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    return model\n\n\ndef _traversed_config_file():\n    \"\"\"We traversed all potential config files under the `config` file. If you\n    need to print details or debug code, you can use this function.\n\n    If the `backbone.init_cfg` is None (do not use `Pretrained` init way), you\n    need add the folder name in `ignores_folder` (if the config files in this\n    folder all set backbone.init_cfg is None) or add config name in\n    `ignores_file` (if the config file set backbone.init_cfg is None)\n    \"\"\"\n    config_path = _get_config_directory()\n    check_cfg_names = []\n\n    # `base`, `legacy_1.x` and `common` ignored by default.\n    ignores_folder = ['_base_', 'legacy_1.x', 'common']\n    # 'ld' need load teacher model, if want to check 'ld',\n    # please check teacher_config path first.\n    ignores_folder += ['ld']\n    # `selfsup_pretrain` need convert model, if want to check this model,\n    # need to convert the model first.\n    ignores_folder += ['selfsup_pretrain']\n\n    # the `init_cfg` in 'centripetalnet', 'cornernet', 'cityscapes',\n    # 'scratch' is None.\n    # the `init_cfg` in ssdlite(`ssdlite_mobilenetv2_scratch_600e_coco.py`)\n    # is None\n    # Please confirm `bockbone.init_cfg` is None first.\n    ignores_folder += ['centripetalnet', 'cornernet', 'cityscapes', 'scratch']\n    ignores_file = ['ssdlite_mobilenetv2_scratch_600e_coco.py']\n\n    for config_file_name in os.listdir(config_path):\n        if config_file_name not in ignores_folder:\n            config_file = join(config_path, config_file_name)\n            if os.path.isdir(config_file):\n                for config_sub_file in os.listdir(config_file):\n                    if config_sub_file.endswith('py') and \\\n                            config_sub_file not in ignores_file:\n                        name = join(config_file, config_sub_file)\n                        check_cfg_names.append(name)\n    return check_cfg_names\n\n\ndef _check_backbone(config, print_cfg=True):\n    \"\"\"Check out backbone whether successfully load pretrained 
model, by using\n    `backbone.init_cfg`.\n\n    First, using `mmcv._load_checkpoint` to load the checkpoint without\n        loading models.\n    Then, using `build_detector` to build models, and using\n        `model.init_weights()` to initialize the parameters.\n    Finally, assert weights and bias of each layer loaded from pretrained\n        checkpoint are equal to the weights and bias of original checkpoint.\n        For the convenience of comparison, we sum up weights and bias of\n        each loaded layer separately.\n\n    Args:\n        config (str): Config file path.\n        print_cfg (bool): Whether print logger and return the result.\n\n    Returns:\n        results (str or None): If backbone successfully load pretrained\n            checkpoint, return None; else, return config file path.\n    \"\"\"\n    if print_cfg:\n        print('-' * 15 + 'loading ', config)\n    cfg = Config.fromfile(config)\n    init_cfg = None\n    try:\n        init_cfg = cfg.model.backbone.init_cfg\n        init_flag = True\n    except AttributeError:\n        init_flag = False\n    if init_cfg is None or init_cfg.get('type') != 'Pretrained':\n        init_flag = False\n    if init_flag:\n        checkpoint = _load_checkpoint(init_cfg.checkpoint)\n        if 'state_dict' in checkpoint:\n            state_dict = checkpoint['state_dict']\n        else:\n            state_dict = checkpoint\n\n        model = build_detector(\n            cfg.model,\n            train_cfg=cfg.get('train_cfg'),\n            test_cfg=cfg.get('test_cfg'))\n        model.init_weights()\n\n        checkpoint_layers = state_dict.keys()\n        for name, value in model.backbone.state_dict().items():\n            if name in checkpoint_layers:\n                assert value.equal(state_dict[name])\n\n        if print_cfg:\n            print('-' * 10 + 'Successfully load checkpoint' + '-' * 10 +\n                  '\\n', )\n            return None\n    else:\n        if print_cfg:\n            print(config + '\\n' + '-' * 10 +\n                  'config file do not have init_cfg' + '-' * 10 + '\\n')\n            return config\n\n\n@pytest.mark.parametrize('config', _traversed_config_file())\ndef test_load_pretrained(config):\n    \"\"\"Check out backbone whether successfully load pretrained model by using\n    `backbone.init_cfg`.\n\n    Details please refer to `_check_backbone`\n    \"\"\"\n    _check_backbone(config, print_cfg=False)\n\n\ndef _test_load_pretrained():\n    \"\"\"We traversed all potential config files under the `config` file. If you\n    need to print details or debug code, you can use this function.\n\n    Returns:\n        check_cfg_names (list[str]): Config files that backbone initialized\n        from pretrained checkpoint might be problematic. Need to recheck\n        the config file. The output including the config files that the\n        backbone.init_cfg is None\n    \"\"\"\n    check_cfg_names = _traversed_config_file()\n    need_check_cfg = []\n\n    prog_bar = ProgressBar(len(check_cfg_names))\n    for config in check_cfg_names:\n        init_cfg_name = _check_backbone(config)\n        if init_cfg_name is not None:\n            need_check_cfg.append(init_cfg_name)\n        prog_bar.update()\n    print('These config files need to be checked again')\n    print(need_check_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.dev_scripts/train_benchmark.sh",
    "content": "echo 'configs/atss/atss_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab atss_r50_fpn_1x_coco configs/atss/atss_r50_fpn_1x_coco.py ./tools/work_dir/atss_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab autoassign_r50_fpn_8x2_1x_coco configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py ./tools/work_dir/autoassign_r50_fpn_8x2_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab cascade_mask_rcnn_r50_fpn_1x_coco configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py ./tools/work_dir/cascade_mask_rcnn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab crpn_faster_rcnn_r50_caffe_fpn_1x_coco configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py ./tools/work_dir/crpn_faster_rcnn_r50_caffe_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/centernet/centernet_resnet18_dcnv2_140e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab centernet_resnet18_dcnv2_140e_coco configs/centernet/centernet_resnet18_dcnv2_140e_coco.py ./tools/work_dir/centernet_resnet18_dcnv2_140e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py' &\nGPUS=16  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab centripetalnet_hourglass104_mstest_16x6_210e_coco configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py ./tools/work_dir/centripetalnet_hourglass104_mstest_16x6_210e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab cornernet_hourglass104_mstest_8x6_210e_coco configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py ./tools/work_dir/cornernet_hourglass104_mstest_8x6_210e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/detectors/detectors_htc_r50_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab detectors_htc_r50_1x_coco configs/detectors/detectors_htc_r50_1x_coco.py ./tools/work_dir/detectors_htc_r50_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py' &\nGPUS=16  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab deformable_detr_r50_16x2_50e_coco configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py ./tools/work_dir/deformable_detr_r50_16x2_50e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/detr/detr_r50_8x2_150e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab detr_r50_8x2_150e_coco configs/detr/detr_r50_8x2_150e_coco.py ./tools/work_dir/detr_r50_8x2_150e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 
./tools/slurm_train.sh openmmlab dh_faster_rcnn_r50_fpn_1x_coco configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py ./tools/work_dir/dh_faster_rcnn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab dynamic_rcnn_r50_fpn_1x_coco configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py ./tools/work_dir/dynamic_rcnn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_fpn_1x_coco configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py ./tools/work_dir/faster_rcnn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_caffe_dc5_mstrain_1x_coco configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py ./tools/work_dir/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_caffe_fpn_mstrain_1x_coco configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py ./tools/work_dir/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_caffe_fpn_1x_coco configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py ./tools/work_dir/faster_rcnn_r50_caffe_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_fpn_ohem_1x_coco configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py ./tools/work_dir/faster_rcnn_r50_fpn_ohem_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py' &\nGPUS=4  GPUS_PER_NODE=4  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab fovea_align_r50_fpn_gn-head_4x4_2x_coco configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py ./tools/work_dir/fovea_align_r50_fpn_gn-head_4x4_2x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_r50_fpn_fp16_1x_coco configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py ./tools/work_dir/mask_rcnn_r50_fpn_fp16_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/retinanet/retinanet_r50_fpn_fp16_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab retinanet_r50_fpn_fp16_1x_coco configs/retinanet/retinanet_r50_fpn_fp16_1x_coco.py ./tools/work_dir/retinanet_r50_fpn_fp16_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab retinanet_free_anchor_r50_fpn_1x_coco 
configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py ./tools/work_dir/retinanet_free_anchor_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/fsaf/fsaf_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab fsaf_r50_fpn_1x_coco configs/fsaf/fsaf_r50_fpn_1x_coco.py ./tools/work_dir/fsaf_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/gfl/gfl_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab gfl_r50_fpn_1x_coco configs/gfl/gfl_r50_fpn_1x_coco.py ./tools/work_dir/gfl_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab retinanet_ghm_r50_fpn_1x_coco configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py ./tools/work_dir/retinanet_ghm_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab grid_rcnn_r50_fpn_gn-head_2x_coco configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py ./tools/work_dir/grid_rcnn_r50_fpn_gn-head_2x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab ga_faster_r50_caffe_fpn_1x_coco configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py ./tools/work_dir/ga_faster_r50_caffe_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/htc/htc_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab htc_r50_fpn_1x_coco configs/htc/htc_r50_fpn_1x_coco.py ./tools/work_dir/htc_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab ld_r18_gflv1_r101_fpn_coco_1x configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py ./tools/work_dir/ld_r18_gflv1_r101_fpn_coco_1x --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab libra_faster_rcnn_r50_fpn_1x_coco configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py ./tools/work_dir/libra_faster_rcnn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py ./tools/work_dir/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab ms_rcnn_r50_caffe_fpn_1x_coco configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py ./tools/work_dir/ms_rcnn_r50_caffe_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py' &\nGPUS=4  GPUS_PER_NODE=4  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab 
nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py ./tools/work_dir/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/paa/paa_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab paa_r50_fpn_1x_coco configs/paa/paa_r50_fpn_1x_coco.py ./tools/work_dir/paa_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab pisa_mask_rcnn_r50_fpn_1x_coco configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py ./tools/work_dir/pisa_mask_rcnn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab point_rend_r50_caffe_fpn_mstrain_1x_coco configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py ./tools/work_dir/point_rend_r50_caffe_fpn_mstrain_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab reppoints_moment_r50_fpn_gn-neck+head_1x_coco configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py ./tools/work_dir/reppoints_moment_r50_fpn_gn-neck+head_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab retinanet_r50_caffe_fpn_1x_coco configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py ./tools/work_dir/retinanet_r50_caffe_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/rpn/rpn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab rpn_r50_fpn_1x_coco configs/rpn/rpn_r50_fpn_1x_coco.py ./tools/work_dir/rpn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab sabl_retinanet_r50_fpn_1x_coco configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py ./tools/work_dir/sabl_retinanet_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/ssd/ssd300_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab ssd300_coco configs/ssd/ssd300_coco.py ./tools/work_dir/ssd300_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/tridentnet/tridentnet_r50_caffe_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab tridentnet_r50_caffe_1x_coco configs/tridentnet/tridentnet_r50_caffe_1x_coco.py ./tools/work_dir/tridentnet_r50_caffe_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/vfnet/vfnet_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab vfnet_r50_fpn_1x_coco configs/vfnet/vfnet_r50_fpn_1x_coco.py ./tools/work_dir/vfnet_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/yolact/yolact_r50_8x8_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab yolact_r50_8x8_coco 
configs/yolact/yolact_r50_8x8_coco.py ./tools/work_dir/yolact_r50_8x8_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/yolo/yolov3_d53_320_273e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab yolov3_d53_320_273e_coco configs/yolo/yolov3_d53_320_273e_coco.py ./tools/work_dir/yolov3_d53_320_273e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab sparse_rcnn_r50_fpn_1x_coco configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py ./tools/work_dir/sparse_rcnn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/scnet/scnet_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab scnet_r50_fpn_1x_coco configs/scnet/scnet_r50_fpn_1x_coco.py ./tools/work_dir/scnet_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/yolof/yolof_r50_c5_8x8_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab yolof_r50_c5_8x8_1x_coco configs/yolof/yolof_r50_c5_8x8_1x_coco.py ./tools/work_dir/yolof_r50_c5_8x8_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_r50_fpn_carafe_1x_coco configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py ./tools/work_dir/mask_rcnn_r50_fpn_carafe_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_fpn_mdpool_1x_coco configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py ./tools/work_dir/faster_rcnn_r50_fpn_mdpool_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py ./tools/work_dir/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_fpn_dpool_1x_coco configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py ./tools/work_dir/faster_rcnn_r50_fpn_dpool_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py ./tools/work_dir/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py ./tools/work_dir/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 
./tools/slurm_train.sh openmmlab mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py ./tools/work_dir/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_r50_fpn_gn-all_2x_coco configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py ./tools/work_dir/mask_rcnn_r50_fpn_gn-all_2x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_r50_fpn_gn_ws-all_2x_coco configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py ./tools/work_dir/mask_rcnn_r50_fpn_gn_ws-all_2x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_hrnetv2p_w18_1x_coco configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py ./tools/work_dir/mask_rcnn_hrnetv2p_w18_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_pafpn_1x_coco configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py ./tools/work_dir/faster_rcnn_r50_pafpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab retinanet_r50_nasfpn_crop640_50e_coco configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py ./tools/work_dir/retinanet_r50_nasfpn_crop640_50e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_regnetx-3.2GF_fpn_1x_coco configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py ./tools/work_dir/mask_rcnn_regnetx-3.2GF_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py ./tools/work_dir/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r2_101_fpn_2x_coco configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py ./tools/work_dir/faster_rcnn_r2_101_fpn_2x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab faster_rcnn_r50_fpn_groie_1x_coco configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py ./tools/work_dir/faster_rcnn_r50_fpn_groie_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab mask_rcnn_r50_fpn_1x_cityscapes 
configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py ./tools/work_dir/mask_rcnn_r50_fpn_1x_cityscapes --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab panoptic_fpn_r50_fpn_1x_coco configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py ./tools/work_dir/panoptic_fpn_r50_fpn_1x_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/yolox/yolox_tiny_8x8_300e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab yolox_tiny_8x8_300e_coco configs/yolox/yolox_tiny_8x8_300e_coco.py ./tools/work_dir/yolox_tiny_8x8_300e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\necho 'configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py' &\nGPUS=8  GPUS_PER_NODE=8  CPUS_PER_TASK=2 ./tools/slurm_train.sh openmmlab ssdlite_mobilenetv2_scratch_600e_coco configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py ./tools/work_dir/ssdlite_mobilenetv2_scratch_600e_coco --cfg-options checkpoint_config.max_keep_ckpts=1 >/dev/null &\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.gitignore",
    "content": "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n.hypothesis/\n.pytest_cache/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\n\n# Flask stuff:\ninstance/\n.webassets-cache\n\n# Scrapy stuff:\n.scrapy\n\n# Sphinx documentation\ndocs/en/_build/\ndocs/zh_cn/_build/\n\n# PyBuilder\ntarget/\n\n# Jupyter Notebook\n.ipynb_checkpoints\n\n# pyenv\n.python-version\n\n# celery beat schedule file\ncelerybeat-schedule\n\n# SageMath parsed files\n*.sage.py\n\n# Environments\n.env\n.venv\nenv/\nvenv/\nENV/\nenv.bak/\nvenv.bak/\n\n# Spyder project settings\n.spyderproject\n.spyproject\n\n# Rope project settings\n.ropeproject\n\n# mkdocs documentation\n/site\n\n# mypy\n.mypy_cache/\n\ndata/\ndata\n.vscode\n.idea\n.DS_Store\n\n# custom\n*.pkl\n*.pkl.json\n*.log.json\ndocs/modelzoo_statistics.md\nmmdet/.mim\nwork_dirs/\n\n# Pytorch\n*.pth\n*.py~\n*.sh~\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.owners.yml",
    "content": "assign:\n  strategy:\n    # random\n    daily-shift-based\n  scedule: \"*/1 * * * *\"\n  assignees:\n    - Czm369\n    - hhaAndroid\n    - zytx121\n    - RangiLyu\n    - BIGWangYuDong\n    - chhluo\n    - ZwwWayne\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.pre-commit-config.yaml",
    "content": "repos:\n  - repo: https://github.com/PyCQA/flake8\n    rev: 5.0.4\n    hooks:\n      - id: flake8\n  - repo: https://github.com/PyCQA/isort\n    rev: 5.10.1\n    hooks:\n      - id: isort\n  - repo: https://github.com/pre-commit/mirrors-yapf\n    rev: v0.32.0\n    hooks:\n      - id: yapf\n  - repo: https://github.com/pre-commit/pre-commit-hooks\n    rev: v4.3.0\n    hooks:\n      - id: trailing-whitespace\n      - id: check-yaml\n      - id: end-of-file-fixer\n      - id: requirements-txt-fixer\n      - id: double-quote-string-fixer\n      - id: check-merge-conflict\n      - id: fix-encoding-pragma\n        args: [\"--remove\"]\n      - id: mixed-line-ending\n        args: [\"--fix=lf\"]\n  - repo: https://github.com/codespell-project/codespell\n    rev: v2.2.1\n    hooks:\n      - id: codespell\n  - repo: https://github.com/executablebooks/mdformat\n    rev: 0.7.9\n    hooks:\n      - id: mdformat\n        args: [\"--number\"]\n        additional_dependencies:\n          - mdformat-openmmlab\n          - mdformat_frontmatter\n          - linkify-it-py\n  - repo: https://github.com/myint/docformatter\n    rev: v1.3.1\n    hooks:\n      - id: docformatter\n        args: [\"--in-place\", \"--wrap-descriptions\", \"79\"]\n  - repo: https://github.com/open-mmlab/pre-commit-hooks\n    rev: v0.2.0  # Use the ref you want to point at\n    hooks:\n      - id: check-algo-readme\n      - id: check-copyright\n        args: [\"mmdet\"]  # replace the dir_to_check with your expected directory to check\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/.readthedocs.yml",
    "content": "version: 2\n\nformats: all\n\npython:\n  version: 3.7\n  install:\n    - requirements: requirements/docs.txt\n    - requirements: requirements/readthedocs.txt\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/CITATION.cff",
    "content": "cff-version: 1.2.0\nmessage: \"If you use this software, please cite it as below.\"\nauthors:\n  - name: \"MMDetection Contributors\"\ntitle: \"OpenMMLab Detection Toolbox and Benchmark\"\ndate-released: 2018-08-22\nurl: \"https://github.com/open-mmlab/mmdetection\"\nlicense: Apache-2.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/LICENSE",
    "content": "Copyright 2018-2023 OpenMMLab. All rights reserved.\n\n                                 Apache License\n                           Version 2.0, January 2004\n                        http://www.apache.org/licenses/\n\n   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION\n\n   1. Definitions.\n\n      \"License\" shall mean the terms and conditions for use, reproduction,\n      and distribution as defined by Sections 1 through 9 of this document.\n\n      \"Licensor\" shall mean the copyright owner or entity authorized by\n      the copyright owner that is granting the License.\n\n      \"Legal Entity\" shall mean the union of the acting entity and all\n      other entities that control, are controlled by, or are under common\n      control with that entity. For the purposes of this definition,\n      \"control\" means (i) the power, direct or indirect, to cause the\n      direction or management of such entity, whether by contract or\n      otherwise, or (ii) ownership of fifty percent (50%) or more of the\n      outstanding shares, or (iii) beneficial ownership of such entity.\n\n      \"You\" (or \"Your\") shall mean an individual or Legal Entity\n      exercising permissions granted by this License.\n\n      \"Source\" form shall mean the preferred form for making modifications,\n      including but not limited to software source code, documentation\n      source, and configuration files.\n\n      \"Object\" form shall mean any form resulting from mechanical\n      transformation or translation of a Source form, including but\n      not limited to compiled object code, generated documentation,\n      and conversions to other media types.\n\n      \"Work\" shall mean the work of authorship, whether in Source or\n      Object form, made available under the License, as indicated by a\n      copyright notice that is included in or attached to the work\n      (an example is provided in the Appendix below).\n\n      \"Derivative Works\" shall mean any work, whether in Source or Object\n      form, that is based on (or derived from) the Work and for which the\n      editorial revisions, annotations, elaborations, or other modifications\n      represent, as a whole, an original work of authorship. For the purposes\n      of this License, Derivative Works shall not include works that remain\n      separable from, or merely link (or bind by name) to the interfaces of,\n      the Work and Derivative Works thereof.\n\n      \"Contribution\" shall mean any work of authorship, including\n      the original version of the Work and any modifications or additions\n      to that Work or Derivative Works thereof, that is intentionally\n      submitted to Licensor for inclusion in the Work by the copyright owner\n      or by an individual or Legal Entity authorized to submit on behalf of\n      the copyright owner. 
For the purposes of this definition, \"submitted\"\n      means any form of electronic, verbal, or written communication sent\n      to the Licensor or its representatives, including but not limited to\n      communication on electronic mailing lists, source code control systems,\n      and issue tracking systems that are managed by, or on behalf of, the\n      Licensor for the purpose of discussing and improving the Work, but\n      excluding communication that is conspicuously marked or otherwise\n      designated in writing by the copyright owner as \"Not a Contribution.\"\n\n      \"Contributor\" shall mean Licensor and any individual or Legal Entity\n      on behalf of whom a Contribution has been received by Licensor and\n      subsequently incorporated within the Work.\n\n   2. Grant of Copyright License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      copyright license to reproduce, prepare Derivative Works of,\n      publicly display, publicly perform, sublicense, and distribute the\n      Work and such Derivative Works in Source or Object form.\n\n   3. Grant of Patent License. Subject to the terms and conditions of\n      this License, each Contributor hereby grants to You a perpetual,\n      worldwide, non-exclusive, no-charge, royalty-free, irrevocable\n      (except as stated in this section) patent license to make, have made,\n      use, offer to sell, sell, import, and otherwise transfer the Work,\n      where such license applies only to those patent claims licensable\n      by such Contributor that are necessarily infringed by their\n      Contribution(s) alone or by combination of their Contribution(s)\n      with the Work to which such Contribution(s) was submitted. If You\n      institute patent litigation against any entity (including a\n      cross-claim or counterclaim in a lawsuit) alleging that the Work\n      or a Contribution incorporated within the Work constitutes direct\n      or contributory patent infringement, then any patent licenses\n      granted to You under this License for that Work shall terminate\n      as of the date such litigation is filed.\n\n   4. Redistribution. 
You may reproduce and distribute copies of the\n      Work or Derivative Works thereof in any medium, with or without\n      modifications, and in Source or Object form, provided that You\n      meet the following conditions:\n\n      (a) You must give any other recipients of the Work or\n          Derivative Works a copy of this License; and\n\n      (b) You must cause any modified files to carry prominent notices\n          stating that You changed the files; and\n\n      (c) You must retain, in the Source form of any Derivative Works\n          that You distribute, all copyright, patent, trademark, and\n          attribution notices from the Source form of the Work,\n          excluding those notices that do not pertain to any part of\n          the Derivative Works; and\n\n      (d) If the Work includes a \"NOTICE\" text file as part of its\n          distribution, then any Derivative Works that You distribute must\n          include a readable copy of the attribution notices contained\n          within such NOTICE file, excluding those notices that do not\n          pertain to any part of the Derivative Works, in at least one\n          of the following places: within a NOTICE text file distributed\n          as part of the Derivative Works; within the Source form or\n          documentation, if provided along with the Derivative Works; or,\n          within a display generated by the Derivative Works, if and\n          wherever such third-party notices normally appear. The contents\n          of the NOTICE file are for informational purposes only and\n          do not modify the License. You may add Your own attribution\n          notices within Derivative Works that You distribute, alongside\n          or as an addendum to the NOTICE text from the Work, provided\n          that such additional attribution notices cannot be construed\n          as modifying the License.\n\n      You may add Your own copyright statement to Your modifications and\n      may provide additional or different license terms and conditions\n      for use, reproduction, or distribution of Your modifications, or\n      for any such Derivative Works as a whole, provided Your use,\n      reproduction, and distribution of the Work otherwise complies with\n      the conditions stated in this License.\n\n   5. Submission of Contributions. Unless You explicitly state otherwise,\n      any Contribution intentionally submitted for inclusion in the Work\n      by You to the Licensor shall be under the terms and conditions of\n      this License, without any additional terms or conditions.\n      Notwithstanding the above, nothing herein shall supersede or modify\n      the terms of any separate license agreement you may have executed\n      with Licensor regarding such Contributions.\n\n   6. Trademarks. This License does not grant permission to use the trade\n      names, trademarks, service marks, or product names of the Licensor,\n      except as required for reasonable and customary use in describing the\n      origin of the Work and reproducing the content of the NOTICE file.\n\n   7. Disclaimer of Warranty. 
Unless required by applicable law or\n      agreed to in writing, Licensor provides the Work (and each\n      Contributor provides its Contributions) on an \"AS IS\" BASIS,\n      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or\n      implied, including, without limitation, any warranties or conditions\n      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A\n      PARTICULAR PURPOSE. You are solely responsible for determining the\n      appropriateness of using or redistributing the Work and assume any\n      risks associated with Your exercise of permissions under this License.\n\n   8. Limitation of Liability. In no event and under no legal theory,\n      whether in tort (including negligence), contract, or otherwise,\n      unless required by applicable law (such as deliberate and grossly\n      negligent acts) or agreed to in writing, shall any Contributor be\n      liable to You for damages, including any direct, indirect, special,\n      incidental, or consequential damages of any character arising as a\n      result of this License or out of the use or inability to use the\n      Work (including but not limited to damages for loss of goodwill,\n      work stoppage, computer failure or malfunction, or any and all\n      other commercial damages or losses), even if such Contributor\n      has been advised of the possibility of such damages.\n\n   9. Accepting Warranty or Additional Liability. While redistributing\n      the Work or Derivative Works thereof, You may choose to offer,\n      and charge a fee for, acceptance of support, warranty, indemnity,\n      or other liability obligations and/or rights consistent with this\n      License. However, in accepting such obligations, You may act only\n      on Your own behalf and on Your sole responsibility, not on behalf\n      of any other Contributor, and only if You agree to indemnify,\n      defend, and hold each Contributor harmless for any liability\n      incurred by, or claims asserted against, such Contributor by reason\n      of your accepting any such warranty or additional liability.\n\n   END OF TERMS AND CONDITIONS\n\n   APPENDIX: How to apply the Apache License to your work.\n\n      To apply the Apache License to your work, attach the following\n      boilerplate notice, with the fields enclosed by brackets \"[]\"\n      replaced with your own identifying information. (Don't include\n      the brackets!)  The text should be enclosed in the appropriate\n      comment syntax for the file format. We also recommend that a\n      file or class name and description of purpose be included on the\n      same \"printed page\" as the copyright notice for easier\n      identification within third-party archives.\n\n   Copyright 2018-2023 OpenMMLab.\n\n   Licensed under the Apache License, Version 2.0 (the \"License\");\n   you may not use this file except in compliance with the License.\n   You may obtain a copy of the License at\n\n       http://www.apache.org/licenses/LICENSE-2.0\n\n   Unless required by applicable law or agreed to in writing, software\n   distributed under the License is distributed on an \"AS IS\" BASIS,\n   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n   See the License for the specific language governing permissions and\n   limitations under the License.\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/MANIFEST.in",
    "content": "include requirements/*.txt\ninclude mmdet/VERSION\ninclude mmdet/.mim/model-index.yml\ninclude mmdet/.mim/demo/*/*\nrecursive-include mmdet/.mim/configs *.py *.yml\nrecursive-include mmdet/.mim/tools *.sh *.py\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/cityscapes_detection.py",
    "content": "# dataset settings\ndataset_type = 'CityscapesDataset'\ndata_root = 'data/cityscapes/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(2048, 1024),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=1,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=8,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root +\n            'annotations/instancesonly_filtered_gtFine_train.json',\n            img_prefix=data_root + 'leftImg8bit/train/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root +\n        'annotations/instancesonly_filtered_gtFine_val.json',\n        img_prefix=data_root + 'leftImg8bit/val/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root +\n        'annotations/instancesonly_filtered_gtFine_test.json',\n        img_prefix=data_root + 'leftImg8bit/test/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric='bbox')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/cityscapes_instance.py",
    "content": "# dataset settings\ndataset_type = 'CityscapesDataset'\ndata_root = 'data/cityscapes/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n    \n        type='Resize', img_scale=[(2048, 800), (2048, 1024)], keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(2048, 1024),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=1,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=8,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root +\n            'annotations/instancesonly_filtered_gtFine_train.json',\n            img_prefix=data_root + 'leftImg8bit/train/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root +\n        'annotations/instancesonly_filtered_gtFine_val.json',\n        img_prefix=data_root + 'leftImg8bit/val/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root +\n        'annotations/instancesonly_filtered_gtFine_test.json',\n        img_prefix=data_root + 'leftImg8bit/test/',\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/coco_detection.py",
    "content": "# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric='bbox')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/coco_instance.py",
    "content": "# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/coco_instance_semantic.py",
    "content": "# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='SegRescale', scale_factor=1 / 8),\n    dict(type='DefaultFormatBundle'),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip', flip_ratio=0.5),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        seg_prefix=data_root + 'stuffthingmaps/train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/coco_panoptic.py",
    "content": "# dataset settings\ndataset_type = 'CocoPanopticDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadPanopticAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        with_seg=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='SegRescale', scale_factor=1 / 4),\n    dict(type='DefaultFormatBundle'),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/panoptic_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        seg_prefix=data_root + 'annotations/panoptic_train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/panoptic_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        seg_prefix=data_root + 'annotations/panoptic_val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/panoptic_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        seg_prefix=data_root + 'annotations/panoptic_val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric=['PQ'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/deepfashion.py",
    "content": "# dataset settings\ndataset_type = 'DeepFashionDataset'\ndata_root = 'data/DeepFashion/In-shop/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(750, 1101), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(750, 1101),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    imgs_per_gpu=2,\n    workers_per_gpu=1,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',\n        img_prefix=data_root + 'Img/',\n        pipeline=train_pipeline,\n        data_root=data_root),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/DeepFashion_segmentation_query.json',\n        img_prefix=data_root + 'Img/',\n        pipeline=test_pipeline,\n        data_root=data_root),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root +\n        'annotations/DeepFashion_segmentation_gallery.json',\n        img_prefix=data_root + 'Img/',\n        pipeline=test_pipeline,\n        data_root=data_root))\nevaluation = dict(interval=5, metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/lvis_v0.5_instance.py",
    "content": "# dataset settings\n_base_ = 'coco_instance.py'\ndataset_type = 'LVISV05Dataset'\ndata_root = 'data/lvis_v0.5/'\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        _delete_=True,\n        type='ClassBalancedDataset',\n        oversample_thr=1e-3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/lvis_v0.5_train.json',\n            img_prefix=data_root + 'train2017/')),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v0.5_val.json',\n        img_prefix=data_root + 'val2017/'),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v0.5_val.json',\n        img_prefix=data_root + 'val2017/'))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/lvis_v1_instance.py",
    "content": "# dataset settings\n_base_ = 'coco_instance.py'\ndataset_type = 'LVISV1Dataset'\ndata_root = 'data/lvis_v1/'\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        _delete_=True,\n        type='ClassBalancedDataset',\n        oversample_thr=1e-3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/lvis_v1_train.json',\n            img_prefix=data_root)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v1_val.json',\n        img_prefix=data_root),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v1_val.json',\n        img_prefix=data_root))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/openimages_detection.py",
    "content": "# dataset settings\ndataset_type = 'OpenImagesDataset'\ndata_root = 'data/OpenImages/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, denorm_bbox=True),\n    dict(type='Resize', img_scale=(1024, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1024, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ],\n    ),\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=0,  # workers_per_gpu > 0 may occur out of memory\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/oidv6-train-annotations-bbox.csv',\n        img_prefix=data_root + 'OpenImages/train/',\n        label_file=data_root + 'annotations/class-descriptions-boxable.csv',\n        hierarchy_file=data_root +\n        'annotations/bbox_labels_600_hierarchy.json',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/validation-annotations-bbox.csv',\n        img_prefix=data_root + 'OpenImages/validation/',\n        label_file=data_root + 'annotations/class-descriptions-boxable.csv',\n        hierarchy_file=data_root +\n        'annotations/bbox_labels_600_hierarchy.json',\n        meta_file=data_root + 'annotations/validation-image-metas.pkl',\n        image_level_ann_file=data_root +\n        'annotations/validation-annotations-human-imagelabels-boxable.csv',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/validation-annotations-bbox.csv',\n        img_prefix=data_root + 'OpenImages/validation/',\n        label_file=data_root + 'annotations/class-descriptions-boxable.csv',\n        hierarchy_file=data_root +\n        'annotations/bbox_labels_600_hierarchy.json',\n        meta_file=data_root + 'annotations/validation-image-metas.pkl',\n        image_level_ann_file=data_root +\n        'annotations/validation-annotations-human-imagelabels-boxable.csv',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric='mAP')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/voc0712.py",
    "content": "# dataset settings\ndataset_type = 'VOCDataset'\ndata_root = 'data/VOCdevkit/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1000, 600),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=[\n                data_root + 'VOC2007/ImageSets/Main/trainval.txt',\n                data_root + 'VOC2012/ImageSets/Main/trainval.txt'\n            ],\n            img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric='mAP')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/datasets/wider_face.py",
    "content": "# dataset settings\ndataset_type = 'WIDERFaceDataset'\ndata_root = 'data/WIDERFace/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(300, 300),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=60,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'train.txt',\n            img_prefix=data_root + 'WIDER_train/',\n            min_size=17,\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'val.txt',\n        img_prefix=data_root + 'WIDER_val/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'val.txt',\n        img_prefix=data_root + 'WIDER_val/',\n        pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/default_runtime.py",
    "content": "checkpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\ncustom_hooks = [dict(type='NumClassCheckHook')]\n\ndist_params = dict(backend='nccl')\nlog_level = 'INFO'\nload_from = None\nresume_from = None\nworkflow = [('train', 1)]\n\n# disable opencv multithreading to avoid system being overloaded\nopencv_num_threads = 0\n# set multi-process start method as `fork` to speed up the training\nmp_start_method = 'fork'\n\n# Default setting for scaling LR automatically\n#   - `enable` means enable scaling LR automatically\n#       or not by default.\n#   - `base_batch_size` = (8 GPUs) x (2 samples per GPU).\nauto_scale_lr = dict(enable=False, base_batch_size=16)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='CascadeRoIHead',\n        num_stages=3,\n        stage_loss_weights=[1, 0.5, 0.25],\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ],\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.5,\n                    min_pos_iou=0.5,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.6,\n                    min_pos_iou=0.6,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.7,\n                    neg_iou_thr=0.7,\n                    min_pos_iou=0.7,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False)\n        ]),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n   
         max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/cascade_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='CascadeRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='CascadeRoIHead',\n        num_stages=3,\n        stage_loss_weights=[1, 0.5, 0.25],\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                
loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ]),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.5,\n                    min_pos_iou=0.5,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.6,\n                    min_pos_iou=0.6,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.7,\n                    neg_iou_thr=0.7,\n                    min_pos_iou=0.7,\n                    match_low_quality=False,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                pos_weight=-1,\n                debug=False)\n        ]),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/fast_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='FastRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=False,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/faster_rcnn_r50_caffe_c4.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='FasterRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=1024,\n        feat_channels=1024,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[2, 4, 8, 16, 32],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[16]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        shared_head=dict(\n            type='ResLayer',\n            depth=50,\n            stage=3,\n            stride=2,\n            dilation=1,\n            style='caffe',\n            norm_cfg=norm_cfg,\n            norm_eval=True,\n            init_cfg=dict(\n                type='Pretrained',\n                checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=1024,\n            featmap_strides=[16]),\n        bbox_head=dict(\n            type='BBoxHead',\n            with_avg_pool=True,\n            roi_feat_size=7,\n            in_channels=2048,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=12000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=False,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n            
    add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=6000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/faster_rcnn_r50_caffe_dc5.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='FasterRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        strides=(1, 2, 2, 1),\n        dilations=(1, 1, 1, 2),\n        out_indices=(3, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=2048,\n        feat_channels=2048,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[2, 4, 8, 16, 32],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[16]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=2048,\n            featmap_strides=[16]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=2048,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=12000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=False,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms=dict(type='nms', iou_threshold=0.7),\n            nms_pre=6000,\n            max_per_img=1000,\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', 
iou_threshold=0.5),\n            max_per_img=100)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/faster_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='FasterRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=False,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            
nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100)\n        # soft-nms is also supported for rcnn testing\n        # e.g., nms=dict(type='soft_nms', iou_threshold=0.5, min_score=0.05)\n    ))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/mask_rcnn_r50_caffe_c4.py",
    "content": "# model settings\nnorm_cfg = dict(type='BN', requires_grad=False)\nmodel = dict(\n    type='MaskRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=1024,\n        feat_channels=1024,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[2, 4, 8, 16, 32],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[16]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        shared_head=dict(\n            type='ResLayer',\n            depth=50,\n            stage=3,\n            stride=2,\n            dilation=1,\n            style='caffe',\n            norm_cfg=norm_cfg,\n            norm_eval=True),\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=1024,\n            featmap_strides=[16]),\n        bbox_head=dict(\n            type='BBoxHead',\n            with_avg_pool=True,\n            roi_feat_size=7,\n            in_channels=2048,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n        mask_roi_extractor=None,\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=0,\n            in_channels=2048,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=12000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=False,\n                
ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=14,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=6000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            max_per_img=1000,\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/mask_rcnn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='MaskRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n   
         sampler=dict(\n                type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            mask_size=28,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/retinanet_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='RetinaNet',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_input',\n        num_outs=5),\n    bbox_head=dict(\n        type='RetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.5),\n        max_per_img=100))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/rpn_r50_caffe_c4.py",
    "content": "# model settings\nmodel = dict(\n    type='RPN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=3,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        out_indices=(2, ),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    neck=None,\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=1024,\n        feat_channels=1024,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[2, 4, 8, 16, 32],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[16]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=12000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/rpn_r50_fpn.py",
    "content": "# model settings\nmodel = dict(\n    type='RPN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=2000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/models/ssd300.py",
    "content": "# model settings\ninput_size = 300\nmodel = dict(\n    type='SingleStageDetector',\n    backbone=dict(\n        type='SSDVGG',\n        depth=16,\n        with_last_pool=False,\n        ceil_mode=True,\n        out_indices=(3, 4),\n        out_feature_indices=(22, 34),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://vgg16_caffe')),\n    neck=dict(\n        type='SSDNeck',\n        in_channels=(512, 1024),\n        out_channels=(512, 1024, 512, 256, 256, 256),\n        level_strides=(2, 2, 1, 1),\n        level_paddings=(1, 1, 0, 0),\n        l2_norm_scale=20),\n    bbox_head=dict(\n        type='SSDHead',\n        in_channels=(512, 1024, 512, 256, 256, 256),\n        num_classes=80,\n        anchor_generator=dict(\n            type='SSDAnchorGenerator',\n            scale_major=False,\n            input_size=input_size,\n            basesize_ratio_range=(0.15, 0.9),\n            strides=[8, 16, 32, 64, 100, 300],\n            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2])),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.,\n            ignore_iof_thr=-1,\n            gt_max_assign_all=False),\n        smoothl1_beta=1.,\n        allowed_border=-1,\n        pos_weight=-1,\n        neg_pos_ratio=3,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        nms=dict(type='nms', iou_threshold=0.45),\n        min_bbox_size=0,\n        score_thr=0.02,\n        max_per_img=200))\ncudnn_benchmark = True\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/schedules/schedule_1x.py",
    "content": "# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
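The schedule fragments in configs/_base_/schedules (the 1x file above, plus the 20e and 2x variants that follow, which differ only in the LR decay steps and max_epochs) are not standalone configs; full configs list them in `_base_` and override individual keys. A minimal sketch of loading such a composed config and adjusting the schedule at load time, assuming the mmcv 1.x `Config` API that this vendored MMDetection 2.x tree uses; the learning-rate value below is illustrative, not a recommendation:

```python
# Minimal sketch: compose a full config (model + dataset + schedule + runtime) and tweak it.
# Assumes mmcv 1.x is installed and the path is resolved from DLTA_AI_app/mmdetection.
from mmcv import Config

cfg = Config.fromfile('configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py')

# Keys defined in schedule_1x.py are now plain attributes and can be overridden,
# e.g. scaling the LR down for a smaller effective batch size (illustrative value).
cfg.optimizer.lr = 0.0025
cfg.lr_config.warmup_iters = 1000

print(cfg.runner)   # {'type': 'EpochBasedRunner', 'max_epochs': 12}
```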
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/schedules/schedule_20e.py",
    "content": "# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/_base_/schedules/schedule_2x.py",
    "content": "# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/albu_example/mask_rcnn_r50_fpn_albu_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nalbu_train_transforms = [\n    dict(\n        type='ShiftScaleRotate',\n        shift_limit=0.0625,\n        scale_limit=0.0,\n        rotate_limit=0,\n        interpolation=1,\n        p=0.5),\n    dict(\n        type='RandomBrightnessContrast',\n        brightness_limit=[0.1, 0.3],\n        contrast_limit=[0.1, 0.3],\n        p=0.2),\n    dict(\n        type='OneOf',\n        transforms=[\n            dict(\n                type='RGBShift',\n                r_shift_limit=10,\n                g_shift_limit=10,\n                b_shift_limit=10,\n                p=1.0),\n            dict(\n                type='HueSaturationValue',\n                hue_shift_limit=20,\n                sat_shift_limit=30,\n                val_shift_limit=20,\n                p=1.0)\n        ],\n        p=0.1),\n    dict(type='JpegCompression', quality_lower=85, quality_upper=95, p=0.2),\n    dict(type='ChannelShuffle', p=0.1),\n    dict(\n        type='OneOf',\n        transforms=[\n            dict(type='Blur', blur_limit=3, p=1.0),\n            dict(type='MedianBlur', blur_limit=3, p=1.0)\n        ],\n        p=0.1),\n]\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='Pad', size_divisor=32),\n    dict(\n        type='Albu',\n        transforms=albu_train_transforms,\n        bbox_params=dict(\n            type='BboxParams',\n            format='pascal_voc',\n            label_fields=['gt_labels'],\n            min_visibility=0.0,\n            filter_lost_elements=True),\n        keymap={\n            'img': 'image',\n            'gt_masks': 'masks',\n            'gt_bboxes': 'bboxes'\n        },\n        update_pad_shape=False,\n        skip_img_without_anno=True),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'],\n        meta_keys=('filename', 'ori_shape', 'img_shape', 'img_norm_cfg',\n                   'pad_shape', 'scale_factor'))\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
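The `Albu` step above is MMDetection's wrapper around Albumentations: `albu_train_transforms` is forwarded unchanged, and `keymap` renames MMDetection's `img`/`gt_masks`/`gt_bboxes` to the `image`/`masks`/`bboxes` names Albumentations expects. For reference, a standalone sketch of the same augmentation chain driven directly through Albumentations, assuming a release that still ships `JpegCompression` (the transform names follow whatever Albumentations version this config was written against); the image array and box/label values are made up:

```python
# Standalone sketch of the augmentation chain above, using Albumentations directly.
import numpy as np
import albumentations as A

transform = A.Compose(
    [
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.0, rotate_limit=0,
                           interpolation=1, p=0.5),
        A.RandomBrightnessContrast(brightness_limit=[0.1, 0.3],
                                   contrast_limit=[0.1, 0.3], p=0.2),
        A.OneOf([
            A.RGBShift(r_shift_limit=10, g_shift_limit=10, b_shift_limit=10, p=1.0),
            A.HueSaturationValue(hue_shift_limit=20, sat_shift_limit=30,
                                 val_shift_limit=20, p=1.0),
        ], p=0.1),
        A.JpegCompression(quality_lower=85, quality_upper=95, p=0.2),
        A.ChannelShuffle(p=0.1),
        A.OneOf([A.Blur(blur_limit=3, p=1.0), A.MedianBlur(blur_limit=3, p=1.0)], p=0.1),
    ],
    # Mirrors the bbox_params block: pascal_voc boxes plus a parallel list of labels.
    bbox_params=A.BboxParams(format='pascal_voc', label_fields=['gt_labels'],
                             min_visibility=0.0),
)

img = np.random.randint(0, 255, (800, 1333, 3), dtype=np.uint8)   # dummy HWC image
out = transform(image=img, bboxes=[[10, 20, 200, 300]], gt_labels=[1])
print(out['image'].shape, out['bboxes'], out['gt_labels'])
```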
  {
    "path": "DLTA_AI_app/mmdetection/configs/atss/atss_r101_fpn_1x_coco.py",
    "content": "_base_ = './atss_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/atss/atss_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='ATSS',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    bbox_head=dict(\n        type='ATSSHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(type='ATSSAssigner', topk=9),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/atss/metafile.yml",
    "content": "Collections:\n  - Name: ATSS\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ATSS\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1912.02424\n      Title: 'Bridging the Gap Between Anchor-based and Anchor-free Detection via Adaptive Training Sample Selection'\n    README: configs/atss/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/atss.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: atss_r50_fpn_1x_coco\n    In Collection: ATSS\n    Config: configs/atss/atss_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.7\n      inference time (ms/im):\n        - value: 50.76\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth\n\n  - Name: atss_r101_fpn_1x_coco\n    In Collection: ATSS\n    Config: configs/atss/atss_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.6\n      inference time (ms/im):\n        - value: 81.3\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/atss/atss_r101_fpn_1x_coco/atss_r101_fpn_1x_20200825-dfcadd6f.pth\n"
  },
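Each entry in these metafiles pairs a config path with a released checkpoint URL, which is all that is needed to run the model. A minimal inference sketch for the `atss_r50_fpn_1x_coco` entry above using MMDetection 2.x's high-level API, assuming `mmdet` and its dependencies are installed and the script is run from DLTA_AI_app/mmdetection; `demo.jpg` is an illustrative image path, and passing the URL directly relies on mmcv's checkpoint loader accepting HTTP links:

```python
# Minimal inference sketch for the atss_r50_fpn_1x_coco entry above (mmdet 2.x high-level API).
from mmdet.apis import init_detector, inference_detector

config_file = 'configs/atss/atss_r50_fpn_1x_coco.py'
checkpoint = ('https://download.openmmlab.com/mmdetection/v2.0/atss/'
              'atss_r50_fpn_1x_coco/atss_r50_fpn_1x_coco_20200209-985f7bd0.pth')

model = init_detector(config_file, checkpoint, device='cpu')  # 'cuda:0' if a GPU is available
result = inference_detector(model, 'demo.jpg')  # per-class arrays of [x1, y1, x2, y2, score]
print(len(result), result[0].shape)
```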
  {
    "path": "DLTA_AI_app/mmdetection/configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py",
    "content": "# We follow the original implementation which\n# adopts the Caffe pre-trained backbone.\n_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='AutoAssign',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5,\n        relu_before_extra_convs=True,\n        init_cfg=dict(type='Caffe2Xavier', layer='Conv2d')),\n    bbox_head=dict(\n        type='AutoAssignHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128],\n        loss_bbox=dict(type='GIoULoss', loss_weight=5.0)),\n    train_cfg=None,\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(lr=0.01, paramwise_cfg=dict(norm_decay_mult=0.))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 1000,\n    step=[8, 11])\ntotal_epochs = 12\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/autoassign/metafile.yml",
    "content": "Collections:\n  - Name: AutoAssign\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - AutoAssign\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/2007.03496\n      Title: 'AutoAssign: Differentiable Label Assignment for Dense Object Detection'\n    README: configs/autoassign/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.12.0/mmdet/models/detectors/autoassign.py#L6\n      Version: v2.12.0\n\nModels:\n  - Name: autoassign_r50_fpn_8x2_1x_coco\n    In Collection: AutoAssign\n    Config: configs/autoassign/autoassign_r50_fpn_8x2_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.08\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/autoassign/auto_assign_r50_fpn_1x_coco/auto_assign_r50_fpn_1x_coco_20210413_115540-5e17991f.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    neck=dict(\n        type='FPN_CARAFE',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        start_level=0,\n        end_level=-1,\n        norm_cfg=None,\n        act_cfg=None,\n        order=('conv', 'norm', 'act'),\n        upsample_cfg=dict(\n            type='carafe',\n            up_kernel=5,\n            up_group=1,\n            encoder_kernel=3,\n            encoder_dilation=1,\n            compressed_channels=64)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=64),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=64),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    neck=dict(\n        type='FPN_CARAFE',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5,\n        start_level=0,\n        end_level=-1,\n        norm_cfg=None,\n        act_cfg=None,\n        order=('conv', 'norm', 'act'),\n        upsample_cfg=dict(\n            type='carafe',\n            up_kernel=5,\n            up_group=1,\n            encoder_kernel=3,\n            encoder_dilation=1,\n            compressed_channels=64)),\n    roi_head=dict(\n        mask_head=dict(\n            upsample_cfg=dict(\n                type='carafe',\n                scale_factor=2,\n                up_kernel=5,\n                up_group=1,\n                encoder_kernel=3,\n                encoder_dilation=1,\n                compressed_channels=64))))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=64),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=64),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/carafe/metafile.yml",
    "content": "Collections:\n  - Name: CARAFE\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RPN\n        - FPN_CARAFE\n        - ResNet\n        - RoIPool\n    Paper:\n      URL: https://arxiv.org/abs/1905.02188\n      Title: 'CARAFE: Content-Aware ReAssembly of FEatures'\n    README: configs/carafe/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.12.0/mmdet/models/necks/fpn_carafe.py#L11\n      Version: v2.12.0\n\nModels:\n  - Name: faster_rcnn_r50_fpn_carafe_1x_coco\n    In Collection: CARAFE\n    Config: configs/carafe/faster_rcnn_r50_fpn_carafe_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.26\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/carafe/faster_rcnn_r50_fpn_carafe_1x_coco/faster_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.386_20200504_175733-385a75b7.pth\n\n  - Name: mask_rcnn_r50_fpn_carafe_1x_coco\n    In Collection: CARAFE\n    Config: configs/carafe/mask_rcnn_r50_fpn_carafe_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.31\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 35.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/carafe/mask_rcnn_r50_fpn_carafe_1x_coco/mask_rcnn_r50_fpn_carafe_1x_coco_bbox_mAP-0.393__segm_mAP-0.358_20200503_135957-8687f195.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = ['./cascade_mask_rcnn_r50_fpn_1x_coco.py']\n\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py",
    "content": "_base_ = ['./cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py']\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],\n# multiscale_mode='range'\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_20e.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain_3x_coco_instance.py',\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=8,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))\n\n# ResNeXt-101-32x8d model trained with Caffe2 at FB,\n# so the mean and std need to be changed.\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675],\n    std=[57.375, 57.120, 58.395],\n    to_rgb=False)\n\n# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],\n# multiscale_mode='range'\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    type='CascadeRCNN',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py",
    "content": "_base_ = './cascade_rcnn_r50_fpn_20e_coco.py'\nmodel = dict(\n    type='CascadeRCNN',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rcnn/metafile.yml",
    "content": "Collections:\n  - Name: Cascade R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Cascade R-CNN\n        - FPN\n        - RPN\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: http://dx.doi.org/10.1109/tpami.2019.2956516\n      Title: 'Cascade R-CNN: Delving into High Quality Object Detection'\n    README: configs/cascade_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/cascade_rcnn.py#L6\n      Version: v2.0.0\n  - Name: Cascade Mask R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Cascade R-CNN\n        - FPN\n        - RPN\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: http://dx.doi.org/10.1109/tpami.2019.2956516\n      Title: 'Cascade R-CNN: Delving into High Quality Object Detection'\n    README: configs/cascade_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/cascade_rcnn.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: cascade_rcnn_r50_caffe_fpn_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.2\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_caffe_fpn_1x_coco/cascade_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.404_20200504_174853-b857be87.pth\n\n  - Name: cascade_rcnn_r50_fpn_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.4\n      inference time (ms/im):\n        - value: 62.11\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco/cascade_rcnn_r50_fpn_1x_coco_20200316-3dc56deb.pth\n\n  - Name: cascade_rcnn_r50_fpn_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 4.4\n      inference time (ms/im):\n        - value: 62.11\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco/cascade_rcnn_r50_fpn_20e_coco_bbox_mAP-0.41_20200504_175131-e9872a90.pth\n\n  - Name: cascade_rcnn_r101_caffe_fpn_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.2\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.3\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_caffe_fpn_1x_coco/cascade_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.423_20200504_175649-cab8dbd5.pth\n\n  - Name: cascade_rcnn_r101_fpn_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      inference time (ms/im):\n        - value: 74.07\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco/cascade_rcnn_r101_fpn_1x_coco_20200317-0b6a2fbf.pth\n\n  - Name: cascade_rcnn_r101_fpn_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      inference time (ms/im):\n        - value: 74.07\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_r101_fpn_20e_coco/cascade_rcnn_r101_fpn_20e_coco_bbox_mAP-0.425_20200504_231812-5057dcc5.pth\n\n  - Name: cascade_rcnn_x101_32x4d_fpn_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      inference time (ms/im):\n        - value: 91.74\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_1x_coco/cascade_rcnn_x101_32x4d_fpn_1x_coco_20200316-95c2deb6.pth\n\n  - Name: cascade_rcnn_x101_32x4d_fpn_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_32x4d_fpn_20e_coco/cascade_rcnn_x101_32x4d_fpn_20e_coco_20200906_134608-9ae0a720.pth\n\n  - Name: cascade_rcnn_x101_64x4d_fpn_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.7\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_1x_coco/cascade_rcnn_x101_64x4d_fpn_1x_coco_20200515_075702-43ce6a30.pth\n\n  - Name: cascade_rcnn_x101_64x4d_fpn_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 10.7\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n   
     Metrics:\n          box AP: 44.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_rcnn_x101_64x4d_fpn_20e_coco/cascade_rcnn_x101_64x4d_fpn_20e_coco_20200509_224357-051557b1.pth\n\n  - Name: cascade_mask_rcnn_r50_caffe_fpn_1x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.9\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_1x_coco/cascade_mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.412__segm_mAP-0.36_20200504_174659-5004b251.pth\n\n  - Name: cascade_mask_rcnn_r50_fpn_1x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      inference time (ms/im):\n        - value: 89.29\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  35.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth\n\n  - Name: cascade_mask_rcnn_r50_fpn_20e_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      inference time (ms/im):\n        - value: 89.29\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth\n\n  - Name: cascade_mask_rcnn_r101_caffe_fpn_1x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.8\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_1x_coco/cascade_mask_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.432__segm_mAP-0.376_20200504_174813-5c1e9599.pth\n\n  - Name: cascade_mask_rcnn_r101_fpn_1x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.9\n      inference time (ms/im):\n        - value: 102.04\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          
resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth\n\n  - Name: cascade_mask_rcnn_r101_fpn_20e_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 7.9\n      inference time (ms/im):\n        - value: 102.04\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_1x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.2\n      inference time (ms/im):\n        - value: 116.28\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  38.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_20e_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 9.2\n      inference time (ms/im):\n        - value: 116.28\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  39.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth\n\n  - Name: cascade_mask_rcnn_x101_64x4d_fpn_1x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 12.2\n      inference time (ms/im):\n        - value: 149.25\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          
mask AP:  39.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth\n\n  - Name: cascade_mask_rcnn_x101_64x4d_fpn_20e_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 12.2\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth\n\n  - Name: cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.7\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210707_002651-6e29b3a6.pth\n\n  - Name: cascade_mask_rcnn_r50_fpn_mstrain_3x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.9\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco/cascade_mask_rcnn_r50_fpn_mstrain_3x_coco_20210628_164719-5bdc3824.pth\n\n  - Name: cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.7\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210707_002620-a5bd2389.pth\n\n  - Name: cascade_mask_rcnn_r101_fpn_mstrain_3x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.8\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco/cascade_mask_rcnn_r101_fpn_mstrain_3x_coco_20210628_165236-51a2d363.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco\n    In Collection: Cascade Mask 
R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 9.0\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210706_225234-40773067.pth\n\n  - Name: cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 12.1\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210719_180640-9ff7e76f.pth\n\n  - Name: cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 12.0\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco/cascade_mask_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210719_210311-d3e64ba0.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    roi_head=dict(\n        bbox_head=dict(\n            bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rcnn=dict(\n            assigner=dict(\n                pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65),\n            sampler=dict(num=256))),\n    test_cfg=dict(rcnn=dict(score_thr=1e-3)))\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadProposals', num_max_proposals=300),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadProposals', num_max_proposals=300),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='ToTensor', keys=['proposals']),\n            dict(\n                type='ToDataContainer',\n                fields=[dict(key='proposals', stack=False)]),\n            dict(type='Collect', keys=['img', 'proposals']),\n        ])\n]\ndata = dict(\n    train=dict(\n        proposal_file=data_root +\n        'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl',\n        pipeline=train_pipeline),\n    val=dict(\n        proposal_file=data_root +\n        'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl',\n        pipeline=test_pipeline),\n    test=dict(\n        proposal_file=data_root +\n        'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl',\n        pipeline=test_pipeline))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py'\nrpn_weight = 0.7\nmodel = dict(\n    rpn_head=dict(\n        _delete_=True,\n        type='CascadeRPNHead',\n        num_stages=2,\n        stages=[\n            dict(\n                type='StageCascadeRPNHead',\n                in_channels=256,\n                feat_channels=256,\n                anchor_generator=dict(\n                    type='AnchorGenerator',\n                    scales=[8],\n                    ratios=[1.0],\n                    strides=[4, 8, 16, 32, 64]),\n                adapt_cfg=dict(type='dilation', dilation=3),\n                bridged_feature=True,\n                sampling=False,\n                with_cls=False,\n                reg_decoded_bbox=True,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=(.0, .0, .0, .0),\n                    target_stds=(0.1, 0.1, 0.5, 0.5)),\n                loss_bbox=dict(\n                    type='IoULoss', linear=True,\n                    loss_weight=10.0 * rpn_weight)),\n            dict(\n                type='StageCascadeRPNHead',\n                in_channels=256,\n                feat_channels=256,\n                adapt_cfg=dict(type='offset'),\n                bridged_feature=False,\n                sampling=True,\n                with_cls=True,\n                reg_decoded_bbox=True,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=(.0, .0, .0, .0),\n                    target_stds=(0.05, 0.05, 0.1, 0.1)),\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=True,\n                    loss_weight=1.0 * rpn_weight),\n                loss_bbox=dict(\n                    type='IoULoss', linear=True,\n                    loss_weight=10.0 * rpn_weight))\n        ]),\n    roi_head=dict(\n        bbox_head=dict(\n            bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.5),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=[\n            dict(\n                assigner=dict(\n                    type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5),\n                allowed_border=-1,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.7,\n                    neg_iou_thr=0.7,\n                    min_pos_iou=0.3,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=256,\n                    pos_fraction=0.5,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=False),\n                allowed_border=-1,\n                pos_weight=-1,\n                debug=False)\n        ],\n        rpn_proposal=dict(max_per_img=300, nms=dict(iou_threshold=0.8)),\n        rcnn=dict(\n            assigner=dict(\n                pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65),\n            sampler=dict(type='RandomSampler', num=256))),\n    test_cfg=dict(\n        rpn=dict(max_per_img=300, nms=dict(iou_threshold=0.8)),\n        rcnn=dict(score_thr=1e-3)))\noptimizer_config = dict(\n    _delete_=True, 
grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rpn/crpn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    rpn_head=dict(\n        _delete_=True,\n        type='CascadeRPNHead',\n        num_stages=2,\n        stages=[\n            dict(\n                type='StageCascadeRPNHead',\n                in_channels=256,\n                feat_channels=256,\n                anchor_generator=dict(\n                    type='AnchorGenerator',\n                    scales=[8],\n                    ratios=[1.0],\n                    strides=[4, 8, 16, 32, 64]),\n                adapt_cfg=dict(type='dilation', dilation=3),\n                bridged_feature=True,\n                sampling=False,\n                with_cls=False,\n                reg_decoded_bbox=True,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=(.0, .0, .0, .0),\n                    target_stds=(0.1, 0.1, 0.5, 0.5)),\n                loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0)),\n            dict(\n                type='StageCascadeRPNHead',\n                in_channels=256,\n                feat_channels=256,\n                adapt_cfg=dict(type='offset'),\n                bridged_feature=False,\n                sampling=True,\n                with_cls=True,\n                reg_decoded_bbox=True,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=(.0, .0, .0, .0),\n                    target_stds=(0.05, 0.05, 0.1, 0.1)),\n                loss_cls=dict(\n                    type='CrossEntropyLoss', use_sigmoid=True,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='IoULoss', linear=True, loss_weight=10.0))\n        ]),\n    train_cfg=dict(rpn=[\n        dict(\n            assigner=dict(\n                type='RegionAssigner', center_ratio=0.2, ignore_ratio=0.5),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.7,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1,\n                iou_calculator=dict(type='BboxOverlaps2D')),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False)\n    ]),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.8),\n            min_bbox_size=0)))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cascade_rpn/metafile.yml",
    "content": "Collections:\n  - Name: Cascade RPN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Cascade RPN\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1909.06720\n      Title: 'Cascade RPN: Delving into High-Quality Region Proposal Network with Adaptive Convolution'\n    README: configs/cascade_rpn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.8.0/mmdet/models/dense_heads/cascade_rpn_head.py#L538\n      Version: v2.8.0\n\nModels:\n  - Name: crpn_fast_rcnn_r50_caffe_fpn_1x_coco\n    In Collection: Cascade RPN\n    Config: configs/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_fast_rcnn_r50_caffe_fpn_1x_coco/crpn_fast_rcnn_r50_caffe_fpn_1x_coco-cb486e66.pth\n\n  - Name: crpn_faster_rcnn_r50_caffe_fpn_1x_coco\n    In Collection: Cascade RPN\n    Config: configs/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cascade_rpn/crpn_faster_rcnn_r50_caffe_fpn_1x_coco/crpn_faster_rcnn_r50_caffe_fpn_1x_coco-c8283cca.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/centernet/centernet_resnet18_140e_coco.py",
    "content": "_base_ = './centernet_resnet18_dcnv2_140e_coco.py'\n\nmodel = dict(neck=dict(use_dcn=False))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/centernet/centernet_resnet18_dcnv2_140e_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    type='CenterNet',\n    backbone=dict(\n        type='ResNet',\n        depth=18,\n        norm_eval=False,\n        norm_cfg=dict(type='BN'),\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),\n    neck=dict(\n        type='CTResNetNeck',\n        in_channel=512,\n        num_deconv_filters=(256, 128, 64),\n        num_deconv_kernels=(4, 4, 4),\n        use_dcn=True),\n    bbox_head=dict(\n        type='CenterNetHead',\n        num_classes=80,\n        in_channel=64,\n        feat_channel=64,\n        loss_center_heatmap=dict(type='GaussianFocalLoss', loss_weight=1.0),\n        loss_wh=dict(type='L1Loss', loss_weight=0.1),\n        loss_offset=dict(type='L1Loss', loss_weight=1.0)),\n    train_cfg=None,\n    test_cfg=dict(topk=100, local_maximum_kernel=3, max_per_img=100))\n\n# We fixed the incorrect img_norm_cfg problem in the source code.\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True, color_type='color'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(\n        type='RandomCenterCropPad',\n        crop_size=(512, 512),\n        ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),\n        mean=[0, 0, 0],\n        std=[1, 1, 1],\n        to_rgb=True,\n        test_pad_mode=None),\n    dict(type='Resize', img_scale=(512, 512), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(\n        type='MultiScaleFlipAug',\n        scale_factor=1.0,\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(\n                type='RandomCenterCropPad',\n                ratios=None,\n                border=None,\n                mean=[0, 0, 0],\n                std=[1, 1, 1],\n                to_rgb=True,\n                test_mode=True,\n                test_pad_mode=['logical_or', 31],\n                test_pad_add_pix=1),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='DefaultFormatBundle'),\n            dict(\n                type='Collect',\n                meta_keys=('filename', 'ori_filename', 'ori_shape',\n                           'img_shape', 'pad_shape', 'scale_factor', 'flip',\n                           'flip_direction', 'img_norm_cfg', 'border'),\n                keys=['img'])\n        ])\n]\n\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    samples_per_gpu=16,\n    workers_per_gpu=4,\n    train=dict(\n        _delete_=True,\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    
test=dict(pipeline=test_pipeline))\n\n# optimizer\n# Based on the default settings of modern detectors, the SGD effect is better\n# than the Adam in the source code, so we use SGD default settings and\n# if you use adam+lr5e-4, the map is 29.1.\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n\n# learning policy\n# Based on the default settings of modern detectors, we added warmup settings.\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=1.0 / 1000,\n    step=[18, 24])  # the real step is [18*5, 24*5]\nrunner = dict(max_epochs=28)  # the real epoch is 28*5=140\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (16 samples per GPU)\nauto_scale_lr = dict(base_batch_size=128)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/centernet/metafile.yml",
    "content": "Collections:\n  - Name: CenterNet\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x TITANXP GPUs\n      Architecture:\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1904.07850\n      Title: 'Objects as Points'\n    README: configs/centernet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.13.0/mmdet/models/detectors/centernet.py#L10\n      Version: v2.13.0\n\nModels:\n  - Name: centernet_resnet18_dcnv2_140e_coco\n    In Collection: CenterNet\n    Config: configs/centernet/centernet_resnet18_dcnv2_140e_coco.py\n    Metadata:\n      Batch Size: 128\n      Training Memory (GB): 3.47\n      Epochs: 140\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 29.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_dcnv2_140e_coco/centernet_resnet18_dcnv2_140e_coco_20210702_155131-c8cd631f.pth\n\n  - Name: centernet_resnet18_140e_coco\n    In Collection: CenterNet\n    Config: configs/centernet/centernet_resnet18_140e_coco.py\n    Metadata:\n      Batch Size: 128\n      Training Memory (GB): 3.45\n      Epochs: 140\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 25.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/centernet/centernet_resnet18_140e_coco/centernet_resnet18_140e_coco_20210705_093630-bb5b3bf7.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py",
    "content": "_base_ = [\n    '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py'\n]\n\n# model settings\nmodel = dict(\n    type='CornerNet',\n    backbone=dict(\n        type='HourglassNet',\n        downsample_times=5,\n        num_stacks=2,\n        stage_channels=[256, 256, 384, 384, 384, 512],\n        stage_blocks=[2, 2, 2, 2, 2, 4],\n        norm_cfg=dict(type='BN', requires_grad=True)),\n    neck=None,\n    bbox_head=dict(\n        type='CentripetalHead',\n        num_classes=80,\n        in_channels=256,\n        num_feat_levels=2,\n        corner_emb_channels=0,\n        loss_heatmap=dict(\n            type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),\n        loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1),\n        loss_guiding_shift=dict(\n            type='SmoothL1Loss', beta=1.0, loss_weight=0.05),\n        loss_centripetal_shift=dict(\n            type='SmoothL1Loss', beta=1.0, loss_weight=1)),\n    # training and testing settings\n    train_cfg=None,\n    test_cfg=dict(\n        corner_topk=100,\n        local_maximum_kernel=3,\n        distance_threshold=0.5,\n        score_thr=0.05,\n        max_per_img=100,\n        nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian')))\n# data settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(\n        type='RandomCenterCropPad',\n        crop_size=(511, 511),\n        ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),\n        test_mode=False,\n        test_pad_mode=None,\n        **img_norm_cfg),\n    dict(type='Resize', img_scale=(511, 511), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(\n        type='MultiScaleFlipAug',\n        scale_factor=1.0,\n        flip=True,\n        transforms=[\n            dict(type='Resize'),\n            dict(\n                type='RandomCenterCropPad',\n                crop_size=None,\n                ratios=None,\n                border=None,\n                test_mode=True,\n                test_pad_mode=['logical_or', 127],\n                **img_norm_cfg),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(\n                type='Collect',\n                keys=['img'],\n                meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',\n                           'scale_factor', 'flip', 'img_norm_cfg', 'border')),\n        ])\n]\ndata = dict(\n    samples_per_gpu=6,\n    workers_per_gpu=3,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='Adam', lr=0.0005)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[190])\nrunner = 
dict(type='EpochBasedRunner', max_epochs=210)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (16 GPUs) x (6 samples per GPU)\nauto_scale_lr = dict(base_batch_size=96)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/centripetalnet/metafile.yml",
    "content": "Collections:\n  - Name: CentripetalNet\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - Adam\n      Training Resources: 16x V100 GPUs\n      Architecture:\n        - Corner Pooling\n        - Stacked Hourglass Network\n    Paper:\n      URL: https://arxiv.org/abs/2003.09119\n      Title: 'CentripetalNet: Pursuing High-quality Keypoint Pairs for Object Detection'\n    README: configs/centripetalnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.5.0/mmdet/models/detectors/cornernet.py#L9\n      Version: v2.5.0\n\nModels:\n  - Name: centripetalnet_hourglass104_mstest_16x6_210e_coco\n    In Collection: CentripetalNet\n    Config: configs/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco.py\n    Metadata:\n      Batch Size: 96\n      Training Memory (GB): 16.7\n      inference time (ms/im):\n        - value: 270.27\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 210\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/centripetalnet/centripetalnet_hourglass104_mstest_16x6_210e_coco/centripetalnet_hourglass104_mstest_16x6_210e_coco_20200915_204804-3ccc61e5.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cityscapes/faster_rcnn_r50_fpn_1x_cityscapes.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/cityscapes_detection.py',\n    '../_base_/default_runtime.py'\n]\nmodel = dict(\n    backbone=dict(init_cfg=None),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=8,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))))\n# optimizer\n# lr is set for a batch size of 8\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    # [7] yields higher performance than [6]\n    step=[7])\nrunner = dict(\n    type='EpochBasedRunner', max_epochs=8)  # actual epoch = 8 * 8 = 64\nlog_config = dict(interval=100)\n# For better, more stable performance initialize from COCO\nload_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'  # noqa\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (1 samples per GPU)\nauto_scale_lr = dict(base_batch_size=8)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cityscapes/mask_rcnn_r50_fpn_1x_cityscapes.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/cityscapes_instance.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    backbone=dict(init_cfg=None),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=8,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),\n        mask_head=dict(\n            type='FCNMaskHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=8,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))))\n# optimizer\n# lr is set for a batch size of 8\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    # [7] yields higher performance than [6]\n    step=[7])\nrunner = dict(\n    type='EpochBasedRunner', max_epochs=8)  # actual epoch = 8 * 8 = 64\nlog_config = dict(interval=100)\n# For better, more stable performance initialize from COCO\nload_from = 'https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth'  # noqa\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (1 samples per GPU)\nauto_scale_lr = dict(base_batch_size=8)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/common/lsj_100e_coco_instance.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nimage_size = (1024, 1024)\n\nfile_client_args = dict(backend='disk')\n# comment out the code below to use different file client\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/': 's3://openmmlab/datasets/detection/',\n#         'data/': 's3://openmmlab/datasets/detection/'\n#     }))\n\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=image_size,\n        ratio_range=(0.1, 2.0),\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(\n        type='RandomCrop',\n        crop_type='absolute_range',\n        crop_size=image_size,\n        recompute_bbox=True,\n        allow_negative_crop=True),\n    dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=image_size),  # padding to image_size leads 0.5+ mAP\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=4,  # simply change this from 2 to 16 for 50e - 400e training.\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=5, metric=['bbox', 'segm'])\n\n# optimizer assumes bs=64\noptimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00004)\noptimizer_config = dict(grad_clip=None)\n\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.067,\n    step=[22, 24])\nrunner = dict(type='EpochBasedRunner', max_epochs=25)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/common/mstrain-poly_3x_coco_instance.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],\n# multiscale_mode='range'\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric=['bbox', 'segm'])\n\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n\n# learning policy\n# Experiments show that using step=[9, 11] has higher performance\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[9, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/common/mstrain_3x_coco.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],\n# multiscale_mode='range'\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric='bbox')\n\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n\n# learning policy\n# Experiments show that using step=[9, 11] has higher performance\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[9, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/common/mstrain_3x_coco_instance.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],\n# multiscale_mode='range'\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric=['bbox', 'segm'])\n\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n\n# learning policy\n# Experiments show that using step=[9, 11] has higher performance\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[9, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/common/ssj_270k_coco_instance.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nimage_size = (1024, 1024)\n\nfile_client_args = dict(backend='disk')\n\n# Standard Scale Jittering (SSJ) resizes and crops an image\n# with a resize range of 0.8 to 1.25 of the original image size.\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=image_size,\n        ratio_range=(0.8, 1.25),\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(\n        type='RandomCrop',\n        crop_type='absolute_range',\n        crop_size=image_size,\n        recompute_bbox=True,\n        allow_negative_crop=True),\n    dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=image_size),  # padding to image_size leads 0.5+ mAP\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\n\nevaluation = dict(interval=6000, metric=['bbox', 'segm'])\n\n# optimizer assumes batch_size = (32 GPUs) x (2 samples per GPU)\noptimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00004)\noptimizer_config = dict(grad_clip=None)\n\n# lr steps at [0.9, 0.95, 0.975] of the maximum iterations\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.001,\n    step=[243000, 256500, 263250])\ncheckpoint_config = dict(interval=6000)\n# The model is trained by 270k iterations with batch_size 64,\n# which is roughly equivalent to 144 epochs.\nrunner = dict(type='IterBasedRunner', max_iters=270000)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (32 GPUs) x (2 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/common/ssj_scp_270k_coco_instance.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nimage_size = (1024, 1024)\n\nfile_client_args = dict(backend='disk')\n\n# Standard Scale Jittering (SSJ) resizes and crops an image\n# with a resize range of 0.8 to 1.25 of the original image size.\nload_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=image_size,\n        ratio_range=(0.8, 1.25),\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(\n        type='RandomCrop',\n        crop_type='absolute_range',\n        crop_size=image_size,\n        recompute_bbox=True,\n        allow_negative_crop=True),\n    dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Pad', size=image_size),\n]\ntrain_pipeline = [\n    dict(type='CopyPaste', max_num_pasted=100),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='MultiImageMixDataset',\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=load_pipeline),\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\n\nevaluation = dict(interval=6000, metric=['bbox', 'segm'])\n\n# optimizer assumes batch_size = (32 GPUs) x (2 samples per GPU)\noptimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.00004)\noptimizer_config = dict(grad_clip=None)\n\n# lr steps at [0.9, 0.95, 0.975] of the maximum iterations\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.001,\n    step=[243000, 256500, 263250])\ncheckpoint_config = dict(interval=6000)\n# The model is trained by 270k iterations with batch_size 64,\n# which is roughly equivalent to 144 epochs.\nrunner = dict(type='IterBasedRunner', max_iters=270000)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (32 GPUs) x (2 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py'  # noqa\n\n# please install mmcls>=0.22.0\n# import mmcls.models to trigger register_module in mmcls\ncustom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)\ncheckpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-small_3rdparty_32xb128-noema_in1k_20220301-303e75e3.pth'  # noqa\n\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='mmcls.ConvNeXt',\n        arch='small',\n        out_indices=[0, 1, 2, 3],\n        drop_path_rate=0.6,\n        layer_scale_init_value=1.0,\n        gap_before_final_norm=False,\n        init_cfg=dict(\n            type='Pretrained', checkpoint=checkpoint_file,\n            prefix='backbone.')))\n\noptimizer = dict(\n    _delete_=True,\n    constructor='LearningRateDecayOptimizerConstructor',\n    type='AdamW',\n    lr=0.0002,\n    betas=(0.9, 0.999),\n    weight_decay=0.05,\n    paramwise_cfg={\n        'decay_rate': 0.7,\n        'decay_type': 'layer_wise',\n        'num_layers': 12\n    })\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# please install mmcls>=0.22.0\n# import mmcls.models to trigger register_module in mmcls\ncustom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)\ncheckpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth'  # noqa\n\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='mmcls.ConvNeXt',\n        arch='tiny',\n        out_indices=[0, 1, 2, 3],\n        drop_path_rate=0.4,\n        layer_scale_init_value=1.0,\n        gap_before_final_norm=False,\n        init_cfg=dict(\n            type='Pretrained', checkpoint=checkpoint_file,\n            prefix='backbone.')),\n    neck=dict(in_channels=[96, 192, 384, 768]),\n    roi_head=dict(bbox_head=[\n        dict(\n            type='ConvFCBBoxHead',\n            num_shared_convs=4,\n            num_shared_fcs=1,\n            in_channels=256,\n            conv_out_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            reg_decoded_bbox=True,\n            norm_cfg=dict(type='SyncBN', requires_grad=True),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),\n        dict(\n            type='ConvFCBBoxHead',\n            num_shared_convs=4,\n            num_shared_fcs=1,\n            in_channels=256,\n            conv_out_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.05, 0.05, 0.1, 0.1]),\n            reg_class_agnostic=False,\n            reg_decoded_bbox=True,\n            norm_cfg=dict(type='SyncBN', requires_grad=True),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='GIoULoss', loss_weight=10.0)),\n        dict(\n            type='ConvFCBBoxHead',\n            num_shared_convs=4,\n            num_shared_fcs=1,\n            in_channels=256,\n            conv_out_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.033, 0.033, 0.067, 0.067]),\n            reg_class_agnostic=False,\n            reg_decoded_bbox=True,\n            norm_cfg=dict(type='SyncBN', requires_grad=True),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='GIoULoss', loss_weight=10.0))\n    ]))\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# augmentation strategy originates from DETR / Sparse RCNN\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', 
with_bbox=True, with_mask=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='AutoAugment',\n        policies=[[\n            dict(\n                type='Resize',\n                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),\n                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),\n                           (736, 1333), (768, 1333), (800, 1333)],\n                multiscale_mode='value',\n                keep_ratio=True)\n        ],\n                  [\n                      dict(\n                          type='Resize',\n                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],\n                          multiscale_mode='value',\n                          keep_ratio=True),\n                      dict(\n                          type='RandomCrop',\n                          crop_type='absolute_range',\n                          crop_size=(384, 600),\n                          allow_negative_crop=True),\n                      dict(\n                          type='Resize',\n                          img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                                     (576, 1333), (608, 1333), (640, 1333),\n                                     (672, 1333), (704, 1333), (736, 1333),\n                                     (768, 1333), (800, 1333)],\n                          multiscale_mode='value',\n                          override=True,\n                          keep_ratio=True)\n                  ]]),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(pipeline=train_pipeline), persistent_workers=True)\n\noptimizer = dict(\n    _delete_=True,\n    constructor='LearningRateDecayOptimizerConstructor',\n    type='AdamW',\n    lr=0.0002,\n    betas=(0.9, 0.999),\n    weight_decay=0.05,\n    paramwise_cfg={\n        'decay_rate': 0.7,\n        'decay_type': 'layer_wise',\n        'num_layers': 6\n    })\n\nlr_config = dict(warmup_iters=1000, step=[27, 33])\nrunner = dict(max_epochs=36)\n\n# you need to set mode='dynamic' if you are using pytorch<=1.5.0\nfp16 = dict(loss_scale=dict(init_scale=512))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# please install mmcls>=0.22.0\n# import mmcls.models to trigger register_module in mmcls\ncustom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)\ncheckpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth'  # noqa\n\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='mmcls.ConvNeXt',\n        arch='tiny',\n        out_indices=[0, 1, 2, 3],\n        drop_path_rate=0.4,\n        layer_scale_init_value=1.0,\n        gap_before_final_norm=False,\n        init_cfg=dict(\n            type='Pretrained', checkpoint=checkpoint_file,\n            prefix='backbone.')),\n    neck=dict(in_channels=[96, 192, 384, 768]))\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# augmentation strategy originates from DETR / Sparse RCNN\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='AutoAugment',\n        policies=[[\n            dict(\n                type='Resize',\n                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),\n                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),\n                           (736, 1333), (768, 1333), (800, 1333)],\n                multiscale_mode='value',\n                keep_ratio=True)\n        ],\n                  [\n                      dict(\n                          type='Resize',\n                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],\n                          multiscale_mode='value',\n                          keep_ratio=True),\n                      dict(\n                          type='RandomCrop',\n                          crop_type='absolute_range',\n                          crop_size=(384, 600),\n                          allow_negative_crop=True),\n                      dict(\n                          type='Resize',\n                          img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                                     (576, 1333), (608, 1333), (640, 1333),\n                                     (672, 1333), (704, 1333), (736, 1333),\n                                     (768, 1333), (800, 1333)],\n                          multiscale_mode='value',\n                          override=True,\n                          keep_ratio=True)\n                  ]]),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(pipeline=train_pipeline), persistent_workers=True)\n\noptimizer = dict(\n    _delete_=True,\n    constructor='LearningRateDecayOptimizerConstructor',\n    type='AdamW',\n    lr=0.0001,\n    betas=(0.9, 0.999),\n    weight_decay=0.05,\n    paramwise_cfg={\n        'decay_rate': 0.95,\n        'decay_type': 'layer_wise',\n        'num_layers': 6\n    })\n\nlr_config = dict(warmup_iters=1000, step=[27, 33])\nrunner = dict(max_epochs=36)\n\n# you need to set mode='dynamic' if you are using pytorch<=1.5.0\nfp16 = dict(loss_scale=dict(init_scale=512))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/convnext/metafile.yml",
    "content": "Models:\n  - Name: mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.3\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n        - Mixed Precision Training\n      Training Resources: 8x A100 GPUs\n      Architecture:\n        - ConvNeXt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco/mask_rcnn_convnext-t_p4_w7_fpn_fp16_ms-crop_3x_coco_20220426_154953-050731f4.pth\n    Paper:\n      URL: https://arxiv.org/abs/2201.03545\n      Title: 'A ConvNet for the 2020s'\n    README: configs/convnext/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465\n      Version: v2.16.0\n\n  - Name: cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py\n    Metadata:\n      Training Memory (GB): 9.0\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n        - Mixed Precision Training\n      Training Resources: 8x A100 GPUs\n      Architecture:\n        - ConvNeXt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 50.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 43.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-t_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220509_204200-8f07c40b.pth\n    Paper:\n      URL: https://arxiv.org/abs/2201.03545\n      Title: 'A ConvNet for the 2020s'\n    README: configs/convnext/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465\n      Version: v2.25.0\n\n  - Name: cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco\n    In Collection: Cascade Mask R-CNN\n    Config: configs/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco.py\n    Metadata:\n      Training Memory (GB): 12.3\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n        - Mixed Precision Training\n      Training Resources: 8x A100 GPUs\n      Architecture:\n        - ConvNeXt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 51.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 44.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/convnext/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco/cascade_mask_rcnn_convnext-s_p4_w7_fpn_giou_4conv1f_fp16_ms-crop_3x_coco_20220510_201004-3d24f5a4.pth\n    Paper:\n      URL: https://arxiv.org/abs/2201.03545\n      Title: 'A ConvNet for the 2020s'\n    README: configs/convnext/README.md\n    Code:\n      URL: 
https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465\n      Version: v2.25.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py",
    "content": "_base_ = [\n    '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py'\n]\n\n# model settings\nmodel = dict(\n    type='CornerNet',\n    backbone=dict(\n        type='HourglassNet',\n        downsample_times=5,\n        num_stacks=2,\n        stage_channels=[256, 256, 384, 384, 384, 512],\n        stage_blocks=[2, 2, 2, 2, 2, 4],\n        norm_cfg=dict(type='BN', requires_grad=True)),\n    neck=None,\n    bbox_head=dict(\n        type='CornerHead',\n        num_classes=80,\n        in_channels=256,\n        num_feat_levels=2,\n        corner_emb_channels=1,\n        loss_heatmap=dict(\n            type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),\n        loss_embedding=dict(\n            type='AssociativeEmbeddingLoss',\n            pull_weight=0.10,\n            push_weight=0.10),\n        loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)),\n    # training and testing settings\n    train_cfg=None,\n    test_cfg=dict(\n        corner_topk=100,\n        local_maximum_kernel=3,\n        distance_threshold=0.5,\n        score_thr=0.05,\n        max_per_img=100,\n        nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian')))\n# data settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(\n        type='RandomCenterCropPad',\n        crop_size=(511, 511),\n        ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),\n        test_mode=False,\n        test_pad_mode=None,\n        **img_norm_cfg),\n    dict(type='Resize', img_scale=(511, 511), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(\n        type='MultiScaleFlipAug',\n        scale_factor=1.0,\n        flip=True,\n        transforms=[\n            dict(type='Resize'),\n            dict(\n                type='RandomCenterCropPad',\n                crop_size=None,\n                ratios=None,\n                border=None,\n                test_mode=True,\n                test_pad_mode=['logical_or', 127],\n                **img_norm_cfg),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(\n                type='Collect',\n                keys=['img'],\n                meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',\n                           'scale_factor', 'flip', 'img_norm_cfg', 'border')),\n        ])\n]\ndata = dict(\n    samples_per_gpu=5,\n    workers_per_gpu=3,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='Adam', lr=0.0005)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[180])\nrunner = dict(type='EpochBasedRunner', max_epochs=210)\n\n# NOTE: `auto_scale_lr` is 
for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (10 GPUs) x (5 samples per GPU)\nauto_scale_lr = dict(base_batch_size=50)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py",
    "content": "_base_ = [\n    '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py'\n]\n\n# model settings\nmodel = dict(\n    type='CornerNet',\n    backbone=dict(\n        type='HourglassNet',\n        downsample_times=5,\n        num_stacks=2,\n        stage_channels=[256, 256, 384, 384, 384, 512],\n        stage_blocks=[2, 2, 2, 2, 2, 4],\n        norm_cfg=dict(type='BN', requires_grad=True)),\n    neck=None,\n    bbox_head=dict(\n        type='CornerHead',\n        num_classes=80,\n        in_channels=256,\n        num_feat_levels=2,\n        corner_emb_channels=1,\n        loss_heatmap=dict(\n            type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),\n        loss_embedding=dict(\n            type='AssociativeEmbeddingLoss',\n            pull_weight=0.10,\n            push_weight=0.10),\n        loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)),\n    # training and testing settings\n    train_cfg=None,\n    test_cfg=dict(\n        corner_topk=100,\n        local_maximum_kernel=3,\n        distance_threshold=0.5,\n        score_thr=0.05,\n        max_per_img=100,\n        nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian')))\n# data settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(\n        type='RandomCenterCropPad',\n        crop_size=(511, 511),\n        ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),\n        test_mode=False,\n        test_pad_mode=None,\n        **img_norm_cfg),\n    dict(type='Resize', img_scale=(511, 511), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(\n        type='MultiScaleFlipAug',\n        scale_factor=1.0,\n        flip=True,\n        transforms=[\n            dict(type='Resize'),\n            dict(\n                type='RandomCenterCropPad',\n                crop_size=None,\n                ratios=None,\n                border=None,\n                test_mode=True,\n                test_pad_mode=['logical_or', 127],\n                **img_norm_cfg),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(\n                type='Collect',\n                keys=['img'],\n                meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',\n                           'scale_factor', 'flip', 'img_norm_cfg', 'border')),\n        ])\n]\ndata = dict(\n    samples_per_gpu=3,\n    workers_per_gpu=3,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='Adam', lr=0.0005)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[180])\nrunner = dict(type='EpochBasedRunner', max_epochs=210)\n\n# NOTE: `auto_scale_lr` is 
for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (32 GPUs) x (3 samples per GPU)\nauto_scale_lr = dict(base_batch_size=96)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py",
    "content": "_base_ = [\n    '../_base_/default_runtime.py', '../_base_/datasets/coco_detection.py'\n]\n\n# model settings\nmodel = dict(\n    type='CornerNet',\n    backbone=dict(\n        type='HourglassNet',\n        downsample_times=5,\n        num_stacks=2,\n        stage_channels=[256, 256, 384, 384, 384, 512],\n        stage_blocks=[2, 2, 2, 2, 2, 4],\n        norm_cfg=dict(type='BN', requires_grad=True)),\n    neck=None,\n    bbox_head=dict(\n        type='CornerHead',\n        num_classes=80,\n        in_channels=256,\n        num_feat_levels=2,\n        corner_emb_channels=1,\n        loss_heatmap=dict(\n            type='GaussianFocalLoss', alpha=2.0, gamma=4.0, loss_weight=1),\n        loss_embedding=dict(\n            type='AssociativeEmbeddingLoss',\n            pull_weight=0.10,\n            push_weight=0.10),\n        loss_offset=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1)),\n    # training and testing settings\n    train_cfg=None,\n    test_cfg=dict(\n        corner_topk=100,\n        local_maximum_kernel=3,\n        distance_threshold=0.5,\n        score_thr=0.05,\n        max_per_img=100,\n        nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian')))\n# data settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(\n        type='RandomCenterCropPad',\n        crop_size=(511, 511),\n        ratios=(0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3),\n        test_mode=False,\n        test_pad_mode=None,\n        **img_norm_cfg),\n    dict(type='Resize', img_scale=(511, 511), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(\n        type='MultiScaleFlipAug',\n        scale_factor=1.0,\n        flip=True,\n        transforms=[\n            dict(type='Resize'),\n            dict(\n                type='RandomCenterCropPad',\n                crop_size=None,\n                ratios=None,\n                border=None,\n                test_mode=True,\n                test_pad_mode=['logical_or', 127],\n                **img_norm_cfg),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(\n                type='Collect',\n                keys=['img'],\n                meta_keys=('filename', 'ori_shape', 'img_shape', 'pad_shape',\n                           'scale_factor', 'flip', 'img_norm_cfg', 'border')),\n        ])\n]\ndata = dict(\n    samples_per_gpu=6,\n    workers_per_gpu=3,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='Adam', lr=0.0005)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[180])\nrunner = dict(type='EpochBasedRunner', max_epochs=210)\n\n# NOTE: `auto_scale_lr` is 
for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (6 samples per GPU)\nauto_scale_lr = dict(base_batch_size=48)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/cornernet/metafile.yml",
    "content": "Collections:\n  - Name: CornerNet\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - Adam\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Corner Pooling\n        - Stacked Hourglass Network\n    Paper:\n      URL: https://arxiv.org/abs/1808.01244\n      Title: 'CornerNet: Detecting Objects as Paired Keypoints'\n    README: configs/cornernet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.3.0/mmdet/models/detectors/cornernet.py#L9\n      Version: v2.3.0\n\nModels:\n  - Name: cornernet_hourglass104_mstest_10x5_210e_coco\n    In Collection: CornerNet\n    Config: configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py\n    Metadata:\n      Training Resources: 10x V100 GPUs\n      Batch Size: 50\n      Training Memory (GB): 13.9\n      inference time (ms/im):\n        - value: 238.1\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 210\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco/cornernet_hourglass104_mstest_10x5_210e_coco_20200824_185720-5fefbf1c.pth\n\n  - Name: cornernet_hourglass104_mstest_8x6_210e_coco\n    In Collection: CornerNet\n    Config: configs/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco.py\n    Metadata:\n      Batch Size: 48\n      Training Memory (GB): 15.9\n      inference time (ms/im):\n        - value: 238.1\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 210\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_8x6_210e_coco/cornernet_hourglass104_mstest_8x6_210e_coco_20200825_150618-79b44c30.pth\n\n  - Name: cornernet_hourglass104_mstest_32x3_210e_coco\n    In Collection: CornerNet\n    Config: configs/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco.py\n    Metadata:\n      Training Resources: 32x V100 GPUs\n      Batch Size: 96\n      Training Memory (GB): 9.5\n      inference time (ms/im):\n        - value: 256.41\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 210\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/cornernet/cornernet_hourglass104_mstest_32x3_210e_coco/cornernet_hourglass104_mstest_32x3_210e_coco_20200819_203110-1efaea91.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(\n                _delete_=True,\n                type='DeformRoIPoolPack',\n                output_size=7,\n                output_channels=256),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32])))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n\nfp16 = dict(loss_scale=512.)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcn/metafile.yml",
    "content": "Collections:\n  - Name: Deformable Convolutional Networks\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Deformable Convolution\n    Paper:\n      URL: https://arxiv.org/abs/1703.06211\n      Title: \"Deformable Convolutional Networks\"\n    README: configs/dcn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/dcn/deform_conv.py#L15\n      Version: v2.0.0\n\nModels:\n  - Name: faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.0\n      inference time (ms/im):\n        - value: 56.18\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-d68aed1e.pth\n\n  - Name: faster_rcnn_r50_fpn_dpool_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/faster_rcnn_r50_fpn_dpool_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.0\n      inference time (ms/im):\n        - value: 58.14\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_dpool_1x_coco/faster_rcnn_r50_fpn_dpool_1x_coco_20200307-90d3c01d.pth\n\n  - Name: faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      inference time (ms/im):\n        - value: 80\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco/faster_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-1377f13d.pth\n\n  - Name: faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.3\n      inference time (ms/im):\n        - value: 100\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/faster_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco_20200203-4f85c69c.pth\n\n  - Name: mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: 
configs/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.5\n      inference time (ms/im):\n        - value: 64.94\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200203-4d9ad43b.pth\n\n  - Name: mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n        - Mixed Precision Training\n      Training Memory (GB): 3.0\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_dconv_c3-c5_1x_coco_20210520_180247-c06429d2.pth\n\n  - Name: mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.5\n      inference time (ms/im):\n        - value: 85.47\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200216-a71f5bce.pth\n\n  - Name: cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.5\n      inference time (ms/im):\n        - value: 68.49\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200130-2f1fca44.pth\n\n  - Name: cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      inference time (ms/im):\n        - value: 90.91\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.0\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200203-3b2f0594.pth\n\n  - Name: cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      inference time (ms/im):\n        - value: 100\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r50_fpn_dconv_c3-c5_1x_coco_20200202-42e767a2.pth\n\n  - Name: cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.0\n      inference time (ms/im):\n        - value: 116.28\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_r101_fpn_dconv_c3-c5_1x_coco_20200204-df0c5f10.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks\n    Config: configs/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.2\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco-e75f90c8.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcnv2/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcnv2/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deform_groups=4, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcnv2/faster_rcnn_r50_fpn_mdpool_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(\n                _delete_=True,\n                type='ModulatedDeformRoIPoolPack',\n                output_size=7,\n                output_channels=256),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32])))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcnv2/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n\nfp16 = dict(loss_scale=512.)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcnv2/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dcnv2/metafile.yml",
    "content": "Collections:\n  - Name: Deformable Convolutional Networks v2\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Deformable Convolution\n    Paper:\n      URL: https://arxiv.org/abs/1811.11168\n      Title: \"Deformable ConvNets v2: More Deformable, Better Results\"\n    README: configs/dcnv2/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/dcn/deform_conv.py#L15\n      Version: v2.0.0\n\nModels:\n  - Name: faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks v2\n    Config: configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.1\n      inference time (ms/im):\n        - value: 56.82\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200130-d099253b.pth\n\n  - Name: faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco\n    In Collection: Deformable Convolutional Networks v2\n    Config: configs/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.2\n      inference time (ms/im):\n        - value: 57.47\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco/faster_rcnn_r50_fpn_mdconv_c3-c5_group4_1x_coco_20200130-01262257.pth\n\n  - Name: faster_rcnn_r50_fpn_mdpool_1x_coco\n    In Collection: Deformable Convolutional Networks v2\n    Config: configs/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.8\n      inference time (ms/im):\n        - value: 60.24\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dcn/faster_rcnn_r50_fpn_mdpool_1x_coco/faster_rcnn_r50_fpn_mdpool_1x_coco_20200307-c0df27ff.pth\n\n  - Name: mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks v2\n    Config: configs/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.5\n      inference time (ms/im):\n        - value: 66.23\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.1\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/dcn/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_mdconv_c3-c5_1x_coco_20200203-ad97591f.pth\n\n  - Name: mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco\n    In Collection: Deformable Convolutional Networks v2\n    Config: configs/dcn/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.1\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n        - Mixed Precision Training\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco/mask_rcnn_r50_fpn_fp16_mdconv_c3-c5_1x_coco_20210520_180434-cf8fefa5.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ddod/ddod_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    type='DDOD',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    bbox_head=dict(\n        type='DDODHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),\n        loss_iou=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    train_cfg=dict(\n        # assigner is mean cls_assigner\n        assigner=dict(type='ATSSAssigner', topk=9, alpha=0.8),\n        reg_assigner=dict(type='ATSSAssigner', topk=9, alpha=0.5),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n\n# This `persistent_workers` is only valid when PyTorch>=1.7.0\ndata = dict(persistent_workers=True)\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ddod/metafile.yml",
    "content": "Collections:\n  - Name: DDOD\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - DDOD\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/pdf/2107.02963.pdf\n      Title: 'Disentangle Your Dense Object Detector'\n    README: configs/ddod/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.25.0/mmdet/models/detectors/ddod.py#L6\n      Version: v2.25.0\n\nModels:\n  - Name: ddod_r50_fpn_1x_coco\n    In Collection: DDOD\n    Config: configs/ddod/ddod_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.4\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ddod/ddod_r50_fpn_1x_coco/ddod_r50_fpn_1x_coco_20220523_223737-29b2fc67.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/deepfashion/mask_rcnn_r50_fpn_15e_deepfashion.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/deepfashion.py', '../_base_/schedules/schedule_1x.py',\n    '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(num_classes=15), mask_head=dict(num_classes=15)))\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=15)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='DeformableDETR',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='ChannelMapper',\n        in_channels=[512, 1024, 2048],\n        kernel_size=1,\n        out_channels=256,\n        act_cfg=None,\n        norm_cfg=dict(type='GN', num_groups=32),\n        num_outs=4),\n    bbox_head=dict(\n        type='DeformableDETRHead',\n        num_query=300,\n        num_classes=80,\n        in_channels=2048,\n        sync_cls_avg_factor=True,\n        as_two_stage=False,\n        transformer=dict(\n            type='DeformableDetrTransformer',\n            encoder=dict(\n                type='DetrTransformerEncoder',\n                num_layers=6,\n                transformerlayers=dict(\n                    type='BaseTransformerLayer',\n                    attn_cfgs=dict(\n                        type='MultiScaleDeformableAttention', embed_dims=256),\n                    feedforward_channels=1024,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),\n            decoder=dict(\n                type='DeformableDetrTransformerDecoder',\n                num_layers=6,\n                return_intermediate=True,\n                transformerlayers=dict(\n                    type='DetrTransformerDecoderLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            dropout=0.1),\n                        dict(\n                            type='MultiScaleDeformableAttention',\n                            embed_dims=256)\n                    ],\n                    feedforward_channels=1024,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                     'ffn', 'norm')))),\n        positional_encoding=dict(\n            type='SinePositionalEncoding',\n            num_feats=128,\n            normalize=True,\n            offset=-0.5),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=2.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=5.0),\n        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='HungarianAssigner',\n            cls_cost=dict(type='FocalLossCost', weight=2.0),\n            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),\n            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),\n    test_cfg=dict(max_per_img=100))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different\n# from the default setting in mmdet.\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    
dict(\n        type='AutoAugment',\n        policies=[\n            [\n                dict(\n                    type='Resize',\n                    img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                               (576, 1333), (608, 1333), (640, 1333),\n                               (672, 1333), (704, 1333), (736, 1333),\n                               (768, 1333), (800, 1333)],\n                    multiscale_mode='value',\n                    keep_ratio=True)\n            ],\n            [\n                dict(\n                    type='Resize',\n                    # The ratio of all images in train dataset < 7\n                    # follow the original impl\n                    img_scale=[(400, 4200), (500, 4200), (600, 4200)],\n                    multiscale_mode='value',\n                    keep_ratio=True),\n                dict(\n                    type='RandomCrop',\n                    crop_type='absolute_range',\n                    crop_size=(384, 600),\n                    allow_negative_crop=True),\n                dict(\n                    type='Resize',\n                    img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                               (576, 1333), (608, 1333), (640, 1333),\n                               (672, 1333), (704, 1333), (736, 1333),\n                               (768, 1333), (800, 1333)],\n                    multiscale_mode='value',\n                    override=True,\n                    keep_ratio=True)\n            ]\n        ]),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=1),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\n# test_pipeline, NOTE the Pad's size_divisor is different from the default\n# setting (size_divisor=32). While there is little effect on the performance\n# whether we use the default setting or use size_divisor=1.\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=1),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(filter_empty_gt=False, pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=2e-4,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(\n        custom_keys={\n            'backbone': dict(lr_mult=0.1),\n            'sampling_offsets': dict(lr_mult=0.1),\n            'reference_points': dict(lr_mult=0.1)\n        }))\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\n# learning policy\nlr_config = dict(policy='step', step=[40])\nrunner = dict(type='EpochBasedRunner', max_epochs=50)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (16 GPUs) x (2 samples per GPU)\nauto_scale_lr = dict(base_batch_size=32)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py",
    "content": "_base_ = 'deformable_detr_r50_16x2_50e_coco.py'\nmodel = dict(bbox_head=dict(with_box_refine=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py",
    "content": "_base_ = 'deformable_detr_refine_r50_16x2_50e_coco.py'\nmodel = dict(bbox_head=dict(as_two_stage=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/deformable_detr/metafile.yml",
    "content": "Collections:\n  - Name: Deformable DETR\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n        - Multi Scale Train\n        - Gradient Clip\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNet\n        - Transformer\n    Paper:\n      URL: https://openreview.net/forum?id=gZ9hCDWe6ke\n      Title: 'Deformable DETR: Deformable Transformers for End-to-End Object Detection'\n    README: configs/deformable_detr/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.12.0/mmdet/models/detectors/deformable_detr.py#L6\n      Version: v2.12.0\n\nModels:\n  - Name: deformable_detr_r50_16x2_50e_coco\n    In Collection: Deformable DETR\n    Config: configs/deformable_detr/deformable_detr_r50_16x2_50e_coco.py\n    Metadata:\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_r50_16x2_50e_coco/deformable_detr_r50_16x2_50e_coco_20210419_220030-a12b9512.pth\n\n  - Name: deformable_detr_refine_r50_16x2_50e_coco\n    In Collection: Deformable DETR\n    Config: configs/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco.py\n    Metadata:\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_refine_r50_16x2_50e_coco/deformable_detr_refine_r50_16x2_50e_coco_20210419_220503-5f5dff21.pth\n\n  - Name: deformable_detr_twostage_refine_r50_16x2_50e_coco\n    In Collection: Deformable DETR\n    Config: configs/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco.py\n    Metadata:\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/deformable_detr/deformable_detr_twostage_refine_r50_16x2_50e_coco/deformable_detr_twostage_refine_r50_16x2_50e_coco_20210419_220613-9d28ab72.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        type='DetectoRS_ResNet',\n        conv_cfg=dict(type='ConvAWS'),\n        output_img=True),\n    neck=dict(\n        type='RFP',\n        rfp_steps=2,\n        aspp_out_channels=64,\n        aspp_dilations=(1, 3, 6, 1),\n        rfp_backbone=dict(\n            rfp_inplanes=256,\n            type='DetectoRS_ResNet',\n            depth=50,\n            num_stages=4,\n            out_indices=(0, 1, 2, 3),\n            frozen_stages=1,\n            norm_cfg=dict(type='BN', requires_grad=True),\n            norm_eval=True,\n            conv_cfg=dict(type='ConvAWS'),\n            pretrained='torchvision://resnet50',\n            style='pytorch')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detectors/cascade_rcnn_r50_sac_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        type='DetectoRS_ResNet',\n        conv_cfg=dict(type='ConvAWS'),\n        sac=dict(type='SAC', use_deform=True),\n        stage_with_sac=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        type='DetectoRS_ResNet',\n        conv_cfg=dict(type='ConvAWS'),\n        sac=dict(type='SAC', use_deform=True),\n        stage_with_sac=(False, True, True, True),\n        output_img=True),\n    neck=dict(\n        type='RFP',\n        rfp_steps=2,\n        aspp_out_channels=64,\n        aspp_dilations=(1, 3, 6, 1),\n        rfp_backbone=dict(\n            rfp_inplanes=256,\n            type='DetectoRS_ResNet',\n            depth=50,\n            num_stages=4,\n            out_indices=(0, 1, 2, 3),\n            frozen_stages=1,\n            norm_cfg=dict(type='BN', requires_grad=True),\n            norm_eval=True,\n            conv_cfg=dict(type='ConvAWS'),\n            sac=dict(type='SAC', use_deform=True),\n            stage_with_sac=(False, True, True, True),\n            pretrained='torchvision://resnet50',\n            style='pytorch')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detectors/detectors_htc_r101_20e_coco.py",
    "content": "_base_ = '../htc/htc_r101_fpn_20e_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        type='DetectoRS_ResNet',\n        conv_cfg=dict(type='ConvAWS'),\n        sac=dict(type='SAC', use_deform=True),\n        stage_with_sac=(False, True, True, True),\n        output_img=True),\n    neck=dict(\n        type='RFP',\n        rfp_steps=2,\n        aspp_out_channels=64,\n        aspp_dilations=(1, 3, 6, 1),\n        rfp_backbone=dict(\n            rfp_inplanes=256,\n            type='DetectoRS_ResNet',\n            depth=101,\n            num_stages=4,\n            out_indices=(0, 1, 2, 3),\n            frozen_stages=1,\n            norm_cfg=dict(type='BN', requires_grad=True),\n            norm_eval=True,\n            conv_cfg=dict(type='ConvAWS'),\n            sac=dict(type='SAC', use_deform=True),\n            stage_with_sac=(False, True, True, True),\n            pretrained='torchvision://resnet101',\n            style='pytorch')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detectors/detectors_htc_r50_1x_coco.py",
    "content": "_base_ = '../htc/htc_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        type='DetectoRS_ResNet',\n        conv_cfg=dict(type='ConvAWS'),\n        sac=dict(type='SAC', use_deform=True),\n        stage_with_sac=(False, True, True, True),\n        output_img=True),\n    neck=dict(\n        type='RFP',\n        rfp_steps=2,\n        aspp_out_channels=64,\n        aspp_dilations=(1, 3, 6, 1),\n        rfp_backbone=dict(\n            rfp_inplanes=256,\n            type='DetectoRS_ResNet',\n            depth=50,\n            num_stages=4,\n            out_indices=(0, 1, 2, 3),\n            frozen_stages=1,\n            norm_cfg=dict(type='BN', requires_grad=True),\n            norm_eval=True,\n            conv_cfg=dict(type='ConvAWS'),\n            sac=dict(type='SAC', use_deform=True),\n            stage_with_sac=(False, True, True, True),\n            pretrained='torchvision://resnet50',\n            style='pytorch')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detectors/htc_r50_rfp_1x_coco.py",
    "content": "_base_ = '../htc/htc_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        type='DetectoRS_ResNet',\n        conv_cfg=dict(type='ConvAWS'),\n        output_img=True),\n    neck=dict(\n        type='RFP',\n        rfp_steps=2,\n        aspp_out_channels=64,\n        aspp_dilations=(1, 3, 6, 1),\n        rfp_backbone=dict(\n            rfp_inplanes=256,\n            type='DetectoRS_ResNet',\n            depth=50,\n            num_stages=4,\n            out_indices=(0, 1, 2, 3),\n            frozen_stages=1,\n            norm_cfg=dict(type='BN', requires_grad=True),\n            norm_eval=True,\n            conv_cfg=dict(type='ConvAWS'),\n            pretrained='torchvision://resnet50',\n            style='pytorch')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detectors/htc_r50_sac_1x_coco.py",
    "content": "_base_ = '../htc/htc_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        type='DetectoRS_ResNet',\n        conv_cfg=dict(type='ConvAWS'),\n        sac=dict(type='SAC', use_deform=True),\n        stage_with_sac=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detectors/metafile.yml",
    "content": "Collections:\n  - Name: DetectoRS\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ASPP\n        - FPN\n        - RFP\n        - RPN\n        - ResNet\n        - RoIAlign\n        - SAC\n    Paper:\n      URL: https://arxiv.org/abs/2006.02334\n      Title: 'DetectoRS: Detecting Objects with Recursive Feature Pyramid and Switchable Atrous Convolution'\n    README: configs/detectors/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.2.0/mmdet/models/backbones/detectors_resnet.py#L205\n      Version: v2.2.0\n\nModels:\n  - Name: cascade_rcnn_r50_rfp_1x_coco\n    In Collection: DetectoRS\n    Config: configs/detectors/cascade_rcnn_r50_rfp_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.5\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_rfp_1x_coco/cascade_rcnn_r50_rfp_1x_coco-8cf51bfd.pth\n\n  - Name: cascade_rcnn_r50_sac_1x_coco\n    In Collection: DetectoRS\n    Config: configs/detectors/cascade_rcnn_r50_sac_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.6\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/cascade_rcnn_r50_sac_1x_coco/cascade_rcnn_r50_sac_1x_coco-24bfda62.pth\n\n  - Name: detectors_cascade_rcnn_r50_1x_coco\n    In Collection: DetectoRS\n    Config: configs/detectors/detectors_cascade_rcnn_r50_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.9\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_cascade_rcnn_r50_1x_coco/detectors_cascade_rcnn_r50_1x_coco-32a10ba0.pth\n\n  - Name: htc_r50_rfp_1x_coco\n    In Collection: DetectoRS\n    Config: configs/detectors/htc_r50_rfp_1x_coco.py\n    Metadata:\n      Training Memory (GB): 11.2\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  40.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_rfp_1x_coco/htc_r50_rfp_1x_coco-8ff87c51.pth\n\n  - Name: htc_r50_sac_1x_coco\n    In Collection: DetectoRS\n    Config: configs/detectors/htc_r50_sac_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.3\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  40.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/htc_r50_sac_1x_coco/htc_r50_sac_1x_coco-bfa60c54.pth\n\n  - Name: detectors_htc_r50_1x_coco\n    In Collection: DetectoRS\n    Config: configs/detectors/detectors_htc_r50_1x_coco.py\n    Metadata:\n      Training Memory (GB): 13.6\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 49.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  
42.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/detectors/detectors_htc_r50_1x_coco/detectors_htc_r50_1x_coco-329b1453.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detr/detr_r50_8x2_150e_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='DETR',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(3, ),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    bbox_head=dict(\n        type='DETRHead',\n        num_classes=80,\n        in_channels=2048,\n        transformer=dict(\n            type='Transformer',\n            encoder=dict(\n                type='DetrTransformerEncoder',\n                num_layers=6,\n                transformerlayers=dict(\n                    type='BaseTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            dropout=0.1)\n                    ],\n                    feedforward_channels=2048,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),\n            decoder=dict(\n                type='DetrTransformerDecoder',\n                return_intermediate=True,\n                num_layers=6,\n                transformerlayers=dict(\n                    type='DetrTransformerDecoderLayer',\n                    attn_cfgs=dict(\n                        type='MultiheadAttention',\n                        embed_dims=256,\n                        num_heads=8,\n                        dropout=0.1),\n                    feedforward_channels=2048,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                     'ffn', 'norm')),\n            )),\n        positional_encoding=dict(\n            type='SinePositionalEncoding', num_feats=128, normalize=True),\n        loss_cls=dict(\n            type='CrossEntropyLoss',\n            bg_cls_weight=0.1,\n            use_sigmoid=False,\n            loss_weight=1.0,\n            class_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=5.0),\n        loss_iou=dict(type='GIoULoss', loss_weight=2.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='HungarianAssigner',\n            cls_cost=dict(type='ClassificationCost', weight=1.),\n            reg_cost=dict(type='BBoxL1Cost', weight=5.0, box_format='xywh'),\n            iou_cost=dict(type='IoUCost', iou_mode='giou', weight=2.0))),\n    test_cfg=dict(max_per_img=100))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n# train_pipeline, NOTE the img_scale and the Pad's size_divisor is different\n# from the default setting in mmdet.\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='AutoAugment',\n        policies=[[\n            dict(\n                type='Resize',\n                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),\n                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),\n                           (736, 1333), (768, 1333), (800, 1333)],\n                multiscale_mode='value',\n                keep_ratio=True)\n        ],\n           
       [\n                      dict(\n                          type='Resize',\n                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],\n                          multiscale_mode='value',\n                          keep_ratio=True),\n                      dict(\n                          type='RandomCrop',\n                          crop_type='absolute_range',\n                          crop_size=(384, 600),\n                          allow_negative_crop=True),\n                      dict(\n                          type='Resize',\n                          img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                                     (576, 1333), (608, 1333), (640, 1333),\n                                     (672, 1333), (704, 1333), (736, 1333),\n                                     (768, 1333), (800, 1333)],\n                          multiscale_mode='value',\n                          override=True,\n                          keep_ratio=True)\n                  ]]),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=1),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\n# test_pipeline, NOTE the Pad's size_divisor is different from the default\n# setting (size_divisor=32). While there is little effect on the performance\n# whether we use the default setting or use size_divisor=1.\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=1),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=0.0001,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(\n        custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))\noptimizer_config = dict(grad_clip=dict(max_norm=0.1, norm_type=2))\n# learning policy\nlr_config = dict(policy='step', step=[100])\nrunner = dict(type='EpochBasedRunner', max_epochs=150)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/detr/metafile.yml",
    "content": "Collections:\n  - Name: DETR\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n        - Multi Scale Train\n        - Gradient Clip\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNet\n        - Transformer\n    Paper:\n      URL: https://arxiv.org/abs/2005.12872\n      Title: 'End-to-End Object Detection with Transformers'\n    README: configs/detr/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/detectors/detr.py#L7\n      Version: v2.7.0\n\nModels:\n  - Name: detr_r50_8x2_150e_coco\n    In Collection: DETR\n    Config: configs/detr/detr_r50_8x2_150e_coco.py\n    Metadata:\n      Training Memory (GB): 7.9\n      Epochs: 150\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/detr/detr_r50_8x2_150e_coco/detr_r50_8x2_150e_coco_20201130_194835-2c4b8974.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        type='DoubleHeadRoIHead',\n        reg_roi_scale_factor=1.3,\n        bbox_head=dict(\n            _delete_=True,\n            type='DoubleConvFCBBoxHead',\n            num_convs=4,\n            num_fcs=2,\n            in_channels=256,\n            conv_out_channels=1024,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=2.0),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=2.0))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/double_heads/metafile.yml",
    "content": "Collections:\n  - Name: Rethinking Classification and Localization for Object Detection\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - RPN\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: https://arxiv.org/pdf/1904.06493\n      Title: 'Rethinking Classification and Localization for Object Detection'\n    README: configs/double_heads/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/roi_heads/double_roi_head.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: dh_faster_rcnn_r50_fpn_1x_coco\n    In Collection: Rethinking Classification and Localization for Object Detection\n    Config: configs/double_heads/dh_faster_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.8\n      inference time (ms/im):\n        - value: 105.26\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/double_heads/dh_faster_rcnn_r50_fpn_1x_coco/dh_faster_rcnn_r50_fpn_1x_coco_20200130-586b67df.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dyhead/atss_r50_caffe_fpn_dyhead_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='ATSS',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    neck=[\n        dict(\n            type='FPN',\n            in_channels=[256, 512, 1024, 2048],\n            out_channels=256,\n            start_level=1,\n            add_extra_convs='on_output',\n            num_outs=5),\n        dict(\n            type='DyHead',\n            in_channels=256,\n            out_channels=256,\n            num_blocks=6,\n            # disable zero_init_offset to follow official implementation\n            zero_init_offset=False)\n    ],\n    bbox_head=dict(\n        type='ATSSHead',\n        num_classes=80,\n        in_channels=256,\n        pred_kernel_size=1,  # follow DyHead official implementation\n        stacked_convs=0,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128],\n            center_offset=0.5),  # follow DyHead official implementation\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(type='ATSSAssigner', topk=9),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n\n# use caffe img_norm, size_divisor=128, pillow resize\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=(1333, 800),\n        keep_ratio=True,\n        backend='pillow'),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=128),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True, backend='pillow'),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=128),\n            dict(type='ImageToTensor', keys=['img']),\n            
dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dyhead/atss_r50_fpn_dyhead_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='ATSS',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=[\n        dict(\n            type='FPN',\n            in_channels=[256, 512, 1024, 2048],\n            out_channels=256,\n            start_level=1,\n            add_extra_convs='on_output',\n            num_outs=5),\n        dict(type='DyHead', in_channels=256, out_channels=256, num_blocks=6)\n    ],\n    bbox_head=dict(\n        type='ATSSHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=0,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(type='ATSSAssigner', topk=9),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'  # noqa\nmodel = dict(\n    type='ATSS',\n    backbone=dict(\n        type='SwinTransformer',\n        pretrain_img_size=384,\n        embed_dims=192,\n        depths=[2, 2, 18, 2],\n        num_heads=[6, 12, 24, 48],\n        window_size=12,\n        mlp_ratio=4,\n        qkv_bias=True,\n        qk_scale=None,\n        drop_rate=0.,\n        attn_drop_rate=0.,\n        drop_path_rate=0.2,\n        patch_norm=True,\n        out_indices=(1, 2, 3),\n        # Please only add indices that would be used\n        # in FPN, otherwise some parameter will not be used\n        with_cp=False,\n        convert_weights=True,\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    neck=[\n        dict(\n            type='FPN',\n            in_channels=[384, 768, 1536],\n            out_channels=256,\n            start_level=0,\n            add_extra_convs='on_output',\n            num_outs=5),\n        dict(\n            type='DyHead',\n            in_channels=256,\n            out_channels=256,\n            num_blocks=6,\n            # disable zero_init_offset to follow official implementation\n            zero_init_offset=False)\n    ],\n    bbox_head=dict(\n        type='ATSSHead',\n        num_classes=80,\n        in_channels=256,\n        pred_kernel_size=1,  # follow DyHead official implementation\n        stacked_convs=0,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128],\n            center_offset=0.5),  # follow DyHead official implementation\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(type='ATSSAssigner', topk=9),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(2000, 480), (2000, 1200)],\n        multiscale_mode='range',\n        keep_ratio=True,\n        backend='pillow'),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=128),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(2000, 1200),\n        flip=False,\n       
 transforms=[\n            dict(type='Resize', keep_ratio=True, backend='pillow'),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=128),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=2,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric='bbox')\n\n# optimizer\noptimizer_config = dict(grad_clip=None)\noptimizer = dict(\n    type='AdamW',\n    lr=0.00005,\n    betas=(0.9, 0.999),\n    weight_decay=0.05,\n    paramwise_cfg=dict(\n        custom_keys={\n            'absolute_pos_embed': dict(decay_mult=0.),\n            'relative_position_bias_table': dict(decay_mult=0.),\n            'norm': dict(decay_mult=0.)\n        }))\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dyhead/metafile.yml",
    "content": "Collections:\n  - Name: DyHead\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 4x T4 GPUs\n      Architecture:\n        - ATSS\n        - DyHead\n        - FPN\n        - ResNet\n        - Deformable Convolution\n        - Pyramid Convolution\n    Paper:\n      URL: https://arxiv.org/abs/2106.08322\n      Title: 'Dynamic Head: Unifying Object Detection Heads with Attentions'\n    README: configs/dyhead/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/mmdet/models/necks/dyhead.py#L130\n      Version: v2.22.0\n\nModels:\n  - Name: atss_r50_caffe_fpn_dyhead_1x_coco\n    In Collection: DyHead\n    Config: configs/dyhead/atss_r50_caffe_fpn_dyhead_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.4\n      inference time (ms/im):\n        - value: 75.7\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_r50_fpn_dyhead_for_reproduction_1x_coco/atss_r50_fpn_dyhead_for_reproduction_4x4_1x_coco_20220107_213939-162888e6.pth\n\n  - Name: atss_r50_fpn_dyhead_1x_coco\n    In Collection: DyHead\n    Config: configs/dyhead/atss_r50_fpn_dyhead_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.9\n      inference time (ms/im):\n        - value: 73.1\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_r50_fpn_dyhead_4x4_1x_coco/atss_r50_fpn_dyhead_4x4_1x_coco_20211219_023314-eaa620c6.pth\n\n  - Name: atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco\n    In Collection: DyHead\n    Config: configs/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco.py\n    Metadata:\n      Training Memory (GB): 58.4\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 56.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dyhead/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco/atss_swin-l-p4-w12_fpn_dyhead_mstrain_2x_coco_20220509_100315-bc5b6516.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        type='DynamicRoIHead',\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False,\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n    train_cfg=dict(\n        rpn_proposal=dict(nms=dict(iou_threshold=0.85)),\n        rcnn=dict(\n            dynamic_rcnn=dict(\n                iou_topk=75,\n                beta_topk=10,\n                update_iter_interval=100,\n                initial_iou=0.4,\n                initial_beta=1.0))),\n    test_cfg=dict(rpn=dict(nms=dict(iou_threshold=0.85))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/dynamic_rcnn/metafile.yml",
    "content": "Collections:\n  - Name: Dynamic R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Dynamic R-CNN\n        - FPN\n        - RPN\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: https://arxiv.org/pdf/2004.06002\n      Title: 'Dynamic R-CNN: Towards High Quality Object Detection via Dynamic Training'\n    README: configs/dynamic_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.2.0/mmdet/models/roi_heads/dynamic_roi_head.py#L11\n      Version: v2.2.0\n\nModels:\n  - Name: dynamic_rcnn_r50_fpn_1x_coco\n    In Collection: Dynamic R-CNN\n    Config: configs/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.8\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/dynamic_rcnn/dynamic_rcnn_r50_fpn_1x/dynamic_rcnn_r50_fpn_1x-62a3f276.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/efficientnet/metafile.yml",
    "content": "Models:\n  - Name: retinanet_effb3_fpn_crop896_8x4_1x_coco\n    In Collection: RetinaNet\n    Config: configs/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco/retinanet_effb3_fpn_crop896_8x4_1x_coco_20220322_234806-615a0dda.pth\n    Paper:\n      URL: https://arxiv.org/abs/1905.11946v5\n      Title: 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks'\n    README: configs/efficientnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.23.0/mmdet/models/backbones/efficientnet.py#L159\n      Version: v2.23.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/efficientnet/retinanet_effb3_fpn_crop896_8x4_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'\n]\n\ncudnn_benchmark = True\nnorm_cfg = dict(type='BN', requires_grad=True)\ncheckpoint = 'https://download.openmmlab.com/mmclassification/v0/efficientnet/efficientnet-b3_3rdparty_8xb32-aa_in1k_20220119-5b4887a0.pth'  # noqa\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='EfficientNet',\n        arch='b3',\n        drop_path_rate=0.2,\n        out_indices=(3, 4, 5),\n        frozen_stages=0,\n        norm_cfg=dict(\n            type='SyncBN', requires_grad=True, eps=1e-3, momentum=0.01),\n        norm_eval=False,\n        init_cfg=dict(\n            type='Pretrained', prefix='backbone', checkpoint=checkpoint)),\n    neck=dict(\n        in_channels=[48, 136, 384],\n        start_level=0,\n        out_channels=256,\n        relu_before_extra_convs=True,\n        no_norm_on_lateral=True,\n        norm_cfg=norm_cfg),\n    bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg),\n    # training and testing settings\n    train_cfg=dict(assigner=dict(neg_iou_thr=0.5)))\n\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nimg_size = (896, 896)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=img_size,\n        ratio_range=(0.8, 1.2),\n        keep_ratio=True),\n    dict(type='RandomCrop', crop_size=img_size),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=img_size),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=img_size,\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size=img_size),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=4,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer_config = dict(grad_clip=None)\noptimizer = dict(\n    type='SGD',\n    lr=0.04,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.1,\n    step=[8, 11])\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (4 samples per GPU)\nauto_scale_lr = dict(base_batch_size=32)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(plugins=[\n        dict(\n            cfg=dict(\n                type='GeneralizedAttention',\n                spatial_range=-1,\n                num_heads=8,\n                attention_type='0010',\n                kv_stride=2),\n            stages=(False, False, True, True),\n            position='after_conv2')\n    ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        plugins=[\n            dict(\n                cfg=dict(\n                    type='GeneralizedAttention',\n                    spatial_range=-1,\n                    num_heads=8,\n                    attention_type='0010',\n                    kv_stride=2),\n                stages=(False, False, True, True),\n                position='after_conv2')\n        ],\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(plugins=[\n        dict(\n            cfg=dict(\n                type='GeneralizedAttention',\n                spatial_range=-1,\n                num_heads=8,\n                attention_type='1111',\n                kv_stride=2),\n            stages=(False, False, True, True),\n            position='after_conv2')\n    ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        plugins=[\n            dict(\n                cfg=dict(\n                    type='GeneralizedAttention',\n                    spatial_range=-1,\n                    num_heads=8,\n                    attention_type='1111',\n                    kv_stride=2),\n                stages=(False, False, True, True),\n                position='after_conv2')\n        ],\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/empirical_attention/metafile.yml",
    "content": "Collections:\n  - Name: Empirical Attention\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Deformable Convolution\n        - FPN\n        - RPN\n        - ResNet\n        - RoIAlign\n        - Spatial Attention\n    Paper:\n      URL: https://arxiv.org/pdf/1904.05873\n      Title: 'An Empirical Study of Spatial Attention Mechanisms in Deep Networks'\n    README: configs/empirical_attention/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/generalized_attention.py#L10\n      Version: v2.0.0\n\nModels:\n  - Name: faster_rcnn_r50_fpn_attention_1111_1x_coco\n    In Collection: Empirical Attention\n    Config: configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.0\n      inference time (ms/im):\n        - value: 72.46\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_1x_coco/faster_rcnn_r50_fpn_attention_1111_1x_coco_20200130-403cccba.pth\n\n  - Name: faster_rcnn_r50_fpn_attention_0010_1x_coco\n    In Collection: Empirical Attention\n    Config: configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.2\n      inference time (ms/im):\n        - value: 54.35\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_1x_coco/faster_rcnn_r50_fpn_attention_0010_1x_coco_20200130-7cb0c14d.pth\n\n  - Name: faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco\n    In Collection: Empirical Attention\n    Config: configs/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.0\n      inference time (ms/im):\n        - value: 78.74\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco/faster_rcnn_r50_fpn_attention_1111_dcn_1x_coco_20200130-8b2523a6.pth\n\n  - Name: faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco\n    In Collection: Empirical Attention\n    Config: configs/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.2\n      inference time (ms/im):\n        - value: 58.48\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.0\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/empirical_attention/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco/faster_rcnn_r50_fpn_attention_0010_dcn_1x_coco_20200130-1a2e831d.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fast_rcnn/fast_rcnn_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './fast_rcnn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fast_rcnn/fast_rcnn_r101_fpn_1x_coco.py",
    "content": "_base_ = './fast_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fast_rcnn/fast_rcnn_r101_fpn_2x_coco.py",
    "content": "_base_ = './fast_rcnn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fast_rcnn/fast_rcnn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = './fast_rcnn_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='BN', requires_grad=False),\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadProposals', num_max_proposals=2000),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadProposals', num_max_proposals=None),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='ToTensor', keys=['proposals']),\n            dict(\n                type='ToDataContainer',\n                fields=[dict(key='proposals', stack=False)]),\n            dict(type='Collect', keys=['img', 'proposals']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/fast_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadProposals', num_max_proposals=2000),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadProposals', num_max_proposals=None),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='ToTensor', keys=['proposals']),\n            dict(\n                type='ToDataContainer',\n                fields=[dict(key='proposals', stack=False)]),\n            dict(type='Collect', keys=['img', 'proposals']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_train2017.pkl',\n        pipeline=train_pipeline),\n    val=dict(\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        pipeline=test_pipeline),\n    test=dict(\n        proposal_file=data_root + 'proposals/rpn_r50_fpn_1x_val2017.pkl',\n        pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fast_rcnn/fast_rcnn_r50_fpn_2x_coco.py",
    "content": "_base_ = './fast_rcnn_r50_fpn_1x_coco.py'\n\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_caffe_c4.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_caffe_c4_1x_coco.py'\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_caffe_dc5.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_caffe_dc5.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py'\n# learning policy\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_90k_coco.py",
    "content": "_base_ = 'faster_rcnn_r50_caffe_fpn_1x_coco.py'\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[60000, 80000])\n\n# Runner type\nrunner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000)\n\ncheckpoint_config = dict(interval=10000)\nevaluation = dict(interval=10000, metric='bbox')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person-bicycle-car.py",
    "content": "_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py'\nmodel = dict(roi_head=dict(bbox_head=dict(num_classes=3)))\nclasses = ('person', 'bicycle', 'car')\ndata = dict(\n    train=dict(classes=classes),\n    val=dict(classes=classes),\n    test=dict(classes=classes))\n\nload_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth'  # noqa\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco-person.py",
    "content": "_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py'\nmodel = dict(roi_head=dict(bbox_head=dict(num_classes=1)))\nclasses = ('person', )\ndata = dict(\n    train=dict(classes=classes),\n    val=dict(classes=classes),\n    test=dict(classes=classes))\n\nload_from = 'https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_bbox_mAP-0.398_20200504_163323-30042637.pth'  # noqa\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'faster_rcnn_r50_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_90k_coco.py",
    "content": "_base_ = 'faster_rcnn_r50_caffe_fpn_mstrain_1x_coco.py'\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[60000, 80000])\n\n# Runner type\nrunner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000)\n\ncheckpoint_config = dict(interval=10000)\nevaluation = dict(interval=10000, metric='bbox')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(\n            reg_decoded_bbox=True,\n            loss_bbox=dict(type='BoundedIoULoss', loss_weight=10.0))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_ciou_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(\n            reg_decoded_bbox=True,\n            loss_bbox=dict(type='CIoULoss', loss_weight=12.0))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_fp16_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\n# fp16 settings\nfp16 = dict(loss_scale=512.)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(\n            reg_decoded_bbox=True,\n            loss_bbox=dict(type='GIoULoss', loss_weight=10.0))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(\n            reg_decoded_bbox=True,\n            loss_bbox=dict(type='IoULoss', loss_weight=10.0))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(train_cfg=dict(rcnn=dict(sampler=dict(type='OHEMSampler'))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_soft_nms_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    test_cfg=dict(\n        rcnn=dict(\n            score_thr=0.05,\n            nms=dict(type='soft_nms', iou_threshold=0.5),\n            max_per_img=100)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\ncheckpoint = 'https://download.pytorch.org/models/resnet50-11ad3fa6.pth'\nmodel = dict(\n    backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=checkpoint)))\n\n# `lr` and `weight_decay` have been searched to be optimal.\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=0.0001,\n    weight_decay=0.1,\n    paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py'\n]\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py'\n]\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=8,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))\n\n# ResNeXt-101-32x8d model trained with Caffe2 at FB,\n# so the mean and std need to be changed.\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675],\n    std=[57.375, 57.120, 58.395],\n    to_rgb=False)\n\n# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],\n# multiscale_mode='range'\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py'\n]\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/faster_rcnn/metafile.yml",
    "content": "Collections:\n  - Name: Faster R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - RPN\n        - ResNet\n        - RoIPool\n    Paper:\n      URL: https://arxiv.org/abs/1506.01497\n      Title: \"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks\"\n    README: configs/faster_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/faster_rcnn.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: faster_rcnn_r50_caffe_c4_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 35.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_c4_1x_coco/faster_rcnn_r50_caffe_c4_1x_coco_20220316_150152-3f885b85.pth\n\n  - Name: faster_rcnn_r50_caffe_c4_mstrain_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 35.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_c4_mstrain_1x_coco/faster_rcnn_r50_caffe_c4_mstrain_1x_coco_20220316_150527-db276fed.pth\n\n  - Name: faster_rcnn_r50_caffe_dc5_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_1x_coco/faster_rcnn_r50_caffe_dc5_1x_coco_20201030_151909-531f0f43.pth\n\n  - Name: faster_rcnn_r50_caffe_fpn_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.8\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco/faster_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.378_20200504_180032-c5925ee5.pth\n\n  - Name: faster_rcnn_r50_fpn_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.0\n      inference time (ms/im):\n        - value: 46.73\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth\n\n  - Name: faster_rcnn_r50_fpn_fp16_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_fpn_fp16_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.4\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n        - Mixed Precision 
Training\n      inference time (ms/im):\n        - value: 34.72\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP16\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/faster_rcnn_r50_fpn_fp16_1x_coco/faster_rcnn_r50_fpn_fp16_1x_coco_20200204-d4dc1471.pth\n\n  - Name: faster_rcnn_r50_fpn_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 4.0\n      inference time (ms/im):\n        - value: 46.73\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_2x_coco/faster_rcnn_r50_fpn_2x_coco_bbox_mAP-0.384_20200504_210434-a5d8aa15.pth\n\n  - Name: faster_rcnn_r101_caffe_fpn_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.7\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_1x_coco/faster_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.398_20200504_180057-b269e9dd.pth\n\n  - Name: faster_rcnn_r101_fpn_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      inference time (ms/im):\n        - value: 64.1\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_1x_coco/faster_rcnn_r101_fpn_1x_coco_20200130-f513f705.pth\n\n  - Name: faster_rcnn_r101_fpn_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r101_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      inference time (ms/im):\n        - value: 64.1\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_2x_coco/faster_rcnn_r101_fpn_2x_coco_bbox_mAP-0.398_20200504_210455-1d2dac9c.pth\n\n  - Name: faster_rcnn_x101_32x4d_fpn_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.2\n      inference time (ms/im):\n        - value: 72.46\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n    
Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco/faster_rcnn_x101_32x4d_fpn_1x_coco_20200203-cff10310.pth\n\n  - Name: faster_rcnn_x101_32x4d_fpn_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.2\n      inference time (ms/im):\n        - value: 72.46\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_2x_coco/faster_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.412_20200506_041400-64a12c0b.pth\n\n  - Name: faster_rcnn_x101_64x4d_fpn_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.3\n      inference time (ms/im):\n        - value: 106.38\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_1x_coco/faster_rcnn_x101_64x4d_fpn_1x_coco_20200204-833ee192.pth\n\n  - Name: faster_rcnn_x101_64x4d_fpn_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 10.3\n      inference time (ms/im):\n        - value: 106.38\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_2x_coco/faster_rcnn_x101_64x4d_fpn_2x_coco_20200512_161033-5961fa95.pth\n\n  - Name: faster_rcnn_r50_fpn_iou_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.9\n    # re-release\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_iou_1x_coco/faster_rcnn_r50_fpn_iou_1x_coco_20200506_095954-938e81f0.pth\n\n  - Name: faster_rcnn_r50_fpn_giou_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_fpn_giou_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_giou_1x_coco-0eada910.pth\n\n  - Name: faster_rcnn_r50_fpn_bounded_iou_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_fpn_bounded_iou_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.4\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_1x_coco/faster_rcnn_r50_fpn_bounded_iou_1x_coco-98ad993b.pth\n\n  - Name: faster_rcnn_r50_caffe_dc5_mstrain_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco/faster_rcnn_r50_caffe_dc5_mstrain_1x_coco_20201028_233851-b33d21b9.pth\n\n  - Name: faster_rcnn_r50_caffe_dc5_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco/faster_rcnn_r50_caffe_dc5_mstrain_3x_coco_20201028_002107-34a53b2c.pth\n\n  - Name: faster_rcnn_r50_caffe_fpn_mstrain_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco.py\n    Metadata:\n      Training Memory (GB): 4.3\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco/faster_rcnn_r50_caffe_fpn_mstrain_2x_coco_bbox_mAP-0.397_20200504_231813-10b2de58.pth\n\n  - Name: faster_rcnn_r50_caffe_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 3.7\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco/faster_rcnn_r50_caffe_fpn_mstrain_3x_coco_20210526_095054-1f77628b.pth\n\n  - Name: faster_rcnn_r50_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 3.9\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_mstrain_3x_coco/faster_rcnn_r50_fpn_mstrain_3x_coco_20210524_110822-e10bd31c.pth\n\n  - Name: faster_rcnn_r101_caffe_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.6\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco/faster_rcnn_r101_caffe_fpn_mstrain_3x_coco_20210526_095742-a7ae426d.pth\n\n  - Name: faster_rcnn_r101_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.8\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        
Metrics:\n          box AP: 41.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r101_fpn_mstrain_3x_coco/faster_rcnn_r101_fpn_mstrain_3x_coco_20210524_110822-4d4d2ca8.pth\n\n  - Name: faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x4d_fpn_mstrain_3x_coco_20210524_124151-16b9b260.pth\n\n  - Name: faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 10.1\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco/faster_rcnn_x101_32x8d_fpn_mstrain_3x_coco_20210604_182954-002e082a.pth\n\n  - Name: faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 10.0\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco/faster_rcnn_x101_64x4d_fpn_mstrain_3x_coco_20210524_124528-26c63de6.pth\n\n  - Name: faster_rcnn_r50_fpn_tnr-pretrain_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.0\n      inference time (ms/im):\n        - value: 46.73\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/faster_rcnn/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco/faster_rcnn_r50_fpn_tnr-pretrain_1x_coco_20220320_085147-efedfda4.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py",
    "content": "_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    bbox_head=dict(\n        norm_on_bbox=True,\n        centerness_on_reg=True,\n        dcn_on_last_conv=False,\n        center_sampling=True,\n        conv_bias=True,\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.0)),\n    # training and testing settings\n    test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6)))\n\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\noptimizer_config = dict(_delete_=True, grad_clip=None)\n\nlr_config = dict(warmup='linear')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py",
    "content": "_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    bbox_head=dict(\n        norm_on_bbox=True,\n        centerness_on_reg=True,\n        dcn_on_last_conv=True,\n        center_sampling=True,\n        conv_bias=True,\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.0)),\n    # training and testing settings\n    test_cfg=dict(nms=dict(type='nms', iou_threshold=0.6)))\n\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\noptimizer_config = dict(_delete_=True, grad_clip=None)\n\nlr_config = dict(warmup='linear')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py",
    "content": "_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py'\nmodel = dict(bbox_head=dict(center_sampling=True, center_sample_radius=1.5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py",
    "content": "_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py",
    "content": "_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron/resnet101_caffe')))\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    type='FCOS',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron/resnet50_caffe')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',  # use P5\n        num_outs=5,\n        relu_before_extra_convs=True),\n    bbox_head=dict(\n        type='FCOSHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128],\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='IoULoss', loss_weight=1.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.5),\n        max_per_img=100))\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(\n    lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='constant',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py",
    "content": "# TODO: Remove this config after benchmarking all related configs\n_base_ = 'fcos_r50_caffe_fpn_gn-head_1x_coco.py'\n\ndata = dict(samples_per_gpu=4, workers_per_gpu=4)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_r50_caffe_fpn_gn-head_fp16_1x_bs8x8_coco.py",
    "content": "_base_ = ['./fcos_r50_caffe_fpn_gn-head_1x_coco.py']\n\ndata = dict(samples_per_gpu=8, workers_per_gpu=8)\n\n# optimizer\noptimizer = dict(lr=0.04)\nfp16 = dict(loss_scale='dynamic')\n\n# learning policy\n# In order to avoid non-convergence in the early stage of\n# mixed-precision training, the warmup in the lr_config is set to linear,\n# warmup_iters increases and warmup_ratio decreases.\nlr_config = dict(warmup='linear', warmup_iters=1000, warmup_ratio=1.0 / 10)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py",
    "content": "_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py'\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py",
    "content": "_base_ = './fcos_r50_caffe_fpn_gn-head_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(\n    lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fcos/metafile.yml",
    "content": "Collections:\n  - Name: FCOS\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - Group Normalization\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1904.01355\n      Title: 'FCOS: Fully Convolutional One-Stage Object Detection'\n    README: configs/fcos/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/fcos.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: fcos_r50_caffe_fpn_gn-head_1x_coco\n    In Collection: FCOS\n    Config: configs/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.6\n      inference time (ms/im):\n        - value: 44.05\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_1x_coco/fcos_r50_caffe_fpn_gn-head_1x_coco-821213aa.pth\n\n  - Name: fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco\n    In Collection: FCOS\n    Config: configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.7\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_1x_coco-0a0d75a8.pth\n\n  - Name: fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco\n    In Collection: FCOS\n    Config: configs/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.8\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco/fcos_center-normbbox-centeronreg-giou_r50_caffe_fpn_gn-head_dcn_1x_coco-ae4d8b3d.pth\n\n  - Name: fcos_r101_caffe_fpn_gn-head_1x_coco\n    In Collection: FCOS\n    Config: configs/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.5\n      inference time (ms/im):\n        - value: 57.8\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_1x_coco/fcos_r101_caffe_fpn_gn-head_1x_coco-0e37b982.pth\n\n  - Name: fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco\n    In Collection: FCOS\n    Config: configs/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py\n    Metadata:\n      Training Memory (GB): 2.6\n      inference time (ms/im):\n        - value: 43.67\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    
Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r50_caffe_fpn_gn-head_mstrain_640-800_2x_coco-d92ceeea.pth\n\n  - Name: fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco\n    In Collection: FCOS\n    Config: configs/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco.py\n    Metadata:\n      Training Memory (GB): 5.5\n      inference time (ms/im):\n        - value: 57.8\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco/fcos_r101_caffe_fpn_gn-head_mstrain_640-800_2x_coco-511424d6.pth\n\n  - Name: fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco\n    In Collection: FCOS\n    Config: configs/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco.py\n    Metadata:\n      Training Memory (GB): 10.0\n      inference time (ms/im):\n        - value: 103.09\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fcos/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco/fcos_x101_64x4d_fpn_gn-head_mstrain_640-800_2x_coco-ede514a8.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py",
    "content": "_base_ = './fovea_r50_fpn_4x4_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    bbox_head=dict(\n        with_deform=True,\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py",
    "content": "_base_ = './fovea_r50_fpn_4x4_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    bbox_head=dict(\n        with_deform=True,\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py",
    "content": "_base_ = './fovea_r50_fpn_4x4_1x_coco.py'\nmodel = dict(\n    bbox_head=dict(\n        with_deform=True,\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py",
    "content": "_base_ = './fovea_r50_fpn_4x4_1x_coco.py'\nmodel = dict(\n    bbox_head=dict(\n        with_deform=True,\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py",
    "content": "_base_ = './fovea_r50_fpn_4x4_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py",
    "content": "_base_ = './fovea_r50_fpn_4x4_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    type='FOVEA',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        num_outs=5,\n        add_extra_convs='on_input'),\n    bbox_head=dict(\n        type='FoveaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128],\n        base_edge_list=[16, 32, 64, 128, 256],\n        scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),\n        sigma=0.4,\n        with_deform=False,\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=1.50,\n            alpha=0.4,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(),\n    test_cfg=dict(\n        nms_pre=1000,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.5),\n        max_per_img=100))\ndata = dict(samples_per_gpu=4, workers_per_gpu=4)\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py",
    "content": "_base_ = './fovea_r50_fpn_4x4_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/foveabox/metafile.yml",
    "content": "Collections:\n  - Name: FoveaBox\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 4x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1904.03797\n      Title: 'FoveaBox: Beyond Anchor-based Object Detector'\n    README: configs/foveabox/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/fovea.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: fovea_r50_fpn_4x4_1x_coco\n    In Collection: FoveaBox\n    Config: configs/foveabox/fovea_r50_fpn_4x4_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.6\n      inference time (ms/im):\n        - value: 41.49\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_1x_coco/fovea_r50_fpn_4x4_1x_coco_20200219-ee4d5303.pth\n\n  - Name: fovea_r50_fpn_4x4_2x_coco\n    In Collection: FoveaBox\n    Config: configs/foveabox/fovea_r50_fpn_4x4_2x_coco.py\n    Metadata:\n      Training Memory (GB): 5.6\n      inference time (ms/im):\n        - value: 41.49\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r50_fpn_4x4_2x_coco/fovea_r50_fpn_4x4_2x_coco_20200203-2df792b1.pth\n\n  - Name: fovea_align_r50_fpn_gn-head_4x4_2x_coco\n    In Collection: FoveaBox\n    Config: configs/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py\n    Metadata:\n      Training Memory (GB): 8.1\n      inference time (ms/im):\n        - value: 51.55\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco/fovea_align_r50_fpn_gn-head_4x4_2x_coco_20200203-8987880d.pth\n\n  - Name: fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco\n    In Collection: FoveaBox\n    Config: configs/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py\n    Metadata:\n      Training Memory (GB): 8.1\n      inference time (ms/im):\n        - value: 54.64\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200205-85ce26cb.pth\n\n  - Name: fovea_r101_fpn_4x4_1x_coco\n    In Collection: FoveaBox\n    Config: configs/foveabox/fovea_r101_fpn_4x4_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.2\n      inference time (ms/im):\n        - value: 57.47\n          
hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_1x_coco/fovea_r101_fpn_4x4_1x_coco_20200219-05e38f1c.pth\n\n  - Name: fovea_r101_fpn_4x4_2x_coco\n    In Collection: FoveaBox\n    Config: configs/foveabox/fovea_r101_fpn_4x4_2x_coco.py\n    Metadata:\n      Training Memory (GB): 11.7\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_r101_fpn_4x4_2x_coco/fovea_r101_fpn_4x4_2x_coco_20200208-02320ea4.pth\n\n  - Name: fovea_align_r101_fpn_gn-head_4x4_2x_coco\n    In Collection: FoveaBox\n    Config: configs/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco.py\n    Metadata:\n      Training Memory (GB): 11.7\n      inference time (ms/im):\n        - value: 68.03\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_4x4_2x_coco/fovea_align_r101_fpn_gn-head_4x4_2x_coco_20200208-c39a027a.pth\n\n  - Name: fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco\n    In Collection: FoveaBox\n    Config: configs/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py\n    Metadata:\n      Training Memory (GB): 11.7\n      inference time (ms/im):\n        - value: 68.03\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/foveabox/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco/fovea_align_r101_fpn_gn-head_mstrain_640-800_4x4_2x_coco_20200208-649c5eb6.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py",
    "content": "_base_ = 'faster_rcnn_r50_fpg_crop640_50e_coco.py'\n\nnorm_cfg = dict(type='BN', requires_grad=True)\nmodel = dict(\n    neck=dict(out_channels=128, inter_channels=128),\n    rpn_head=dict(in_channels=128),\n    roi_head=dict(\n        bbox_roi_extractor=dict(out_channels=128),\n        bbox_head=dict(in_channels=128)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py",
    "content": "_base_ = 'faster_rcnn_r50_fpn_crop640_50e_coco.py'\n\nnorm_cfg = dict(type='BN', requires_grad=True)\nmodel = dict(\n    neck=dict(\n        type='FPG',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        inter_channels=256,\n        num_outs=5,\n        stack_times=9,\n        paths=['bu'] * 9,\n        same_down_trans=None,\n        same_up_trans=dict(\n            type='conv',\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_lateral_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_down_trans=dict(\n            type='interpolation_conv',\n            mode='nearest',\n            kernel_size=3,\n            norm_cfg=norm_cfg,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        across_up_trans=None,\n        across_skip_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        output_trans=dict(\n            type='last_conv',\n            kernel_size=3,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        norm_cfg=norm_cfg,\n        skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/faster_rcnn_r50_fpn_crop640_50e_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nnorm_cfg = dict(type='BN', requires_grad=True)\nmodel = dict(\n    backbone=dict(norm_cfg=norm_cfg, norm_eval=False),\n    neck=dict(norm_cfg=norm_cfg),\n    roi_head=dict(bbox_head=dict(norm_cfg=norm_cfg)))\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=(640, 640),\n        ratio_range=(0.8, 1.2),\n        keep_ratio=True),\n    dict(type='RandomCrop', crop_size=(640, 640)),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=(640, 640)),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(640, 640),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=64),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# learning policy\noptimizer = dict(\n    type='SGD',\n    lr=0.08,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True))\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.1,\n    step=[30, 40])\n# runtime settings\nrunner = dict(max_epochs=50)\nevaluation = dict(interval=2)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py",
    "content": "_base_ = 'mask_rcnn_r50_fpg_crop640_50e_coco.py'\n\nmodel = dict(\n    neck=dict(out_channels=128, inter_channels=128),\n    rpn_head=dict(in_channels=128),\n    roi_head=dict(\n        bbox_roi_extractor=dict(out_channels=128),\n        bbox_head=dict(in_channels=128),\n        mask_roi_extractor=dict(out_channels=128),\n        mask_head=dict(in_channels=128)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py",
    "content": "_base_ = 'mask_rcnn_r50_fpn_crop640_50e_coco.py'\n\nnorm_cfg = dict(type='BN', requires_grad=True)\nmodel = dict(\n    neck=dict(\n        type='FPG',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        inter_channels=256,\n        num_outs=5,\n        stack_times=9,\n        paths=['bu'] * 9,\n        same_down_trans=None,\n        same_up_trans=dict(\n            type='conv',\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_lateral_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_down_trans=dict(\n            type='interpolation_conv',\n            mode='nearest',\n            kernel_size=3,\n            norm_cfg=norm_cfg,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        across_up_trans=None,\n        across_skip_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        output_trans=dict(\n            type='last_conv',\n            kernel_size=3,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        norm_cfg=norm_cfg,\n        skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/mask_rcnn_r50_fpn_crop640_50e_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nnorm_cfg = dict(type='BN', requires_grad=True)\nmodel = dict(\n    backbone=dict(norm_cfg=norm_cfg, norm_eval=False),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        norm_cfg=norm_cfg,\n        num_outs=5),\n    roi_head=dict(\n        bbox_head=dict(norm_cfg=norm_cfg), mask_head=dict(norm_cfg=norm_cfg)))\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=(640, 640),\n        ratio_range=(0.8, 1.2),\n        keep_ratio=True),\n    dict(type='RandomCrop', crop_size=(640, 640)),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=(640, 640)),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(640, 640),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=64),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# learning policy\noptimizer = dict(\n    type='SGD',\n    lr=0.08,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True))\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.1,\n    step=[30, 40])\n# runtime settings\nrunner = dict(max_epochs=50)\nevaluation = dict(interval=2)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/metafile.yml",
    "content": "Collections:\n  - Name: Feature Pyramid Grids\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Feature Pyramid Grids\n    Paper:\n      URL: https://arxiv.org/abs/2004.03580\n      Title: 'Feature Pyramid Grids'\n    README: configs/fpg/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.10.0/mmdet/models/necks/fpg.py#L101\n      Version: v2.10.0\n\nModels:\n  - Name: faster_rcnn_r50_fpg_crop640_50e_coco\n    In Collection: Feature Pyramid Grids\n    Config: configs/fpg/faster_rcnn_r50_fpg_crop640_50e_coco.py\n    Metadata:\n      Training Memory (GB): 20.0\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg_crop640_50e_coco/faster_rcnn_r50_fpg_crop640_50e_coco_20220311_011856-74109f42.pth\n\n  - Name: faster_rcnn_r50_fpg-chn128_crop640_50e_coco\n    In Collection: Feature Pyramid Grids\n    Config: configs/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco.py\n    Metadata:\n      Training Memory (GB): 11.9\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/faster_rcnn_r50_fpg-chn128_crop640_50e_coco/faster_rcnn_r50_fpg-chn128_crop640_50e_coco_20220311_011857-9376aa9d.pth\n\n  - Name: mask_rcnn_r50_fpg_crop640_50e_coco\n    In Collection: Feature Pyramid Grids\n    Config: configs/fpg/mask_rcnn_r50_fpg_crop640_50e_coco.py\n    Metadata:\n      Training Memory (GB): 23.2\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  38.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg_crop640_50e_coco/mask_rcnn_r50_fpg_crop640_50e_coco_20220311_011857-233b8334.pth\n\n  - Name: mask_rcnn_r50_fpg-chn128_crop640_50e_coco\n    In Collection: Feature Pyramid Grids\n    Config: configs/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco.py\n    Metadata:\n      Training Memory (GB): 15.3\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/mask_rcnn_r50_fpg-chn128_crop640_50e_coco/mask_rcnn_r50_fpg-chn128_crop640_50e_coco_20220311_011859-043c9b4e.pth\n\n  - Name: retinanet_r50_fpg_crop640_50e_coco\n    In Collection: Feature Pyramid Grids\n    Config: configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py\n    Metadata:\n      Training Memory (GB): 20.8\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg_crop640_50e_coco/retinanet_r50_fpg_crop640_50e_coco_20220311_110809-b0bcf5f4.pth\n\n  - Name: retinanet_r50_fpg-chn128_crop640_50e_coco\n    In Collection: Feature Pyramid Grids\n    Config: configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py\n    Metadata:\n      Training Memory (GB): 19.9\n      Epochs: 50\n    
Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco/retinanet_r50_fpg-chn128_crop640_50e_coco_20220313_104829-ee99a686.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/retinanet_r50_fpg-chn128_crop640_50e_coco.py",
    "content": "_base_ = 'retinanet_r50_fpg_crop640_50e_coco.py'\n\nmodel = dict(\n    neck=dict(out_channels=128, inter_channels=128),\n    bbox_head=dict(in_channels=128))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fpg/retinanet_r50_fpg_crop640_50e_coco.py",
    "content": "_base_ = '../nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py'\n\nnorm_cfg = dict(type='BN', requires_grad=True)\nmodel = dict(\n    neck=dict(\n        _delete_=True,\n        type='FPG',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        inter_channels=256,\n        num_outs=5,\n        add_extra_convs=True,\n        start_level=1,\n        stack_times=9,\n        paths=['bu'] * 9,\n        same_down_trans=None,\n        same_up_trans=dict(\n            type='conv',\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_lateral_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_down_trans=dict(\n            type='interpolation_conv',\n            mode='nearest',\n            kernel_size=3,\n            norm_cfg=norm_cfg,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        across_up_trans=None,\n        across_skip_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        output_trans=dict(\n            type='last_conv',\n            kernel_size=3,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        norm_cfg=norm_cfg,\n        skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()]))\n\nevaluation = dict(interval=2)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/free_anchor/metafile.yml",
    "content": "Collections:\n  - Name: FreeAnchor\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FreeAnchor\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1909.02466\n      Title: 'FreeAnchor: Learning to Match Anchors for Visual Object Detection'\n    README: configs/free_anchor/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/dense_heads/free_anchor_retina_head.py#L10\n      Version: v2.0.0\n\nModels:\n  - Name: retinanet_free_anchor_r50_fpn_1x_coco\n    In Collection: FreeAnchor\n    Config: configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.9\n      inference time (ms/im):\n        - value: 54.35\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco/retinanet_free_anchor_r50_fpn_1x_coco_20200130-0f67375f.pth\n\n  - Name: retinanet_free_anchor_r101_fpn_1x_coco\n    In Collection: FreeAnchor\n    Config: configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.8\n      inference time (ms/im):\n        - value: 67.11\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco/retinanet_free_anchor_r101_fpn_1x_coco_20200130-358324e6.pth\n\n  - Name: retinanet_free_anchor_x101_32x4d_fpn_1x_coco\n    In Collection: FreeAnchor\n    Config: configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.1\n      inference time (ms/im):\n        - value: 90.09\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco/retinanet_free_anchor_x101_32x4d_fpn_1x_coco_20200130-d4846968.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/free_anchor/retinanet_free_anchor_r101_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_free_anchor_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py",
    "content": "_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    bbox_head=dict(\n        _delete_=True,\n        type='FreeAnchorRetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.75)))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/free_anchor/retinanet_free_anchor_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_free_anchor_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fsaf/fsaf_r101_fpn_1x_coco.py",
    "content": "_base_ = './fsaf_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fsaf/fsaf_r50_fpn_1x_coco.py",
    "content": "_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py'\n# model settings\nmodel = dict(\n    type='FSAF',\n    bbox_head=dict(\n        type='FSAFHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        reg_decoded_bbox=True,\n        # Only anchor-free branch is implemented. The anchor generator only\n        #  generates 1 anchor at each feature point, as a substitute of the\n        #  grid of features.\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=1,\n            scales_per_octave=1,\n            ratios=[1.0],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(_delete_=True, type='TBLRBBoxCoder', normalizer=4.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0,\n            reduction='none'),\n        loss_bbox=dict(\n            _delete_=True,\n            type='IoULoss',\n            eps=1e-6,\n            loss_weight=1.0,\n            reduction='none')),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            _delete_=True,\n            type='CenterRegionAssigner',\n            pos_scale=0.2,\n            neg_scale=0.2,\n            min_pos_iof=0.01),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False))\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=10, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './fsaf_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/fsaf/metafile.yml",
    "content": "Collections:\n  - Name: FSAF\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x Titan-XP GPUs\n      Architecture:\n        - FPN\n        - FSAF\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1903.00621\n      Title: 'Feature Selective Anchor-Free Module for Single-Shot Object Detection'\n    README: configs/fsaf/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/detectors/fsaf.py#L6\n      Version: v2.1.0\n\nModels:\n  - Name: fsaf_r50_fpn_1x_coco\n    In Collection: FSAF\n    Config: configs/fsaf/fsaf_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.15\n      inference time (ms/im):\n        - value: 76.92\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r50_fpn_1x_coco/fsaf_r50_fpn_1x_coco-94ccc51f.pth\n\n  - Name: fsaf_r101_fpn_1x_coco\n    In Collection: FSAF\n    Config: configs/fsaf/fsaf_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.08\n      inference time (ms/im):\n        - value: 92.59\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.3 (37.9)\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_r101_fpn_1x_coco/fsaf_r101_fpn_1x_coco-9e71098f.pth\n\n  - Name: fsaf_x101_64x4d_fpn_1x_coco\n    In Collection: FSAF\n    Config: configs/fsaf/fsaf_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.38\n      inference time (ms/im):\n        - value: 178.57\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.4 (41.0)\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fsaf/fsaf_x101_64x4d_fpn_1x_coco/fsaf_x101_64x4d_fpn_1x_coco-e3f6e6fd.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py",
    "content": "_base_ = '../dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../dcn/cascade_mask_rcnn_x101_32x4d_fpn_dconv_c3-c5_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 4),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 4),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(plugins=[\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16),\n            stages=(False, True, True, True),\n            position='after_conv3')\n    ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(plugins=[\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 4),\n            stages=(False, True, True, True),\n            position='after_conv3')\n    ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 4),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(plugins=[\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16),\n            stages=(False, True, True, True),\n            position='after_conv3')\n    ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(plugins=[\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 4),\n            stages=(False, True, True, True),\n            position='after_conv3')\n    ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 4),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True), norm_eval=False))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        plugins=[\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 4),\n                stages=(False, True, True, True),\n                position='after_conv3')\n        ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gcnet/metafile.yml",
    "content": "Collections:\n  - Name: GCNet\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Global Context Block\n        - FPN\n        - RPN\n        - ResNet\n        - ResNeXt\n    Paper:\n      URL: https://arxiv.org/abs/1904.11492\n      Title: 'GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond'\n    README: configs/gcnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/ops/context_block.py#L13\n      Version: v2.0.0\n\nModels:\n  - Name: mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.0\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  35.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r16_gcb_c3-c5_1x_coco_20200515_211915-187da160.pth\n\n  - Name: mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.1\n      inference time (ms/im):\n        - value: 66.67\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_r4_gcb_c3-c5_1x_coco_20200204-17235656.pth\n\n  - Name: mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      inference time (ms/im):\n        - value: 87.72\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r16_gcb_c3-c5_1x_coco_20200205-e58ae947.pth\n\n  - Name: mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.8\n      inference time (ms/im):\n        - value: 86.21\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.8\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_r4_gcb_c3-c5_1x_coco_20200206-af22dc9d.pth\n\n  - Name: mask_rcnn_r50_fpn_syncbn-backbone_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.4\n      inference time (ms/im):\n        - value: 60.24\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  34.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_1x_coco_20200202-bb3eb55c.pth\n\n  - Name: mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.0\n      inference time (ms/im):\n        - value: 64.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200202-587b99aa.pth\n\n  - Name: mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.1\n      inference time (ms/im):\n        - value: 66.23\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200202-50b90e5c.pth\n\n  - Name: mask_rcnn_r101_fpn_syncbn-backbone_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      inference time (ms/im):\n        - value: 75.19\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_1x_coco_20200210-81658c8a.pth\n\n  - Name: mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco\n    In Collection: 
GCNet\n    Config: configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      inference time (ms/im):\n        - value: 83.33\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200207-945e77ca.pth\n\n  - Name: mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.8\n      inference time (ms/im):\n        - value: 84.75\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200206-8407a3f0.pth\n\n  - Name: mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      inference time (ms/im):\n        - value: 88.5\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200211-7584841c.pth\n\n  - Name: mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.8\n      inference time (ms/im):\n        - value: 102.04\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  38.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-cbed3d2c.pth\n\n  - Name: mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py\n    Metadata:\n      
Training Memory (GB): 9.0\n      inference time (ms/im):\n        - value: 103.09\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  39.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200212-68164964.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.2\n      inference time (ms/im):\n        - value: 119.05\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  38.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_1x_coco_20200310-d5ad2a5e.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.3\n      inference time (ms/im):\n        - value: 129.87\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  39.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r16_gcb_c3-c5_1x_coco_20200211-10bf2463.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.6\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:    40.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco_20200703_180653-ed035291.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.5\n      - Task: Instance 
Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  40.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_1x_coco_20210615_211019-abbc39ea.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 48.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r16_gcb_c3-c5_1x_coco_20210615_215648-44aa598a.pth\n\n  - Name: cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco\n    In Collection: GCNet\n    Config: configs/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:   41.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gcnet/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_syncbn-backbone_dconv_c3-c5_r4_gcb_c3-c5_1x_coco_20210615_161851-720338ec.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py",
    "content": "_base_ = './gfl_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './gfl_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gfl/gfl_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='GFL',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    bbox_head=dict(\n        type='GFLHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        loss_cls=dict(\n            type='QualityFocalLoss',\n            use_sigmoid=True,\n            beta=2.0,\n            loss_weight=1.0),\n        loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25),\n        reg_max=16,\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(type='ATSSAssigner', topk=9),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './gfl_r50_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n# multi-scale training\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 480), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py",
    "content": "_base_ = './gfl_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    type='GFL',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, False, True, True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './gfl_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    type='GFL',\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gfl/metafile.yml",
    "content": "Collections:\n  - Name: Generalized Focal Loss\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Generalized Focal Loss\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/2006.04388\n      Title: 'Generalized Focal Loss: Learning Qualified and Distributed Bounding Boxes for Dense Object Detection'\n    README: configs/gfl/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.2.0/mmdet/models/detectors/gfl.py#L6\n      Version: v2.2.0\n\nModels:\n  - Name: gfl_r50_fpn_1x_coco\n    In Collection: Generalized Focal Loss\n    Config: configs/gfl/gfl_r50_fpn_1x_coco.py\n    Metadata:\n      inference time (ms/im):\n        - value: 51.28\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_1x_coco/gfl_r50_fpn_1x_coco_20200629_121244-25944287.pth\n\n  - Name: gfl_r50_fpn_mstrain_2x_coco\n    In Collection: Generalized Focal Loss\n    Config: configs/gfl/gfl_r50_fpn_mstrain_2x_coco.py\n    Metadata:\n      inference time (ms/im):\n        - value: 51.28\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r50_fpn_mstrain_2x_coco/gfl_r50_fpn_mstrain_2x_coco_20200629_213802-37bb1edc.pth\n\n  - Name: gfl_r101_fpn_mstrain_2x_coco\n    In Collection: Generalized Focal Loss\n    Config: configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py\n    Metadata:\n      inference time (ms/im):\n        - value: 68.03\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth\n\n  - Name: gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco\n    In Collection: Generalized Focal Loss\n    Config: configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py\n    Metadata:\n      inference time (ms/im):\n        - value: 77.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002-134b07df.pth\n\n  - Name: gfl_x101_32x4d_fpn_mstrain_2x_coco\n    In Collection: Generalized Focal Loss\n    Config: configs/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco.py\n    Metadata:\n      inference time (ms/im):\n        - value: 82.64\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n 
     Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_mstrain_2x_coco/gfl_x101_32x4d_fpn_mstrain_2x_coco_20200630_102002-50c1ffdb.pth\n\n  - Name: gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco\n    In Collection: Generalized Focal Loss\n    Config: configs/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py\n    Metadata:\n      inference time (ms/im):\n        - value: 93.46\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 48.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco/gfl_x101_32x4d_fpn_dconv_c4-c5_mstrain_2x_coco_20200630_102002-14a2bf25.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ghm/metafile.yml",
    "content": "Collections:\n  - Name: GHM\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - GHM-C\n        - GHM-R\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1811.05181\n      Title: 'Gradient Harmonized Single-stage Detector'\n    README: configs/ghm/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/losses/ghm_loss.py#L21\n      Version: v2.0.0\n\nModels:\n  - Name: retinanet_ghm_r50_fpn_1x_coco\n    In Collection: GHM\n    Config: configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.0\n      inference time (ms/im):\n        - value: 303.03\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r50_fpn_1x_coco/retinanet_ghm_r50_fpn_1x_coco_20200130-a437fda3.pth\n\n  - Name: retinanet_ghm_r101_fpn_1x_coco\n    In Collection: GHM\n    Config: configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      inference time (ms/im):\n        - value: 227.27\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_r101_fpn_1x_coco/retinanet_ghm_r101_fpn_1x_coco_20200130-c148ee8f.pth\n\n  - Name: retinanet_ghm_x101_32x4d_fpn_1x_coco\n    In Collection: GHM\n    Config: configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.2\n      inference time (ms/im):\n        - value: 196.08\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco/retinanet_ghm_x101_32x4d_fpn_1x_coco_20200131-e4333bd0.pth\n\n  - Name: retinanet_ghm_x101_64x4d_fpn_1x_coco\n    In Collection: GHM\n    Config: configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.3\n      inference time (ms/im):\n        - value: 192.31\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco/retinanet_ghm_x101_64x4d_fpn_1x_coco_20200131-dd381cef.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ghm/retinanet_ghm_r101_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_ghm_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ghm/retinanet_ghm_r50_fpn_1x_coco.py",
    "content": "_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    bbox_head=dict(\n        loss_cls=dict(\n            _delete_=True,\n            type='GHMC',\n            bins=30,\n            momentum=0.75,\n            use_sigmoid=True,\n            loss_weight=1.0),\n        loss_bbox=dict(\n            _delete_=True,\n            type='GHMR',\n            mu=0.02,\n            bins=10,\n            momentum=0.7,\n            loss_weight=10.0)))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ghm/retinanet_ghm_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_ghm_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ghm/retinanet_ghm_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_ghm_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_gn-all_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron/resnet101_gn')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py",
    "content": "_base_ = './mask_rcnn_r101_fpn_gn-all_2x_coco.py'\n\n# learning policy\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        norm_cfg=norm_cfg,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron/resnet50_gn')),\n    neck=dict(norm_cfg=norm_cfg),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=norm_cfg),\n        mask_head=dict(norm_cfg=norm_cfg)))\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_gn-all_2x_coco.py'\n\n# learning policy\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        norm_cfg=norm_cfg,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://contrib/resnet50_gn')),\n    neck=dict(norm_cfg=norm_cfg),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=norm_cfg),\n        mask_head=dict(norm_cfg=norm_cfg)))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py'\n\n# learning policy\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn/metafile.yml",
    "content": "Collections:\n  - Name: Group Normalization\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Group Normalization\n    Paper:\n      URL: https://arxiv.org/abs/1803.08494\n      Title: 'Group Normalization'\n    README: configs/gn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py\n      Version: v2.0.0\n\nModels:\n  - Name: mask_rcnn_r50_fpn_gn-all_2x_coco\n    In Collection: Group Normalization\n    Config: configs/gn/mask_rcnn_r50_fpn_gn-all_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.1\n      inference time (ms/im):\n        - value: 90.91\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_2x_coco/mask_rcnn_r50_fpn_gn-all_2x_coco_20200206-8eee02a6.pth\n\n  - Name: mask_rcnn_r50_fpn_gn-all_3x_coco\n    In Collection: Group Normalization\n    Config: configs/gn/mask_rcnn_r50_fpn_gn-all_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.1\n      inference time (ms/im):\n        - value: 90.91\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_3x_coco/mask_rcnn_r50_fpn_gn-all_3x_coco_20200214-8b23b1e5.pth\n\n  - Name: mask_rcnn_r101_fpn_gn-all_2x_coco\n    In Collection: Group Normalization\n    Config: configs/gn/mask_rcnn_r101_fpn_gn-all_2x_coco.py\n    Metadata:\n      Training Memory (GB): 9.9\n      inference time (ms/im):\n        - value: 111.11\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_2x_coco/mask_rcnn_r101_fpn_gn-all_2x_coco_20200205-d96b1b50.pth\n\n  - Name: mask_rcnn_r101_fpn_gn-all_3x_coco\n    In Collection: Group Normalization\n    Config: configs/gn/mask_rcnn_r101_fpn_gn-all_3x_coco.py\n    Metadata:\n      Training Memory (GB): 9.9\n      inference time (ms/im):\n        - value: 111.11\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  38.0\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r101_fpn_gn-all_3x_coco/mask_rcnn_r101_fpn_gn-all_3x_coco_20200513_181609-0df864f4.pth\n\n  - Name: mask_rcnn_r50_fpn_gn-all_contrib_2x_coco\n    In Collection: Group Normalization\n    Config: configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.1\n      inference time (ms/im):\n        - value: 91.74\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco/mask_rcnn_r50_fpn_gn-all_contrib_2x_coco_20200207-20d3e849.pth\n\n  - Name: mask_rcnn_r50_fpn_gn-all_contrib_3x_coco\n    In Collection: Group Normalization\n    Config: configs/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.1\n      inference time (ms/im):\n        - value: 91.74\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco/mask_rcnn_r50_fpn_gn-all_contrib_3x_coco_20200225-542aefbc.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://jhu/resnet101_gn_ws')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://jhu/resnet50_gn_ws')),\n    neck=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py'\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://jhu/resnext101_32x4d_gn_ws')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py",
    "content": "_base_ = './faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py'\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=50,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://jhu/resnext50_32x4d_gn_ws')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py",
    "content": "_base_ = './mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py'\n# learning policy\nlr_config = dict(step=[20, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://jhu/resnet101_gn_ws')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py'\n# learning policy\nlr_config = dict(step=[20, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://jhu/resnet50_gn_ws')),\n    neck=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg),\n        mask_head=dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg)))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py",
    "content": "_base_ = './mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py'\n# learning policy\nlr_config = dict(step=[20, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py'\n# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://jhu/resnext101_32x4d_gn_ws')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py",
    "content": "_base_ = './mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py'\n# learning policy\nlr_config = dict(step=[20, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py'\n# model settings\nconv_cfg = dict(type='ConvWS')\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=50,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        conv_cfg=conv_cfg,\n        norm_cfg=norm_cfg,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://jhu/resnext50_32x4d_gn_ws')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/gn+ws/metafile.yml",
    "content": "Collections:\n  - Name: Weight Standardization\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Group Normalization\n        - Weight Standardization\n    Paper:\n      URL: https://arxiv.org/abs/1903.10520\n      Title: 'Weight Standardization'\n    README: configs/gn+ws/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py\n      Version: v2.0.0\n\nModels:\n  - Name: faster_rcnn_r50_fpn_gn_ws-all_1x_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.9\n      inference time (ms/im):\n        - value: 85.47\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r50_fpn_gn_ws-all_1x_coco/faster_rcnn_r50_fpn_gn_ws-all_1x_coco_20200130-613d9fe2.pth\n\n  - Name: faster_rcnn_r101_fpn_gn_ws-all_1x_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.9\n      inference time (ms/im):\n        - value: 111.11\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_r101_fpn_gn_ws-all_1x_coco/faster_rcnn_r101_fpn_gn_ws-all_1x_coco_20200205-a93b0d75.pth\n\n  - Name: faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 97.09\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x50_32x4d_fpn_gn_ws-all_1x_coco_20200203-839c5d9d.pth\n\n  - Name: faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.8\n      inference time (ms/im):\n        - value: 131.58\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco/faster_rcnn_x101_32x4d_fpn_gn_ws-all_1x_coco_20200212-27da1bc2.pth\n\n  - Name: mask_rcnn_r50_fpn_gn_ws-all_2x_coco\n    In Collection: Weight 
Standardization\n    Config: configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.3\n      inference time (ms/im):\n        - value: 95.24\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  36.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_2x_coco/mask_rcnn_r50_fpn_gn_ws-all_2x_coco_20200226-16acb762.pth\n\n  - Name: mask_rcnn_r101_fpn_gn_ws-all_2x_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco.py\n    Metadata:\n      Training Memory (GB): 10.3\n      inference time (ms/im):\n        - value: 116.28\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_2x_coco/mask_rcnn_r101_fpn_gn_ws-all_2x_coco_20200212-ea357cd9.pth\n\n  - Name: mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco.py\n    Metadata:\n      Training Memory (GB): 8.4\n      inference time (ms/im):\n        - value: 107.53\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_2x_coco_20200216-649fdb6f.pth\n\n  - Name: mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco.py\n    Metadata:\n      Training Memory (GB): 12.2\n      inference time (ms/im):\n        - value: 140.85\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_2x_coco_20200319-33fb95b5.pth\n\n  - Name: mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco.py\n    Metadata:\n      Training Memory (GB): 7.3\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.1\n      - Task: Instance 
Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  37.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r50_fpn_gn_ws-all_20_23_24e_coco_20200213-487d1283.pth\n\n  - Name: mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco.py\n    Metadata:\n      Training Memory (GB): 10.3\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  38.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_r101_fpn_gn_ws-all_20_23_24e_coco_20200213-57b5a50f.pth\n\n  - Name: mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco.py\n    Metadata:\n      Training Memory (GB): 8.4\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  38.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x50_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200226-969bcb2c.pth\n\n  - Name: mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco\n    In Collection: Weight Standardization\n    Config: configs/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco.py\n    Metadata:\n      Training Memory (GB): 12.2\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:  38.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/gn%2Bws/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco/mask_rcnn_x101_32x4d_fpn_gn_ws-all_20_23_24e_coco_20200316-e6cd35ef.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py",
    "content": "_base_ = './grid_rcnn_r50_fpn_gn-head_2x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py",
    "content": "_base_ = ['grid_rcnn_r50_fpn_gn-head_2x_coco.py']\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[8, 11])\ncheckpoint_config = dict(interval=1)\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    type='GridRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='GridRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            type='Shared2FCBBoxHead',\n            with_reg=False,\n            in_channels=256,\n            fc_out_channels=1024,\n            roi_feat_size=7,\n            num_classes=80,\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[0., 0., 0., 0.],\n                target_stds=[0.1, 0.1, 0.2, 0.2]),\n            reg_class_agnostic=False),\n        grid_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        grid_head=dict(\n            type='GridHead',\n            grid_points=9,\n            num_convs=8,\n            in_channels=256,\n            point_feat_channels=64,\n            norm_cfg=dict(type='GN', num_groups=36),\n            loss_grid=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=15))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.5,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                
type='RandomSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True),\n            pos_radius=1,\n            pos_weight=-1,\n            max_num_grid=192,\n            debug=False)),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.03,\n            nms=dict(type='nms', iou_threshold=0.3),\n            max_per_img=100)))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=3665,\n    warmup_ratio=1.0 / 80,\n    step=[17, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=25)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py",
    "content": "_base_ = './grid_rcnn_r50_fpn_gn-head_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=3665,\n    warmup_ratio=1.0 / 80,\n    step=[17, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=25)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py",
    "content": "_base_ = './grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/grid_rcnn/metafile.yml",
    "content": "Collections:\n  - Name: Grid R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RPN\n        - Dilated Convolution\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: https://arxiv.org/abs/1906.05688\n      Title: 'Grid R-CNN'\n    README: configs/grid_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/grid_rcnn.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: grid_rcnn_r50_fpn_gn-head_2x_coco\n    In Collection: Grid R-CNN\n    Config: configs/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py\n    Metadata:\n      Training Memory (GB): 5.1\n      inference time (ms/im):\n        - value: 66.67\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco/grid_rcnn_r50_fpn_gn-head_2x_coco_20200130-6cca8223.pth\n\n  - Name: grid_rcnn_r101_fpn_gn-head_2x_coco\n    In Collection: Grid R-CNN\n    Config: configs/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 79.37\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_r101_fpn_gn-head_2x_coco/grid_rcnn_r101_fpn_gn-head_2x_coco_20200309-d6eca030.pth\n\n  - Name: grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco\n    In Collection: Grid R-CNN\n    Config: configs/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco.py\n    Metadata:\n      Training Memory (GB): 8.3\n      inference time (ms/im):\n        - value: 92.59\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_32x4d_fpn_gn-head_2x_coco_20200130-d8f0e3ff.pth\n\n  - Name: grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco\n    In Collection: Grid R-CNN\n    Config: configs/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco.py\n    Metadata:\n      Training Memory (GB): 11.3\n      inference time (ms/im):\n        - value: 129.87\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/grid_rcnn/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco/grid_rcnn_x101_64x4d_fpn_gn-head_2x_coco_20200204-ec76a754.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\n# model settings\nmodel = dict(\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            aggregation='sum',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py",
    "content": "_base_ = '../grid_rcnn/grid_rcnn_r50_fpn_gn-head_1x_coco.py'\n# model settings\nmodel = dict(\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            aggregation='sum',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2)),\n        grid_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py",
    "content": "_base_ = '../gcnet/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py'\n# model settings\nmodel = dict(\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            aggregation='sum',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2)),\n        mask_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\n# model settings\nmodel = dict(\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            aggregation='sum',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2)),\n        mask_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py",
    "content": "_base_ = '../gcnet/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_1x_coco.py'\n# model settings\nmodel = dict(\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            aggregation='sum',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2)),\n        mask_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32],\n            pre_cfg=dict(\n                type='ConvModule',\n                in_channels=256,\n                out_channels=256,\n                kernel_size=5,\n                padding=2,\n                inplace=False,\n            ),\n            post_cfg=dict(\n                type='GeneralizedAttention',\n                in_channels=256,\n                spatial_range=-1,\n                num_heads=6,\n                attention_type='0100',\n                kv_stride=2))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/groie/metafile.yml",
    "content": "Collections:\n  - Name: GRoIE\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Generic RoI Extractor\n        - FPN\n        - RPN\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: https://arxiv.org/abs/2004.13665\n      Title: 'A novel Region of Interest Extraction Layer for Instance Segmentation'\n    README: configs/groie/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/roi_heads/roi_extractors/groie.py#L15\n      Version: v2.1.0\n\nModels:\n  - Name: faster_rcnn_r50_fpn_groie_1x_coco\n    In Collection: GRoIE\n    Config: configs/groie/faster_rcnn_r50_fpn_groie_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/faster_rcnn_r50_fpn_groie_1x_coco/faster_rcnn_r50_fpn_groie_1x_coco_20200604_211715-66ee9516.pth\n\n  - Name: grid_rcnn_r50_fpn_gn-head_groie_1x_coco\n    In Collection: GRoIE\n    Config: configs/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/grid_rcnn_r50_fpn_gn-head_groie_1x_coco/grid_rcnn_r50_fpn_gn-head_groie_1x_coco_20200605_202059-4b75d86f.pth\n\n  - Name: mask_rcnn_r50_fpn_groie_1x_coco\n    In Collection: GRoIE\n    Config: configs/groie/mask_rcnn_r50_fpn_groie_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_groie_1x_coco/mask_rcnn_r50_fpn_groie_1x_coco_20200604_211715-50d90c74.pth\n\n  - Name: mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco\n    In Collection: GRoIE\n    Config: configs/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP:   37.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r50_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200604_211715-42eb79e1.pth\n\n  - Name: mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco\n    In Collection: GRoIE\n    Config: configs/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/groie/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco/mask_rcnn_r101_fpn_syncbn-backbone_r4_gcb_c3-c5_groie_1x_coco_20200607_224507-8daae01c.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_fast_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    roi_head=dict(\n        bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))),\n    # model training and testing settings\n    train_cfg=dict(\n        rcnn=dict(\n            assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6),\n            sampler=dict(num=256))),\n    test_cfg=dict(rcnn=dict(score_thr=1e-3)))\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadProposals', num_max_proposals=300),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadProposals', num_max_proposals=None),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img', 'proposals']),\n        ])\n]\ndata = dict(\n    train=dict(\n        proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_train2017.pkl',\n        pipeline=train_pipeline),\n    val=dict(\n        proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl',\n        pipeline=test_pipeline),\n    test=dict(\n        proposal_file=data_root + 'proposals/ga_rpn_r50_fpn_1x_val2017.pkl',\n        pipeline=test_pipeline))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './ga_faster_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    rpn_head=dict(\n        _delete_=True,\n        type='GARPNHead',\n        in_channels=256,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=8,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[8],\n            strides=[4, 8, 16, 32, 64]),\n        anchor_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.07, 0.07, 0.14, 0.14]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.07, 0.07, 0.11, 0.11]),\n        loc_filter_thr=0.01,\n        loss_loc=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),\n    roi_head=dict(\n        bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            ga_assigner=dict(\n                type='ApproxMaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            ga_sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            center_ratio=0.2,\n            ignore_ratio=0.5),\n        rpn_proposal=dict(nms_post=1000, max_per_img=300),\n        rcnn=dict(\n            assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6),\n            sampler=dict(type='RandomSampler', num=256))),\n    test_cfg=dict(\n        rpn=dict(nms_post=1000, max_per_img=300), rcnn=dict(score_thr=1e-3)))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_faster_r50_fpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    rpn_head=dict(\n        _delete_=True,\n        type='GARPNHead',\n        in_channels=256,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=8,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[8],\n            strides=[4, 8, 16, 32, 64]),\n        anchor_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.07, 0.07, 0.14, 0.14]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.07, 0.07, 0.11, 0.11]),\n        loc_filter_thr=0.01,\n        loss_loc=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),\n    roi_head=dict(\n        bbox_head=dict(bbox_coder=dict(target_stds=[0.05, 0.05, 0.1, 0.1]))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            ga_assigner=dict(\n                type='ApproxMaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            ga_sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            center_ratio=0.2,\n            ignore_ratio=0.5),\n        rpn_proposal=dict(nms_post=1000, max_per_img=300),\n        rcnn=dict(\n            assigner=dict(pos_iou_thr=0.6, neg_iou_thr=0.6, min_pos_iou=0.6),\n            sampler=dict(type='RandomSampler', num=256))),\n    test_cfg=dict(\n        rpn=dict(nms_post=1000, max_per_img=300), rcnn=dict(score_thr=1e-3)))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './ga_faster_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './ga_faster_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './ga_retinanet_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_mstrain_2x.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n\n# model settings\nmodel = dict(\n    type='RetinaNet',\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5),\n    bbox_head=dict(\n        type='GARetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        anchor_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loc_filter_thr=0.01,\n        loss_loc=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0)))\n# training and testing settings\ntrain_cfg = dict(\n    ga_assigner=dict(\n        type='ApproxMaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.4,\n        min_pos_iou=0.4,\n        ignore_iof_thr=-1),\n    ga_sampler=dict(\n        type='RandomSampler',\n        num=256,\n        pos_fraction=0.5,\n        neg_pos_ub=-1,\n        add_gt_as_proposals=False),\n    assigner=dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        min_pos_iou=0.0,\n        ignore_iof_thr=-1),\n    allowed_border=-1,\n    pos_weight=-1,\n    center_ratio=0.2,\n    ignore_ratio=0.5,\n    debug=False)\ntest_cfg = dict(\n    nms_pre=1000,\n    min_bbox_size=0,\n    score_thr=0.05,\n    nms=dict(type='nms', iou_threshold=0.5),\n    max_per_img=100)\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 480), (1333, 960)],\n        keep_ratio=True,\n        multiscale_mode='range'),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    
dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(interval=1, metric='bbox')\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[16, 22])\ncheckpoint_config = dict(interval=1)\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        # dict(type='TensorboardLoggerHook')\n    ])\n# yapf:enable\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = '../retinanet/retinanet_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    bbox_head=dict(\n        _delete_=True,\n        type='GARetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        anchor_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loc_filter_thr=0.01,\n        loss_loc=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        ga_assigner=dict(\n            type='ApproxMaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.4,\n            ignore_iof_thr=-1),\n        ga_sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        assigner=dict(neg_iou_thr=0.5, min_pos_iou=0.0),\n        center_ratio=0.2,\n        ignore_ratio=0.5))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py",
    "content": "_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    bbox_head=dict(\n        _delete_=True,\n        type='GARetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        anchor_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loc_filter_thr=0.01,\n        loss_loc=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=0.04, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        ga_assigner=dict(\n            type='ApproxMaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.4,\n            ignore_iof_thr=-1),\n        ga_sampler=dict(\n            type='RandomSampler',\n            num=256,\n            pos_fraction=0.5,\n            neg_pos_ub=-1,\n            add_gt_as_proposals=False),\n        assigner=dict(neg_iou_thr=0.5, min_pos_iou=0.0),\n        center_ratio=0.2,\n        ignore_ratio=0.5))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './ga_retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './ga_retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './ga_rpn_r50_caffe_fpn_1x_coco.py'\n# model settings\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = '../rpn/rpn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    rpn_head=dict(\n        _delete_=True,\n        type='GARPNHead',\n        in_channels=256,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=8,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[8],\n            strides=[4, 8, 16, 32, 64]),\n        anchor_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.07, 0.07, 0.14, 0.14]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.07, 0.07, 0.11, 0.11]),\n        loc_filter_thr=0.01,\n        loss_loc=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            ga_assigner=dict(\n                type='ApproxMaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            ga_sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            center_ratio=0.2,\n            ignore_ratio=0.5)),\n    test_cfg=dict(rpn=dict(nms_post=1000)))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_rpn_r50_fpn_1x_coco.py",
    "content": "_base_ = '../rpn/rpn_r50_fpn_1x_coco.py'\nmodel = dict(\n    rpn_head=dict(\n        _delete_=True,\n        type='GARPNHead',\n        in_channels=256,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=8,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[8],\n            strides=[4, 8, 16, 32, 64]),\n        anchor_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.07, 0.07, 0.14, 0.14]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.07, 0.07, 0.11, 0.11]),\n        loc_filter_thr=0.01,\n        loss_loc=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            ga_assigner=dict(\n                type='ApproxMaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            ga_sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            center_ratio=0.2,\n            ignore_ratio=0.5)),\n    test_cfg=dict(rpn=dict(nms_post=1000)))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './ga_rpn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './ga_rpn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/guided_anchoring/metafile.yml",
    "content": "Collections:\n  - Name: Guided Anchoring\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - Guided Anchoring\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1901.03278\n      Title: 'Region Proposal by Guided Anchoring'\n    README: configs/guided_anchoring/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/dense_heads/ga_retina_head.py#L10\n      Version: v2.0.0\n\nModels:\n  - Name: ga_rpn_r50_caffe_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.3\n      inference time (ms/im):\n        - value: 63.29\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Region Proposal\n        Dataset: COCO\n        Metrics:\n          AR@1000: 68.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r50_caffe_fpn_1x_coco/ga_rpn_r50_caffe_fpn_1x_coco_20200531-899008a6.pth\n\n  - Name: ga_rpn_r101_caffe_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.3\n      inference time (ms/im):\n        - value: 76.92\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Region Proposal\n        Dataset: COCO\n        Metrics:\n          AR@1000: 69.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_r101_caffe_fpn_1x_coco/ga_rpn_r101_caffe_fpn_1x_coco_20200531-ca9ba8fb.pth\n\n  - Name: ga_rpn_x101_32x4d_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.5\n      inference time (ms/im):\n        - value: 100\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Region Proposal\n        Dataset: COCO\n        Metrics:\n          AR@1000: 70.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_32x4d_fpn_1x_coco/ga_rpn_x101_32x4d_fpn_1x_coco_20200220-c28d1b18.pth\n\n  - Name: ga_rpn_x101_64x4d_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.1\n      inference time (ms/im):\n        - value: 133.33\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Region Proposal\n        Dataset: COCO\n        Metrics:\n          AR@1000: 70.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_rpn_x101_64x4d_fpn_1x_coco/ga_rpn_x101_64x4d_fpn_1x_coco_20200225-3c6e1aa2.pth\n\n  - Name: ga_faster_r50_caffe_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.5\n      Epochs: 12\n   
 Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r50_caffe_fpn_1x_coco/ga_faster_r50_caffe_fpn_1x_coco_20200702_000718-a11ccfe6.pth\n\n  - Name: ga_faster_r101_caffe_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.5\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_r101_caffe_fpn_1x_coco/ga_faster_r101_caffe_fpn_1x_coco_bbox_mAP-0.415_20200505_115528-fb82e499.pth\n\n  - Name: ga_faster_x101_32x4d_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.7\n      inference time (ms/im):\n        - value: 103.09\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_32x4d_fpn_1x_coco/ga_faster_x101_32x4d_fpn_1x_coco_20200215-1ded9da3.pth\n\n  - Name: ga_faster_x101_64x4d_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 11.8\n      inference time (ms/im):\n        - value: 136.99\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_faster_x101_64x4d_fpn_1x_coco/ga_faster_x101_64x4d_fpn_1x_coco_20200215-0fa7bde7.pth\n\n  - Name: ga_retinanet_r50_caffe_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.5\n      inference time (ms/im):\n        - value: 59.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r50_caffe_fpn_1x_coco/ga_retinanet_r50_caffe_fpn_1x_coco_20201020-39581c6f.pth\n\n  - Name: ga_retinanet_r101_caffe_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.5\n      inference time (ms/im):\n        - value: 77.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.0\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_r101_caffe_fpn_1x_coco/ga_retinanet_r101_caffe_fpn_1x_coco_20200531-6266453c.pth\n\n  - Name: ga_retinanet_x101_32x4d_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.9\n      inference time (ms/im):\n        - value: 94.34\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_32x4d_fpn_1x_coco/ga_retinanet_x101_32x4d_fpn_1x_coco_20200219-40c56caa.pth\n\n  - Name: ga_retinanet_x101_64x4d_fpn_1x_coco\n    In Collection: Guided Anchoring\n    Config: configs/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.9\n      inference time (ms/im):\n        - value: 129.87\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/guided_anchoring/ga_retinanet_x101_64x4d_fpn_1x_coco/ga_retinanet_x101_64x4d_fpn_1x_coco_20200226-ef9f7f1f.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py'\n# model settings\nmodel = dict(\n    backbone=dict(\n        extra=dict(\n            stage2=dict(num_channels=(18, 36)),\n            stage3=dict(num_channels=(18, 36, 72)),\n            stage4=dict(num_channels=(18, 36, 72, 144))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),\n    neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='HRNet',\n        extra=dict(\n            stage1=dict(\n                num_modules=1,\n                num_branches=1,\n                block='BOTTLENECK',\n                num_blocks=(4, ),\n                num_channels=(64, )),\n            stage2=dict(\n                num_modules=1,\n                num_branches=2,\n                block='BASIC',\n                num_blocks=(4, 4),\n                num_channels=(32, 64)),\n            stage3=dict(\n                num_modules=4,\n                num_branches=3,\n                block='BASIC',\n                num_blocks=(4, 4, 4),\n                num_channels=(32, 64, 128)),\n            stage4=dict(\n                num_modules=3,\n                num_branches=4,\n                block='BASIC',\n                num_blocks=(4, 4, 4, 4),\n                num_channels=(32, 64, 128, 256))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')),\n    neck=dict(\n        _delete_=True,\n        type='HRFPN',\n        in_channels=[32, 64, 128, 256],\n        out_channels=256))\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py'\n# model settings\nmodel = dict(\n    backbone=dict(\n        type='HRNet',\n        extra=dict(\n            stage2=dict(num_channels=(40, 80)),\n            stage3=dict(num_channels=(40, 80, 160)),\n            stage4=dict(num_channels=(40, 80, 160, 320))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')),\n    neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py",
    "content": "_base_ = './cascade_rcnn_hrnetv2p_w32_20e_coco.py'\n# model settings\nmodel = dict(\n    backbone=dict(\n        extra=dict(\n            stage2=dict(num_channels=(18, 36)),\n            stage3=dict(num_channels=(18, 36, 72)),\n            stage4=dict(num_channels=(18, 36, 72, 144))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),\n    neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='HRNet',\n        extra=dict(\n            stage1=dict(\n                num_modules=1,\n                num_branches=1,\n                block='BOTTLENECK',\n                num_blocks=(4, ),\n                num_channels=(64, )),\n            stage2=dict(\n                num_modules=1,\n                num_branches=2,\n                block='BASIC',\n                num_blocks=(4, 4),\n                num_channels=(32, 64)),\n            stage3=dict(\n                num_modules=4,\n                num_branches=3,\n                block='BASIC',\n                num_blocks=(4, 4, 4),\n                num_channels=(32, 64, 128)),\n            stage4=dict(\n                num_modules=3,\n                num_branches=4,\n                block='BASIC',\n                num_blocks=(4, 4, 4, 4),\n                num_channels=(32, 64, 128, 256))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')),\n    neck=dict(\n        _delete_=True,\n        type='HRFPN',\n        in_channels=[32, 64, 128, 256],\n        out_channels=256))\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py",
    "content": "_base_ = './cascade_rcnn_hrnetv2p_w32_20e_coco.py'\n# model settings\nmodel = dict(\n    backbone=dict(\n        type='HRNet',\n        extra=dict(\n            stage2=dict(num_channels=(40, 80)),\n            stage3=dict(num_channels=(40, 80, 160)),\n            stage4=dict(num_channels=(40, 80, 160, 320))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')),\n    neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py",
    "content": "_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py'\n# model settings\nmodel = dict(\n    backbone=dict(\n        extra=dict(\n            stage2=dict(num_channels=(18, 36)),\n            stage3=dict(num_channels=(18, 36, 72)),\n            stage4=dict(num_channels=(18, 36, 72, 144))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),\n    neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py",
    "content": "_base_ = './faster_rcnn_hrnetv2p_w18_1x_coco.py'\n\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='HRNet',\n        extra=dict(\n            stage1=dict(\n                num_modules=1,\n                num_branches=1,\n                block='BOTTLENECK',\n                num_blocks=(4, ),\n                num_channels=(64, )),\n            stage2=dict(\n                num_modules=1,\n                num_branches=2,\n                block='BASIC',\n                num_blocks=(4, 4),\n                num_channels=(32, 64)),\n            stage3=dict(\n                num_modules=4,\n                num_branches=3,\n                block='BASIC',\n                num_blocks=(4, 4, 4),\n                num_channels=(32, 64, 128)),\n            stage4=dict(\n                num_modules=3,\n                num_branches=4,\n                block='BASIC',\n                num_blocks=(4, 4, 4, 4),\n                num_channels=(32, 64, 128, 256))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')),\n    neck=dict(\n        _delete_=True,\n        type='HRFPN',\n        in_channels=[32, 64, 128, 256],\n        out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py",
    "content": "_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py",
    "content": "_base_ = './faster_rcnn_hrnetv2p_w32_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='HRNet',\n        extra=dict(\n            stage2=dict(num_channels=(40, 80)),\n            stage3=dict(num_channels=(40, 80, 160)),\n            stage4=dict(num_channels=(40, 80, 160, 320))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')),\n    neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py",
    "content": "_base_ = './faster_rcnn_hrnetv2p_w40_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py",
    "content": "_base_ = './fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        extra=dict(\n            stage2=dict(num_channels=(18, 36)),\n            stage3=dict(num_channels=(18, 36, 72)),\n            stage4=dict(num_channels=(18, 36, 72, 144))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),\n    neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py",
    "content": "_base_ = './fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py",
    "content": "_base_ = './fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        extra=dict(\n            stage2=dict(num_channels=(18, 36)),\n            stage3=dict(num_channels=(18, 36, 72)),\n            stage4=dict(num_channels=(18, 36, 72, 144))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),\n    neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py",
    "content": "_base_ = '../fcos/fcos_r50_caffe_fpn_gn-head_4x4_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='HRNet',\n        extra=dict(\n            stage1=dict(\n                num_modules=1,\n                num_branches=1,\n                block='BOTTLENECK',\n                num_blocks=(4, ),\n                num_channels=(64, )),\n            stage2=dict(\n                num_modules=1,\n                num_branches=2,\n                block='BASIC',\n                num_blocks=(4, 4),\n                num_channels=(32, 64)),\n            stage3=dict(\n                num_modules=4,\n                num_branches=3,\n                block='BASIC',\n                num_blocks=(4, 4, 4),\n                num_channels=(32, 64, 128)),\n            stage4=dict(\n                num_modules=3,\n                num_branches=4,\n                block='BASIC',\n                num_blocks=(4, 4, 4, 4),\n                num_channels=(32, 64, 128, 256))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')),\n    neck=dict(\n        _delete_=True,\n        type='HRFPN',\n        in_channels=[32, 64, 128, 256],\n        out_channels=256,\n        stride=2,\n        num_outs=5))\nimg_norm_cfg = dict(\n    mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py",
    "content": "_base_ = './fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py",
    "content": "_base_ = './fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py'\nimg_norm_cfg = dict(\n    mean=[103.53, 116.28, 123.675], std=[57.375, 57.12, 58.395], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py",
    "content": "_base_ = './fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='HRNet',\n        extra=dict(\n            stage2=dict(num_channels=(40, 80)),\n            stage3=dict(num_channels=(40, 80, 160)),\n            stage4=dict(num_channels=(40, 80, 160, 320))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')),\n    neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/htc_hrnetv2p_w18_20e_coco.py",
    "content": "_base_ = './htc_hrnetv2p_w32_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        extra=dict(\n            stage2=dict(num_channels=(18, 36)),\n            stage3=dict(num_channels=(18, 36, 72)),\n            stage4=dict(num_channels=(18, 36, 72, 144))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),\n    neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/htc_hrnetv2p_w32_20e_coco.py",
    "content": "_base_ = '../htc/htc_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='HRNet',\n        extra=dict(\n            stage1=dict(\n                num_modules=1,\n                num_branches=1,\n                block='BOTTLENECK',\n                num_blocks=(4, ),\n                num_channels=(64, )),\n            stage2=dict(\n                num_modules=1,\n                num_branches=2,\n                block='BASIC',\n                num_blocks=(4, 4),\n                num_channels=(32, 64)),\n            stage3=dict(\n                num_modules=4,\n                num_branches=3,\n                block='BASIC',\n                num_blocks=(4, 4, 4),\n                num_channels=(32, 64, 128)),\n            stage4=dict(\n                num_modules=3,\n                num_branches=4,\n                block='BASIC',\n                num_blocks=(4, 4, 4, 4),\n                num_channels=(32, 64, 128, 256))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')),\n    neck=dict(\n        _delete_=True,\n        type='HRFPN',\n        in_channels=[32, 64, 128, 256],\n        out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/htc_hrnetv2p_w40_20e_coco.py",
    "content": "_base_ = './htc_hrnetv2p_w32_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='HRNet',\n        extra=dict(\n            stage2=dict(num_channels=(40, 80)),\n            stage3=dict(num_channels=(40, 80, 160)),\n            stage4=dict(num_channels=(40, 80, 160, 320))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')),\n    neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/htc_hrnetv2p_w40_28e_coco.py",
    "content": "_base_ = './htc_hrnetv2p_w40_20e_coco.py'\n# learning policy\nlr_config = dict(step=[24, 27])\nrunner = dict(type='EpochBasedRunner', max_epochs=28)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/htc_x101_64x4d_fpn_16x1_28e_coco.py",
    "content": "_base_ = '../htc/htc_x101_64x4d_fpn_16x1_20e_coco.py'\n# learning policy\nlr_config = dict(step=[24, 27])\nrunner = dict(type='EpochBasedRunner', max_epochs=28)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py",
    "content": "_base_ = './mask_rcnn_hrnetv2p_w32_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        extra=dict(\n            stage2=dict(num_channels=(18, 36)),\n            stage3=dict(num_channels=(18, 36, 72)),\n            stage4=dict(num_channels=(18, 36, 72, 144))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w18')),\n    neck=dict(type='HRFPN', in_channels=[18, 36, 72, 144], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py",
    "content": "_base_ = './mask_rcnn_hrnetv2p_w18_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='HRNet',\n        extra=dict(\n            stage1=dict(\n                num_modules=1,\n                num_branches=1,\n                block='BOTTLENECK',\n                num_blocks=(4, ),\n                num_channels=(64, )),\n            stage2=dict(\n                num_modules=1,\n                num_branches=2,\n                block='BASIC',\n                num_blocks=(4, 4),\n                num_channels=(32, 64)),\n            stage3=dict(\n                num_modules=4,\n                num_branches=3,\n                block='BASIC',\n                num_blocks=(4, 4, 4),\n                num_channels=(32, 64, 128)),\n            stage4=dict(\n                num_modules=3,\n                num_branches=4,\n                block='BASIC',\n                num_blocks=(4, 4, 4, 4),\n                num_channels=(32, 64, 128, 256))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w32')),\n    neck=dict(\n        _delete_=True,\n        type='HRFPN',\n        in_channels=[32, 64, 128, 256],\n        out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py",
    "content": "_base_ = './mask_rcnn_hrnetv2p_w32_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py",
    "content": "_base_ = './mask_rcnn_hrnetv2p_w18_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='HRNet',\n        extra=dict(\n            stage2=dict(num_channels=(40, 80)),\n            stage3=dict(num_channels=(40, 80, 160)),\n            stage4=dict(num_channels=(40, 80, 160, 320))),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://msra/hrnetv2_w40')),\n    neck=dict(type='HRFPN', in_channels=[40, 80, 160, 320], out_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py",
    "content": "_base_ = './mask_rcnn_hrnetv2p_w40_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/hrnet/metafile.yml",
    "content": "Models:\n  - Name: faster_rcnn_hrnetv2p_w18_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.6\n      inference time (ms/im):\n        - value: 74.63\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_1x_coco/faster_rcnn_hrnetv2p_w18_1x_coco_20200130-56651a6d.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: faster_rcnn_hrnetv2p_w18_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco.py\n    Metadata:\n      Training Memory (GB): 6.6\n      inference time (ms/im):\n        - value: 74.63\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w18_2x_coco/faster_rcnn_hrnetv2p_w18_2x_coco_20200702_085731-a4ec0611.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: faster_rcnn_hrnetv2p_w32_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.0\n      inference time (ms/im):\n        - value: 80.65\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_1x_coco/faster_rcnn_hrnetv2p_w32_1x_coco_20200130-6e286425.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: 
faster_rcnn_hrnetv2p_w32_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco.py\n    Metadata:\n      Training Memory (GB): 9.0\n      inference time (ms/im):\n        - value: 80.65\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w32_2x_coco/faster_rcnn_hrnetv2p_w32_2x_coco_20200529_015927-976a9c15.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: faster_rcnn_hrnetv2p_w40_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.4\n      inference time (ms/im):\n        - value: 95.24\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_1x_coco/faster_rcnn_hrnetv2p_w40_1x_coco_20200210-95c1f5ce.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: faster_rcnn_hrnetv2p_w40_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco.py\n    Metadata:\n      Training Memory (GB): 10.4\n      inference time (ms/im):\n        - value: 95.24\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/faster_rcnn_hrnetv2p_w40_2x_coco/faster_rcnn_hrnetv2p_w40_2x_coco_20200512_161033-0f236ef4.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: mask_rcnn_hrnetv2p_w18_1x_coco\n    
In Collection: Mask R-CNN\n    Config: configs/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 85.47\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 34.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_1x_coco/mask_rcnn_hrnetv2p_w18_1x_coco_20200205-1c3d78ed.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: mask_rcnn_hrnetv2p_w18_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 85.47\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w18_2x_coco/mask_rcnn_hrnetv2p_w18_2x_coco_20200212-b3c825b1.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: mask_rcnn_hrnetv2p_w32_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.4\n      inference time (ms/im):\n        - value: 88.5\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_1x_coco/mask_rcnn_hrnetv2p_w32_1x_coco_20200207-b29f616e.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual 
Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: mask_rcnn_hrnetv2p_w32_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco.py\n    Metadata:\n      Training Memory (GB): 9.4\n      inference time (ms/im):\n        - value: 88.5\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w32_2x_coco/mask_rcnn_hrnetv2p_w32_2x_coco_20200213-45b75b4d.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: mask_rcnn_hrnetv2p_w40_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.9\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_1x_coco/mask_rcnn_hrnetv2p_w40_1x_coco_20200511_015646-66738b35.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: mask_rcnn_hrnetv2p_w40_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco.py\n    Metadata:\n      Training Memory (GB): 10.9\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/mask_rcnn_hrnetv2p_w40_2x_coco/mask_rcnn_hrnetv2p_w40_2x_coco_20200512_163732-aed5e4ab.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: 
https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: cascade_rcnn_hrnetv2p_w18_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 90.91\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w18_20e_coco/cascade_rcnn_hrnetv2p_w18_20e_coco_20200210-434be9d7.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: cascade_rcnn_hrnetv2p_w32_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco.py\n    Metadata:\n      Training Memory (GB): 9.4\n      inference time (ms/im):\n        - value: 90.91\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w32_20e_coco/cascade_rcnn_hrnetv2p_w32_20e_coco_20200208-928455a4.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: cascade_rcnn_hrnetv2p_w40_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco.py\n    Metadata:\n      Training Memory (GB): 10.8\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_rcnn_hrnetv2p_w40_20e_coco/cascade_rcnn_hrnetv2p_w40_20e_coco_20200512_161112-75e47b04.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: cascade_mask_rcnn_hrnetv2p_w18_20e_coco\n    In Collection: Cascade R-CNN\n    
Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco.py\n    Metadata:\n      Training Memory (GB): 8.5\n      inference time (ms/im):\n        - value: 117.65\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w18_20e_coco/cascade_mask_rcnn_hrnetv2p_w18_20e_coco_20200210-b543cd2b.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: cascade_mask_rcnn_hrnetv2p_w32_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco.py\n    Metadata:\n      inference time (ms/im):\n        - value: 120.48\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w32_20e_coco/cascade_mask_rcnn_hrnetv2p_w32_20e_coco_20200512_154043-39d9cf7b.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: cascade_mask_rcnn_hrnetv2p_w40_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco.py\n    Metadata:\n      Training Memory (GB): 12.5\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/cascade_mask_rcnn_hrnetv2p_w40_20e_coco/cascade_mask_rcnn_hrnetv2p_w40_20e_coco_20200527_204922-969c4610.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: 
https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: htc_hrnetv2p_w18_20e_coco\n    In Collection: HTC\n    Config: configs/hrnet/htc_hrnetv2p_w18_20e_coco.py\n    Metadata:\n      Training Memory (GB): 10.8\n      inference time (ms/im):\n        - value: 212.77\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w18_20e_coco/htc_hrnetv2p_w18_20e_coco_20200210-b266988c.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: htc_hrnetv2p_w32_20e_coco\n    In Collection: HTC\n    Config: configs/hrnet/htc_hrnetv2p_w32_20e_coco.py\n    Metadata:\n      Training Memory (GB): 13.1\n      inference time (ms/im):\n        - value: 204.08\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w32_20e_coco/htc_hrnetv2p_w32_20e_coco_20200207-7639fa12.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: htc_hrnetv2p_w40_20e_coco\n    In Collection: HTC\n    Config: configs/hrnet/htc_hrnetv2p_w40_20e_coco.py\n    Metadata:\n      Training Memory (GB): 14.6\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/htc_hrnetv2p_w40_20e_coco/htc_hrnetv2p_w40_20e_coco_20200529_183411-417c4d5b.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: 
https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: fcos_hrnetv2p_w18_gn-head_4x4_1x_coco\n    In Collection: FCOS\n    Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco.py\n    Metadata:\n      Training Resources: 4x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 13.0\n      inference time (ms/im):\n        - value: 77.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 35.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco/fcos_hrnetv2p_w18_gn-head_4x4_1x_coco_20201212_100710-4ad151de.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: fcos_hrnetv2p_w18_gn-head_4x4_2x_coco\n    In Collection: FCOS\n    Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco.py\n    Metadata:\n      Training Resources: 4x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 13.0\n      inference time (ms/im):\n        - value: 77.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_4x4_2x_coco_20201212_101110-5c575fa5.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: fcos_hrnetv2p_w32_gn-head_4x4_1x_coco\n    In Collection: FCOS\n    Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco.py\n    Metadata:\n      Training Resources: 4x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 17.5\n      inference time (ms/im):\n        - value: 77.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco/fcos_hrnetv2p_w32_gn-head_4x4_1x_coco_20201211_134730-cb8055c0.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual 
Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: fcos_hrnetv2p_w32_gn-head_4x4_2x_coco\n    In Collection: FCOS\n    Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco.py\n    Metadata:\n      Training Resources: 4x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 17.5\n      inference time (ms/im):\n        - value: 77.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_4x4_2x_coco_20201212_112133-77b6b9bb.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco\n    In Collection: FCOS\n    Config: configs/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco.py\n    Metadata:\n      Training Resources: 4x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 13.0\n      inference time (ms/im):\n        - value: 77.52\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w18_gn-head_mstrain_640-800_4x4_2x_coco_20201212_111651-441e9d9f.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco\n    In Collection: FCOS\n    Config: configs/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco.py\n    Metadata:\n      Training Resources: 4x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 17.5\n      inference time (ms/im):\n        - value: 80.65\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.9\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w32_gn-head_mstrain_640-800_4x4_2x_coco_20201212_090846-b6f2b49f.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n\n  - Name: fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco\n    In Collection: FCOS\n    Config: configs/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco.py\n    Metadata:\n      Training Resources: 4x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 20.3\n      inference time (ms/im):\n        - value: 92.59\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Architecture:\n        - HRNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/hrnet/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco/fcos_hrnetv2p_w40_gn-head_mstrain_640-800_4x4_2x_coco_20201212_124752-f22d2ce5.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.04514\n      Title: 'Deep High-Resolution Representation Learning for Visual Recognition'\n    README: configs/hrnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/backbones/hrnet.py#L195\n      Version: v2.0.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/htc/htc_r101_fpn_20e_coco.py",
    "content": "_base_ = './htc_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/htc/htc_r50_fpn_1x_coco.py",
    "content": "_base_ = './htc_without_semantic_r50_fpn_1x_coco.py'\nmodel = dict(\n    roi_head=dict(\n        semantic_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[8]),\n        semantic_head=dict(\n            type='FusedSemanticHead',\n            num_ins=5,\n            fusion_level=1,\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=183,\n            loss_seg=dict(\n                type='CrossEntropyLoss', ignore_index=255, loss_weight=0.2))))\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='SegRescale', scale_factor=1 / 8),\n    dict(type='DefaultFormatBundle'),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip', flip_ratio=0.5),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(\n        seg_prefix=data_root + 'stuffthingmaps/train2017/',\n        pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/htc/htc_r50_fpn_20e_coco.py",
    "content": "_base_ = './htc_r50_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/htc/htc_without_semantic_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    type='HybridTaskCascade',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        type='RPNHead',\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='HybridTaskCascadeRoIHead',\n        interleaved=True,\n        mask_info_flow=True,\n        num_stages=3,\n        stage_loss_weights=[1, 0.5, 0.25],\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                
reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ],\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=[\n            dict(\n                type='HTCMaskHead',\n                with_conv_res=False,\n                num_convs=4,\n                in_channels=256,\n                conv_out_channels=256,\n                num_classes=80,\n                loss_mask=dict(\n                    type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),\n            dict(\n                type='HTCMaskHead',\n                num_convs=4,\n                in_channels=256,\n                conv_out_channels=256,\n                num_classes=80,\n                loss_mask=dict(\n                    type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),\n            dict(\n                type='HTCMaskHead',\n                num_convs=4,\n                in_channels=256,\n                conv_out_channels=256,\n                num_classes=80,\n                loss_mask=dict(\n                    type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))\n        ]),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False),\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.5,\n                    neg_iou_thr=0.5,\n                    min_pos_iou=0.5,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.6,\n                    neg_iou_thr=0.6,\n                    min_pos_iou=0.6,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False),\n            dict(\n                assigner=dict(\n                    type='MaxIoUAssigner',\n                    pos_iou_thr=0.7,\n               
     neg_iou_thr=0.7,\n                    min_pos_iou=0.7,\n                    ignore_iof_thr=-1),\n                sampler=dict(\n                    type='RandomSampler',\n                    num=512,\n                    pos_fraction=0.25,\n                    neg_pos_ub=-1,\n                    add_gt_as_proposals=True),\n                mask_size=28,\n                pos_weight=-1,\n                debug=False)\n        ]),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=1000,\n            max_per_img=1000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            score_thr=0.001,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100,\n            mask_thr_binary=0.5)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip', flip_ratio=0.5),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    val=dict(pipeline=test_pipeline), test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py",
    "content": "_base_ = './htc_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\ndata = dict(samples_per_gpu=1, workers_per_gpu=1)\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py",
    "content": "_base_ = './htc_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\ndata = dict(samples_per_gpu=1, workers_per_gpu=1)\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py",
    "content": "_base_ = './htc_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations', with_bbox=True, with_mask=True, with_seg=True),\n    dict(\n        type='Resize',\n        img_scale=[(1600, 400), (1600, 1400)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='SegRescale', scale_factor=1 / 8),\n    dict(type='DefaultFormatBundle'),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),\n]\ndata = dict(\n    samples_per_gpu=1, workers_per_gpu=1, train=dict(pipeline=train_pipeline))\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/htc/metafile.yml",
    "content": "Collections:\n  - Name: HTC\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - HTC\n        - RPN\n        - ResNet\n        - ResNeXt\n        - RoIAlign\n    Paper:\n      URL: https://arxiv.org/abs/1901.07518\n      Title: 'Hybrid Task Cascade for Instance Segmentation'\n    README: configs/htc/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/htc.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: htc_r50_fpn_1x_coco\n    In Collection: HTC\n    Config: configs/htc/htc_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.2\n      inference time (ms/im):\n        - value: 172.41\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317-7332cf16.pth\n\n  - Name: htc_r50_fpn_20e_coco\n    In Collection: HTC\n    Config: configs/htc/htc_r50_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 8.2\n      inference time (ms/im):\n        - value: 172.41\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth\n\n  - Name: htc_r101_fpn_20e_coco\n    In Collection: HTC\n    Config: configs/htc/htc_r101_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 10.2\n      inference time (ms/im):\n        - value: 181.82\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317-9b41b48f.pth\n\n  - Name: htc_x101_32x4d_fpn_16x1_20e_coco\n    In Collection: HTC\n    Config: configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py\n    Metadata:\n      Training Resources: 16x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 11.4\n      inference time (ms/im):\n        - value: 200\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.5\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318-de97ae01.pth\n\n  - Name: htc_x101_64x4d_fpn_16x1_20e_coco\n    In Collection: HTC\n    Config: configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py\n    Metadata:\n      Training Resources: 16x V100 GPUs\n      Batch Size: 16\n      Training Memory (GB): 14.5\n      inference time (ms/im):\n        - value: 227.27\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth\n\n  - Name: htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco\n    In Collection: HTC\n    Config: configs/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco.py\n    Metadata:\n      Training Resources: 16x V100 GPUs\n      Batch Size: 16\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 50.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 43.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco/htc_x101_64x4d_fpn_dconv_c3-c5_mstrain_400_1400_16x1_20e_coco_20200312-946fd751.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/instaboost/cascade_mask_rcnn_r101_fpn_instaboost_4x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='InstaBoost',\n        action_candidate=('normal', 'horizontal', 'skip'),\n        action_prob=(1, 0, 0),\n        scale=(0.8, 1.2),\n        dx=15,\n        dy=15,\n        theta=(-1, 1),\n        color_prob=0.5,\n        hflag=False,\n        aug_ratio=0.5),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n# learning policy\nlr_config = dict(step=[32, 44])\nrunner = dict(type='EpochBasedRunner', max_epochs=48)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/instaboost/cascade_mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_instaboost_4x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='InstaBoost',\n        action_candidate=('normal', 'horizontal', 'skip'),\n        action_prob=(1, 0, 0),\n        scale=(0.8, 1.2),\n        dx=15,\n        dy=15,\n        theta=(-1, 1),\n        color_prob=0.5,\n        hflag=False,\n        aug_ratio=0.5),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n# learning policy\nlr_config = dict(step=[32, 44])\nrunner = dict(type='EpochBasedRunner', max_epochs=48)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_instaboost_4x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/instaboost/metafile.yml",
    "content": "Collections:\n  - Name: InstaBoost\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - InstaBoost\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n    Paper:\n      URL: https://arxiv.org/abs/1908.07801\n      Title: 'Instaboost: Boosting instance segmentation via probability map guided copy-pasting'\n    README: configs/instaboost/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/datasets/pipelines/instaboost.py#L7\n      Version: v2.0.0\n\nModels:\n  - Name: mask_rcnn_r50_fpn_instaboost_4x_coco\n    In Collection: InstaBoost\n    Config: configs/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco.py\n    Metadata:\n      Training Memory (GB): 4.4\n      inference time (ms/im):\n        - value: 57.14\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 48\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r50_fpn_instaboost_4x_coco/mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-d025f83a.pth\n\n  - Name: mask_rcnn_r101_fpn_instaboost_4x_coco\n    In Collection: InstaBoost\n    Config: configs/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      Epochs: 48\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_r101_fpn_instaboost_4x_coco/mask_rcnn_r101_fpn_instaboost_4x_coco_20200703_235738-f23f3a5f.pth\n\n  - Name: mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco\n    In Collection: InstaBoost\n    Config: configs/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco.py\n    Metadata:\n      Training Memory (GB): 10.7\n      Epochs: 48\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/instaboost/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco/mask_rcnn_x101_64x4d_fpn_instaboost_4x_coco_20200515_080947-8ed58c1b.pth\n\n  - Name: cascade_mask_rcnn_r50_fpn_instaboost_4x_coco\n    In Collection: InstaBoost\n    Config: configs/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      inference time (ms/im):\n        - value: 83.33\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 48\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/instaboost/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco/cascade_mask_rcnn_r50_fpn_instaboost_4x_coco_20200307-c19d98d9.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lad/lad_r101_paa_r50_fpn_coco_1x.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nteacher_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.pth'  # noqa\nmodel = dict(\n    type='LAD',\n    # student\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    bbox_head=dict(\n        type='LADHead',\n        reg_decoded_bbox=True,\n        score_voting=True,\n        topk=9,\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),\n    # teacher\n    teacher_ckpt=teacher_ckpt,\n    teacher_backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    teacher_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    teacher_bbox_head=dict(\n        type='LADHead',\n        reg_decoded_bbox=True,\n        score_voting=True,\n        topk=9,\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.1,\n            neg_iou_thr=0.1,\n            min_pos_iou=0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n       
 debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        score_voting=True,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\ndata = dict(samples_per_gpu=8, workers_per_gpu=4)\noptimizer = dict(lr=0.01)\nfp16 = dict(loss_scale=512.)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lad/lad_r50_paa_r101_fpn_coco_1x.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nteacher_ckpt = 'http://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.pth'  # noqa\nmodel = dict(\n    type='LAD',\n    # student\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    bbox_head=dict(\n        type='LADHead',\n        reg_decoded_bbox=True,\n        score_voting=True,\n        topk=9,\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),\n    # teacher\n    teacher_ckpt=teacher_ckpt,\n    teacher_backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch'),\n    teacher_neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    teacher_bbox_head=dict(\n        type='LADHead',\n        reg_decoded_bbox=True,\n        score_voting=True,\n        topk=9,\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.1,\n            neg_iou_thr=0.1,\n            min_pos_iou=0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    
test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        score_voting=True,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\ndata = dict(samples_per_gpu=8, workers_per_gpu=4)\noptimizer = dict(lr=0.01)\nfp16 = dict(loss_scale=512.)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lad/metafile.yml",
    "content": "Collections:\n  - Name: Label Assignment Distillation\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - Label Assignment Distillation\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 2x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/2108.10520\n      Title: 'Improving Object Detection by Label Assignment Distillation'\n    README: configs/lad/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.19.0/mmdet/models/detectors/lad.py#L10\n      Version: v2.19.0\n\nModels:\n  - Name: lad_r101_paa_r50_fpn_coco_1x\n    In Collection: Label Assignment Distillation\n    Config: configs/lad/lad_r101_paa_r50_fpn_coco_1x.py\n    Metadata:\n      Training Memory (GB): 12.4\n      Epochs: 12\n    Results:\n    - Task: Object Detection\n      Dataset: COCO\n      Metrics:\n        box AP: 43.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/lad/lad_r101_paa_r50_fpn_coco_1x/lad_r101_paa_r50_fpn_coco_1x_20220708_124357-9407ac54.pth\n  - Name: lad_r50_paa_r101_fpn_coco_1x\n    In Collection: Label Assignment Distillation\n    Config: configs/lad/lad_r50_paa_r101_fpn_coco_1x.py\n    Metadata:\n      Training Memory (GB): 8.9\n      Epochs: 12\n    Results:\n    - Task: Object Detection\n      Dataset: COCO\n      Metrics:\n        box AP: 41.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/lad/lad_r50_paa_r101_fpn_coco_1x/lad_r50_paa_r101_fpn_coco_1x_20220708_124246-74c76ff0.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py",
    "content": "_base_ = ['./ld_r18_gflv1_r101_fpn_coco_1x.py']\nteacher_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20200630_102002-134b07df.pth'  # noqa\nmodel = dict(\n    teacher_config='configs/gfl/gfl_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py',\n    teacher_ckpt=teacher_ckpt,\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5))\n\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n# multi-scale training\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 480), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nteacher_ckpt = 'https://download.openmmlab.com/mmdetection/v2.0/gfl/gfl_r101_fpn_mstrain_2x_coco/gfl_r101_fpn_mstrain_2x_coco_20200629_200126-dd12f847.pth'  # noqa\nmodel = dict(\n    type='KnowledgeDistillationSingleStageDetector',\n    teacher_config='configs/gfl/gfl_r101_fpn_mstrain_2x_coco.py',\n    teacher_ckpt=teacher_ckpt,\n    backbone=dict(\n        type='ResNet',\n        depth=18,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),\n    neck=dict(\n        type='FPN',\n        in_channels=[64, 128, 256, 512],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    bbox_head=dict(\n        type='LDHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        loss_cls=dict(\n            type='QualityFocalLoss',\n            use_sigmoid=True,\n            beta=2.0,\n            loss_weight=1.0),\n        loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25),\n        loss_ld=dict(\n            type='KnowledgeDistillationKLDivLoss', loss_weight=0.25, T=10),\n        reg_max=16,\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(type='ATSSAssigner', topk=9),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py",
    "content": "_base_ = ['./ld_r18_gflv1_r101_fpn_coco_1x.py']\nmodel = dict(\n    backbone=dict(\n        type='ResNet',\n        depth=34,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet34')),\n    neck=dict(\n        type='FPN',\n        in_channels=[64, 128, 256, 512],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py",
    "content": "_base_ = ['./ld_r18_gflv1_r101_fpn_coco_1x.py']\nmodel = dict(\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ld/metafile.yml",
    "content": "Collections:\n  - Name: Localization Distillation\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - Localization Distillation\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/2102.12252\n      Title: 'Localization Distillation for Dense Object Detection'\n    README: configs/ld/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.11.0/mmdet/models/dense_heads/ld_head.py#L11\n      Version: v2.11.0\n\nModels:\n  - Name: ld_r18_gflv1_r101_fpn_coco_1x\n    In Collection: Localization Distillation\n    Config: configs/ld/ld_r18_gflv1_r101_fpn_coco_1x.py\n    Metadata:\n      Training Memory (GB): 1.8\n      Epochs: 12\n    Results:\n    - Task: Object Detection\n      Dataset: COCO\n      Metrics:\n        box AP: 36.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ld/ld_r18_gflv1_r101_fpn_coco_1x/ld_r18_gflv1_r101_fpn_coco_1x_20220702_062206-330e6332.pth\n  - Name: ld_r34_gflv1_r101_fpn_coco_1x\n    In Collection: Localization Distillation\n    Config: configs/ld/ld_r34_gflv1_r101_fpn_coco_1x.py\n    Metadata:\n      Training Memory (GB): 2.2\n      Epochs: 12\n    Results:\n    - Task: Object Detection\n      Dataset: COCO\n      Metrics:\n        box AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ld/ld_r34_gflv1_r101_fpn_coco_1x/ld_r34_gflv1_r101_fpn_coco_1x_20220630_134007-9bc69413.pth\n  - Name: ld_r50_gflv1_r101_fpn_coco_1x\n    In Collection: Localization Distillation\n    Config: configs/ld/ld_r50_gflv1_r101_fpn_coco_1x.py\n    Metadata:\n      Training Memory (GB): 3.6\n      Epochs: 12\n    Results:\n    - Task: Object Detection\n      Dataset: COCO\n      Metrics:\n        box AP: 41.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ld/ld_r50_gflv1_r101_fpn_coco_1x/ld_r50_gflv1_r101_fpn_coco_1x_20220629_145355-8dc5bad8.pth\n  - Name: ld_r101_gflv1_r101dcn_fpn_coco_2x\n    In Collection: Localization Distillation\n    Config: configs/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x.py\n    Metadata:\n      Training Memory (GB): 5.5\n      Epochs: 24\n    Results:\n    - Task: Object Detection\n      Dataset: COCO\n      Metrics:\n        box AP: 45.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ld/ld_r101_gflv1_r101dcn_fpn_coco_2x/ld_r101_gflv1_r101dcn_fpn_coco_2x_20220629_185920-9e658426.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/legacy_1.x/cascade_mask_rcnn_r50_fpn_1x_coco_v1.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='CascadeRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5),\n    rpn_head=dict(\n        anchor_generator=dict(type='LegacyAnchorGenerator', center_offset=0.5),\n        bbox_coder=dict(\n            type='LegacyDeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0])),\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(\n                type='RoIAlign',\n                output_size=7,\n                sampling_ratio=2,\n                aligned=False)),\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                reg_class_agnostic=True,\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='LegacyDeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2])),\n            dict(\n                type='Shared2FCBBoxHead',\n                reg_class_agnostic=True,\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='LegacyDeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1])),\n            dict(\n                type='Shared2FCBBoxHead',\n                reg_class_agnostic=True,\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='LegacyDeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067])),\n        ],\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(\n                type='RoIAlign',\n                output_size=14,\n                sampling_ratio=2,\n                aligned=False))))\ndist_params = dict(backend='nccl', port=29515)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/legacy_1.x/faster_rcnn_r50_fpn_1x_coco_v1.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    type='FasterRCNN',\n    backbone=dict(\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    rpn_head=dict(\n        type='RPNHead',\n        anchor_generator=dict(\n            type='LegacyAnchorGenerator',\n            center_offset=0.5,\n            scales=[8],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[4, 8, 16, 32, 64]),\n        bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        type='StandardRoIHead',\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(\n                type='RoIAlign',\n                output_size=7,\n                sampling_ratio=2,\n                aligned=False),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=dict(\n            bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn_proposal=dict(max_per_img=2000),\n        rcnn=dict(assigner=dict(match_low_quality=True))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/legacy_1.x/mask_rcnn_r50_fpn_1x_coco_v1.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    rpn_head=dict(\n        anchor_generator=dict(type='LegacyAnchorGenerator', center_offset=0.5),\n        bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(\n                type='RoIAlign',\n                output_size=7,\n                sampling_ratio=2,\n                aligned=False)),\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(\n                type='RoIAlign',\n                output_size=14,\n                sampling_ratio=2,\n                aligned=False)),\n        bbox_head=dict(\n            bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n\n    # model training and testing settings\n    train_cfg=dict(\n        rpn_proposal=dict(max_per_img=2000),\n        rcnn=dict(assigner=dict(match_low_quality=True))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/legacy_1.x/retinanet_r50_caffe_fpn_1x_coco_v1.py",
    "content": "_base_ = './retinanet_r50_fpn_1x_coco_v1.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[102.9801, 115.9465, 122.7717], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/legacy_1.x/retinanet_r50_fpn_1x_coco_v1.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    bbox_head=dict(\n        type='RetinaHead',\n        anchor_generator=dict(\n            type='LegacyAnchorGenerator',\n            center_offset=0.5,\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(type='LegacyDeltaXYWHBBoxCoder'),\n        loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/legacy_1.x/ssd300_coco_v1.py",
    "content": "_base_ = [\n    '../_base_/models/ssd300.py', '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n# model settings\ninput_size = 300\nmodel = dict(\n    bbox_head=dict(\n        type='SSDHead',\n        anchor_generator=dict(\n            type='LegacySSDAnchorGenerator',\n            scale_major=False,\n            input_size=input_size,\n            basesize_ratio_range=(0.15, 0.9),\n            strides=[8, 16, 32, 64, 100, 300],\n            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),\n        bbox_coder=dict(\n            type='LegacyDeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2])))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(300, 300),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=3,\n    train=dict(\n        _delete_=True,\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict(_delete_=True)\ndist_params = dict(backend='nccl', port=29555)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/libra_rcnn/libra_fast_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = '../fast_rcnn/fast_rcnn_r50_fpn_1x_coco.py'\n# model settings\nmodel = dict(\n    neck=[\n        dict(\n            type='FPN',\n            in_channels=[256, 512, 1024, 2048],\n            out_channels=256,\n            num_outs=5),\n        dict(\n            type='BFP',\n            in_channels=256,\n            num_levels=5,\n            refine_level=2,\n            refine_type='non_local')\n    ],\n    roi_head=dict(\n        bbox_head=dict(\n            loss_bbox=dict(\n                _delete_=True,\n                type='BalancedL1Loss',\n                alpha=0.5,\n                gamma=1.5,\n                beta=1.0,\n                loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rcnn=dict(\n            sampler=dict(\n                _delete_=True,\n                type='CombinedSampler',\n                num=512,\n                pos_fraction=0.25,\n                add_gt_as_proposals=True,\n                pos_sampler=dict(type='InstanceBalancedPosSampler'),\n                neg_sampler=dict(\n                    type='IoUBalancedNegSampler',\n                    floor_thr=-1,\n                    floor_fraction=0,\n                    num_bins=3)))))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\ndata = dict(\n    train=dict(proposal_file=data_root +\n               'libra_proposals/rpn_r50_fpn_1x_train2017.pkl'),\n    val=dict(proposal_file=data_root +\n             'libra_proposals/rpn_r50_fpn_1x_val2017.pkl'),\n    test=dict(proposal_file=data_root +\n              'libra_proposals/rpn_r50_fpn_1x_val2017.pkl'))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py",
    "content": "_base_ = './libra_faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\n# model settings\nmodel = dict(\n    neck=[\n        dict(\n            type='FPN',\n            in_channels=[256, 512, 1024, 2048],\n            out_channels=256,\n            num_outs=5),\n        dict(\n            type='BFP',\n            in_channels=256,\n            num_levels=5,\n            refine_level=2,\n            refine_type='non_local')\n    ],\n    roi_head=dict(\n        bbox_head=dict(\n            loss_bbox=dict(\n                _delete_=True,\n                type='BalancedL1Loss',\n                alpha=0.5,\n                gamma=1.5,\n                beta=1.0,\n                loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rpn=dict(sampler=dict(neg_pos_ub=5), allowed_border=-1),\n        rcnn=dict(\n            sampler=dict(\n                _delete_=True,\n                type='CombinedSampler',\n                num=512,\n                pos_fraction=0.25,\n                add_gt_as_proposals=True,\n                pos_sampler=dict(type='InstanceBalancedPosSampler'),\n                neg_sampler=dict(\n                    type='IoUBalancedNegSampler',\n                    floor_thr=-1,\n                    floor_fraction=0,\n                    num_bins=3)))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './libra_faster_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py",
    "content": "_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py'\n# model settings\nmodel = dict(\n    neck=[\n        dict(\n            type='FPN',\n            in_channels=[256, 512, 1024, 2048],\n            out_channels=256,\n            start_level=1,\n            add_extra_convs='on_input',\n            num_outs=5),\n        dict(\n            type='BFP',\n            in_channels=256,\n            num_levels=5,\n            refine_level=1,\n            refine_type='non_local')\n    ],\n    bbox_head=dict(\n        loss_bbox=dict(\n            _delete_=True,\n            type='BalancedL1Loss',\n            alpha=0.5,\n            gamma=1.5,\n            beta=0.11,\n            loss_weight=1.0)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/libra_rcnn/metafile.yml",
    "content": "Collections:\n  - Name: Libra R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - IoU-Balanced Sampling\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Balanced Feature Pyramid\n    Paper:\n      URL: https://arxiv.org/abs/1904.02701\n      Title: 'Libra R-CNN: Towards Balanced Learning for Object Detection'\n    README: configs/libra_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/necks/bfp.py#L10\n      Version: v2.0.0\n\nModels:\n  - Name: libra_faster_rcnn_r50_fpn_1x_coco\n    In Collection: Libra R-CNN\n    Config: configs/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.6\n      inference time (ms/im):\n        - value: 52.63\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r50_fpn_1x_coco/libra_faster_rcnn_r50_fpn_1x_coco_20200130-3afee3a9.pth\n\n  - Name: libra_faster_rcnn_r101_fpn_1x_coco\n    In Collection: Libra R-CNN\n    Config: configs/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.5\n      inference time (ms/im):\n        - value: 69.44\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_r101_fpn_1x_coco/libra_faster_rcnn_r101_fpn_1x_coco_20200203-8dba6a5a.pth\n\n  - Name: libra_faster_rcnn_x101_64x4d_fpn_1x_coco\n    In Collection: Libra R-CNN\n    Config: configs/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.8\n      inference time (ms/im):\n        - value: 117.65\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_faster_rcnn_x101_64x4d_fpn_1x_coco/libra_faster_rcnn_x101_64x4d_fpn_1x_coco_20200315-3a7d0488.pth\n\n  - Name: libra_retinanet_r50_fpn_1x_coco\n    In Collection: Libra R-CNN\n    Config: configs/libra_rcnn/libra_retinanet_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.2\n      inference time (ms/im):\n        - value: 56.5\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/libra_rcnn/libra_retinanet_r50_fpn_1x_coco/libra_retinanet_r50_fpn_1x_coco_20200205-804d94ce.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_1x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lvis/mask_rcnn_r101_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/lvis_v1_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(num_classes=1203), mask_head=dict(num_classes=1203)),\n    test_cfg=dict(\n        rcnn=dict(\n            score_thr=0.0001,\n            # LVIS allows up to 300\n            max_per_img=300)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(dataset=dict(pipeline=train_pipeline)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lvis/mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/lvis_v0.5_instance.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(num_classes=1230), mask_head=dict(num_classes=1230)),\n    test_cfg=dict(\n        rcnn=dict(\n            score_thr=0.0001,\n            # LVIS allows up to 300\n            max_per_img=300)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(dataset=dict(pipeline=train_pipeline)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lvis/mask_rcnn_x101_32x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_1x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_1x_lvis_v1.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/lvis/mask_rcnn_x101_64x4d_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_mstrain_2x_lvis_v0.5.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py",
    "content": "_base_ = './mask2former_r50_lsj_8x2_50e_coco-panoptic.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py",
    "content": "_base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py']\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_panoptic.py', '../_base_/default_runtime.py'\n]\nnum_things_classes = 80\nnum_stuff_classes = 53\nnum_classes = num_things_classes + num_stuff_classes\nmodel = dict(\n    type='Mask2Former',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    panoptic_head=dict(\n        type='Mask2FormerHead',\n        in_channels=[256, 512, 1024, 2048],  # pass to pixel_decoder inside\n        strides=[4, 8, 16, 32],\n        feat_channels=256,\n        out_channels=256,\n        num_things_classes=num_things_classes,\n        num_stuff_classes=num_stuff_classes,\n        num_queries=100,\n        num_transformer_feat_level=3,\n        pixel_decoder=dict(\n            type='MSDeformAttnPixelDecoder',\n            num_outs=3,\n            norm_cfg=dict(type='GN', num_groups=32),\n            act_cfg=dict(type='ReLU'),\n            encoder=dict(\n                type='DetrTransformerEncoder',\n                num_layers=6,\n                transformerlayers=dict(\n                    type='BaseTransformerLayer',\n                    attn_cfgs=dict(\n                        type='MultiScaleDeformableAttention',\n                        embed_dims=256,\n                        num_heads=8,\n                        num_levels=3,\n                        num_points=4,\n                        im2col_step=64,\n                        dropout=0.0,\n                        batch_first=False,\n                        norm_cfg=None,\n                        init_cfg=None),\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=256,\n                        feedforward_channels=1024,\n                        num_fcs=2,\n                        ffn_drop=0.0,\n                        act_cfg=dict(type='ReLU', inplace=True)),\n                    operation_order=('self_attn', 'norm', 'ffn', 'norm')),\n                init_cfg=None),\n            positional_encoding=dict(\n                type='SinePositionalEncoding', num_feats=128, normalize=True),\n            init_cfg=None),\n        enforce_decoder_input_project=False,\n        positional_encoding=dict(\n            type='SinePositionalEncoding', num_feats=128, normalize=True),\n        transformer_decoder=dict(\n            type='DetrTransformerDecoder',\n            return_intermediate=True,\n            num_layers=9,\n            transformerlayers=dict(\n                type='DetrTransformerDecoderLayer',\n                attn_cfgs=dict(\n                    type='MultiheadAttention',\n                    embed_dims=256,\n                    num_heads=8,\n                    attn_drop=0.0,\n                    proj_drop=0.0,\n                    dropout_layer=None,\n                    batch_first=False),\n                ffn_cfgs=dict(\n                    embed_dims=256,\n                    feedforward_channels=2048,\n                    num_fcs=2,\n                    act_cfg=dict(type='ReLU', inplace=True),\n                    ffn_drop=0.0,\n                    dropout_layer=None,\n                    add_identity=True),\n                feedforward_channels=2048,\n                operation_order=('cross_attn', 'norm', 'self_attn', 'norm',\n                        
         'ffn', 'norm')),\n            init_cfg=None),\n        loss_cls=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            loss_weight=2.0,\n            reduction='mean',\n            class_weight=[1.0] * num_classes + [0.1]),\n        loss_mask=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            reduction='mean',\n            loss_weight=5.0),\n        loss_dice=dict(\n            type='DiceLoss',\n            use_sigmoid=True,\n            activate=True,\n            reduction='mean',\n            naive_dice=True,\n            eps=1.0,\n            loss_weight=5.0)),\n    panoptic_fusion_head=dict(\n        type='MaskFormerFusionHead',\n        num_things_classes=num_things_classes,\n        num_stuff_classes=num_stuff_classes,\n        loss_panoptic=None,\n        init_cfg=None),\n    train_cfg=dict(\n        num_points=12544,\n        oversample_ratio=3.0,\n        importance_sample_ratio=0.75,\n        assigner=dict(\n            type='MaskHungarianAssigner',\n            cls_cost=dict(type='ClassificationCost', weight=2.0),\n            mask_cost=dict(\n                type='CrossEntropyLossCost', weight=5.0, use_sigmoid=True),\n            dice_cost=dict(\n                type='DiceCost', weight=5.0, pred_act=True, eps=1.0)),\n        sampler=dict(type='MaskPseudoSampler')),\n    test_cfg=dict(\n        panoptic_on=True,\n        # For now, the dataset does not support\n        # evaluating semantic segmentation metric.\n        semantic_on=False,\n        instance_on=True,\n        # max_per_image is for instance segmentation.\n        max_per_image=100,\n        iou_thr=0.8,\n        # In Mask2Former's panoptic postprocessing,\n        # it will filter mask area where score is less than 0.5 .\n        filter_low_score=True),\n    init_cfg=None)\n\n# dataset settings\nimage_size = (1024, 1024)\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(\n        type='LoadPanopticAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        with_seg=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    # large scale jittering\n    dict(\n        type='Resize',\n        img_scale=image_size,\n        ratio_range=(0.1, 2.0),\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(\n        type='RandomCrop',\n        crop_size=image_size,\n        crop_type='absolute',\n        recompute_bbox=True,\n        allow_negative_crop=True),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=image_size),\n    dict(type='DefaultFormatBundle', img_to_float=True),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata_root = 'data/coco/'\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(\n        pipeline=test_pipeline,\n        
ins_ann_file=data_root + 'annotations/instances_val2017.json',\n    ),\n    test=dict(\n        pipeline=test_pipeline,\n        ins_ann_file=data_root + 'annotations/instances_val2017.json',\n    ))\n\nembed_multi = dict(lr_mult=1.0, decay_mult=0.0)\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=0.0001,\n    weight_decay=0.05,\n    eps=1e-8,\n    betas=(0.9, 0.999),\n    paramwise_cfg=dict(\n        custom_keys={\n            'backbone': dict(lr_mult=0.1, decay_mult=1.0),\n            'query_embed': embed_multi,\n            'query_feat': embed_multi,\n            'level_embed': embed_multi,\n        },\n        norm_decay_mult=0.0))\noptimizer_config = dict(grad_clip=dict(max_norm=0.01, norm_type=2))\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    gamma=0.1,\n    by_epoch=False,\n    step=[327778, 355092],\n    warmup='linear',\n    warmup_by_epoch=False,\n    warmup_ratio=1.0,  # no warmup\n    warmup_iters=10)\n\nmax_iters = 368750\nrunner = dict(type='IterBasedRunner', max_iters=max_iters)\n\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook', by_epoch=False),\n        dict(type='TensorboardLoggerHook', by_epoch=False)\n    ])\ninterval = 5000\nworkflow = [('train', interval)]\ncheckpoint_config = dict(\n    by_epoch=False, interval=interval, save_last=True, max_keep_ckpts=3)\n\n# Before 365001th iteration, we do evaluation every 5000 iterations.\n# After 365000th iteration, we do evaluation every 368750 iterations,\n# which means that we do evaluation at the end of training.\ndynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]\nevaluation = dict(\n    interval=interval,\n    dynamic_intervals=dynamic_intervals,\n    metric=['PQ', 'bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py",
    "content": "_base_ = ['./mask2former_r50_lsj_8x2_50e_coco-panoptic.py']\nnum_things_classes = 80\nnum_stuff_classes = 0\nnum_classes = num_things_classes + num_stuff_classes\nmodel = dict(\n    panoptic_head=dict(\n        num_things_classes=num_things_classes,\n        num_stuff_classes=num_stuff_classes,\n        loss_cls=dict(class_weight=[1.0] * num_classes + [0.1])),\n    panoptic_fusion_head=dict(\n        num_things_classes=num_things_classes,\n        num_stuff_classes=num_stuff_classes),\n    test_cfg=dict(panoptic_on=False))\n\n# dataset settings\nimage_size = (1024, 1024)\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\npad_cfg = dict(img=(128, 128, 128), masks=0, seg=255)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    # large scale jittering\n    dict(\n        type='Resize',\n        img_scale=image_size,\n        ratio_range=(0.1, 2.0),\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(\n        type='RandomCrop',\n        crop_size=image_size,\n        crop_type='absolute',\n        recompute_bbox=True,\n        allow_negative_crop=True),\n    dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-5, 1e-5), by_mask=True),\n    dict(type='Pad', size=image_size, pad_val=pad_cfg),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle', img_to_float=True),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Pad', size_divisor=32, pad_val=pad_cfg),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\ndata = dict(\n    _delete_=True,\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\nevaluation = dict(metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py",
    "content": "_base_ = ['./mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py']\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384_22k.pth'  # noqa\n\nmodel = dict(\n    backbone=dict(init_cfg=dict(type='Pretrained', checkpoint=pretrained)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py",
    "content": "_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py']\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_base_patch4_window12_384.pth'  # noqa\n\ndepths = [2, 2, 18, 2]\nmodel = dict(\n    backbone=dict(\n        pretrain_img_size=384,\n        embed_dims=128,\n        depths=depths,\n        num_heads=[4, 8, 16, 32],\n        window_size=12,\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    panoptic_head=dict(in_channels=[128, 256, 512, 1024]))\n\n# set all layers in backbone to lr_mult=0.1\n# set all norm layers, position_embeding,\n# query_embeding, level_embeding to decay_multi=0.0\nbackbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0)\nbackbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0)\nembed_multi = dict(lr_mult=1.0, decay_mult=0.0)\ncustom_keys = {\n    'backbone': dict(lr_mult=0.1, decay_mult=1.0),\n    'backbone.patch_embed.norm': backbone_norm_multi,\n    'backbone.norm': backbone_norm_multi,\n    'absolute_pos_embed': backbone_embed_multi,\n    'relative_position_bias_table': backbone_embed_multi,\n    'query_embed': embed_multi,\n    'query_feat': embed_multi,\n    'level_embed': embed_multi\n}\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi\n    for stage_id, num_blocks in enumerate(depths)\n    for block_id in range(num_blocks)\n})\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi\n    for stage_id in range(len(depths) - 1)\n})\n# optimizer\noptimizer = dict(\n    paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py",
    "content": "_base_ = ['./mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py']\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'  # noqa\n\nmodel = dict(\n    backbone=dict(\n        embed_dims=192,\n        num_heads=[6, 12, 24, 48],\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    panoptic_head=dict(num_queries=200, in_channels=[192, 384, 768, 1536]))\n\ndata = dict(samples_per_gpu=1, workers_per_gpu=1)\n\nlr_config = dict(step=[655556, 710184])\n\nmax_iters = 737500\nrunner = dict(type='IterBasedRunner', max_iters=max_iters)\n\n# Before 735001th iteration, we do evaluation every 5000 iterations.\n# After 735000th iteration, we do evaluation every 737500 iterations,\n# which means that we do evaluation at the end of training.'\ninterval = 5000\ndynamic_intervals = [(max_iters // interval * interval + 1, max_iters)]\nevaluation = dict(\n    interval=interval,\n    dynamic_intervals=dynamic_intervals,\n    metric=['PQ', 'bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py",
    "content": "_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py']\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth'  # noqa\n\ndepths = [2, 2, 18, 2]\nmodel = dict(\n    backbone=dict(\n        depths=depths, init_cfg=dict(type='Pretrained',\n                                     checkpoint=pretrained)))\n\n# set all layers in backbone to lr_mult=0.1\n# set all norm layers, position_embeding,\n# query_embeding, level_embeding to decay_multi=0.0\nbackbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0)\nbackbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0)\nembed_multi = dict(lr_mult=1.0, decay_mult=0.0)\ncustom_keys = {\n    'backbone': dict(lr_mult=0.1, decay_mult=1.0),\n    'backbone.patch_embed.norm': backbone_norm_multi,\n    'backbone.norm': backbone_norm_multi,\n    'absolute_pos_embed': backbone_embed_multi,\n    'relative_position_bias_table': backbone_embed_multi,\n    'query_embed': embed_multi,\n    'query_feat': embed_multi,\n    'level_embed': embed_multi\n}\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi\n    for stage_id, num_blocks in enumerate(depths)\n    for block_id in range(num_blocks)\n})\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi\n    for stage_id in range(len(depths) - 1)\n})\n# optimizer\noptimizer = dict(\n    paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py",
    "content": "_base_ = ['./mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py']\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth'  # noqa\n\ndepths = [2, 2, 18, 2]\nmodel = dict(\n    backbone=dict(\n        depths=depths, init_cfg=dict(type='Pretrained',\n                                     checkpoint=pretrained)))\n\n# set all layers in backbone to lr_mult=0.1\n# set all norm layers, position_embeding,\n# query_embeding, level_embeding to decay_multi=0.0\nbackbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0)\nbackbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0)\nembed_multi = dict(lr_mult=1.0, decay_mult=0.0)\ncustom_keys = {\n    'backbone': dict(lr_mult=0.1, decay_mult=1.0),\n    'backbone.patch_embed.norm': backbone_norm_multi,\n    'backbone.norm': backbone_norm_multi,\n    'absolute_pos_embed': backbone_embed_multi,\n    'relative_position_bias_table': backbone_embed_multi,\n    'query_embed': embed_multi,\n    'query_feat': embed_multi,\n    'level_embed': embed_multi\n}\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi\n    for stage_id, num_blocks in enumerate(depths)\n    for block_id in range(num_blocks)\n})\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi\n    for stage_id in range(len(depths) - 1)\n})\n# optimizer\noptimizer = dict(\n    paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py",
    "content": "_base_ = ['./mask2former_r50_lsj_8x2_50e_coco-panoptic.py']\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth'  # noqa\n\ndepths = [2, 2, 6, 2]\nmodel = dict(\n    type='Mask2Former',\n    backbone=dict(\n        _delete_=True,\n        type='SwinTransformer',\n        embed_dims=96,\n        depths=depths,\n        num_heads=[3, 6, 12, 24],\n        window_size=7,\n        mlp_ratio=4,\n        qkv_bias=True,\n        qk_scale=None,\n        drop_rate=0.,\n        attn_drop_rate=0.,\n        drop_path_rate=0.3,\n        patch_norm=True,\n        out_indices=(0, 1, 2, 3),\n        with_cp=False,\n        convert_weights=True,\n        frozen_stages=-1,\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    panoptic_head=dict(\n        type='Mask2FormerHead', in_channels=[96, 192, 384, 768]),\n    init_cfg=None)\n\n# set all layers in backbone to lr_mult=0.1\n# set all norm layers, position_embeding,\n# query_embeding, level_embeding to decay_multi=0.0\nbackbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0)\nbackbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0)\nembed_multi = dict(lr_mult=1.0, decay_mult=0.0)\ncustom_keys = {\n    'backbone': dict(lr_mult=0.1, decay_mult=1.0),\n    'backbone.patch_embed.norm': backbone_norm_multi,\n    'backbone.norm': backbone_norm_multi,\n    'absolute_pos_embed': backbone_embed_multi,\n    'relative_position_bias_table': backbone_embed_multi,\n    'query_embed': embed_multi,\n    'query_feat': embed_multi,\n    'level_embed': embed_multi\n}\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi\n    for stage_id, num_blocks in enumerate(depths)\n    for block_id in range(num_blocks)\n})\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi\n    for stage_id in range(len(depths) - 1)\n})\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=0.0001,\n    weight_decay=0.05,\n    eps=1e-8,\n    betas=(0.9, 0.999),\n    paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py",
    "content": "_base_ = ['./mask2former_r50_lsj_8x2_50e_coco.py']\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth'  # noqa\ndepths = [2, 2, 6, 2]\nmodel = dict(\n    type='Mask2Former',\n    backbone=dict(\n        _delete_=True,\n        type='SwinTransformer',\n        embed_dims=96,\n        depths=depths,\n        num_heads=[3, 6, 12, 24],\n        window_size=7,\n        mlp_ratio=4,\n        qkv_bias=True,\n        qk_scale=None,\n        drop_rate=0.,\n        attn_drop_rate=0.,\n        drop_path_rate=0.3,\n        patch_norm=True,\n        out_indices=(0, 1, 2, 3),\n        with_cp=False,\n        convert_weights=True,\n        frozen_stages=-1,\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    panoptic_head=dict(\n        type='Mask2FormerHead', in_channels=[96, 192, 384, 768]),\n    init_cfg=None)\n\n# set all layers in backbone to lr_mult=0.1\n# set all norm layers, position_embeding,\n# query_embeding, level_embeding to decay_multi=0.0\nbackbone_norm_multi = dict(lr_mult=0.1, decay_mult=0.0)\nbackbone_embed_multi = dict(lr_mult=0.1, decay_mult=0.0)\nembed_multi = dict(lr_mult=1.0, decay_mult=0.0)\ncustom_keys = {\n    'backbone': dict(lr_mult=0.1, decay_mult=1.0),\n    'backbone.patch_embed.norm': backbone_norm_multi,\n    'backbone.norm': backbone_norm_multi,\n    'absolute_pos_embed': backbone_embed_multi,\n    'relative_position_bias_table': backbone_embed_multi,\n    'query_embed': embed_multi,\n    'query_feat': embed_multi,\n    'level_embed': embed_multi\n}\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.blocks.{block_id}.norm': backbone_norm_multi\n    for stage_id, num_blocks in enumerate(depths)\n    for block_id in range(num_blocks)\n})\ncustom_keys.update({\n    f'backbone.stages.{stage_id}.downsample.norm': backbone_norm_multi\n    for stage_id in range(len(depths) - 1)\n})\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=0.0001,\n    weight_decay=0.05,\n    eps=1e-8,\n    betas=(0.9, 0.999),\n    paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask2former/metafile.yml",
    "content": "Collections:\n  - Name: Mask2Former\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n        - Weight Decay\n      Training Resources: 8x A100 GPUs\n      Architecture:\n        - Mask2Former\n    Paper:\n      URL: https://arxiv.org/pdf/2112.01527\n      Title: 'Masked-attention Mask Transformer for Universal Image Segmentation'\n    README: configs/mask2former/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.23.0/mmdet/models/detectors/mask2former.py#L7\n      Version: v2.23.0\n\nModels:\n- Name: mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic.py\n  Metadata:\n    Training Memory (GB): 19.1\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 47.8\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 44.5\n  - Task: Panoptic Segmentation\n    Dataset: COCO\n    Metrics:\n      PQ: 54.5\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220329_225200-c7b94355.pth\n- Name: mask2former_r101_lsj_8x2_50e_coco\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py\n  Metadata:\n    Training Memory (GB): 15.5\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 46.7\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 44.0\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth\n- Name: mask2former_r101_lsj_8x2_50e_coco-panoptic\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic.py\n  Metadata:\n    Training Memory (GB): 16.1\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 45.3\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 42.4\n  - Task: Panoptic Segmentation\n    Dataset: COCO\n    Metrics:\n      PQ: 52.4\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco-panoptic/mask2former_r101_lsj_8x2_50e_coco-panoptic_20220329_225104-c54e64c9.pth\n- Name: mask2former_r50_lsj_8x2_50e_coco-panoptic\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py\n  Metadata:\n    Training Memory (GB): 13.9\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 44.8\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 41.9\n  - Task: Panoptic Segmentation\n    Dataset: COCO\n    Metrics:\n      PQ: 51.9\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic/mask2former_r50_lsj_8x2_50e_coco-panoptic_20220326_224516-11a44721.pth\n- Name: mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic.py\n  Metadata:\n    Training Memory (GB): 15.9\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box 
AP: 46.3\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 43.4\n  - Task: Panoptic Segmentation\n    Dataset: COCO\n    Metrics:\n      PQ: 53.4\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco-panoptic_20220326_224553-fc567107.pth\n- Name: mask2former_r50_lsj_8x2_50e_coco\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py\n  Metadata:\n    Training Memory (GB): 13.7\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 45.7\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 42.9\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth\n- Name: mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic.py\n  Metadata:\n    Training Memory (GB): 21.1\n    Iterations: 737500\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 52.2\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 48.5\n  - Task: Panoptic Segmentation\n    Dataset: COCO\n    Metrics:\n      PQ: 57.6\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic/mask2former_swin-l-p4-w12-384-in21k_lsj_16x1_100e_coco-panoptic_20220407_104949-d4919c44.pth\n- Name: mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic.py\n  Metadata:\n    Training Memory (GB): 25.8\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 50.0\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 46.3\n  - Task: Panoptic Segmentation\n    Dataset: COCO\n    Metrics:\n      PQ: 56.3\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384-in21k_lsj_8x2_50e_coco-panoptic_20220329_230021-3bb8b482.pth\n- Name: mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic.py\n  Metadata:\n    Training Memory (GB): 26.0\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 48.2\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 44.9\n  - Task: Panoptic Segmentation\n    Dataset: COCO\n    Metrics:\n      PQ: 55.1\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic/mask2former_swin-b-p4-w12-384_lsj_8x2_50e_coco-panoptic_20220331_002244-c149a9e9.pth\n- Name: mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py\n  Metadata:\n    Training Memory (GB): 15.3\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 47.7\n  - Task: Instance Segmentation\n    
Dataset: COCO\n    Metrics:\n      mask AP: 44.7\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth\n- Name: mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco\n  In Collection: Mask2Former\n  Config: configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py\n  Metadata:\n    Training Memory (GB): 18.8\n    Iterations: 368750\n  Results:\n  - Task: Object Detection\n    Dataset: COCO\n    Metrics:\n      box AP: 49.3\n  - Task: Instance Segmentation\n    Dataset: COCO\n    Metrics:\n      mask AP: 46.1\n  Weights: https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_caffe_c4_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_caffe_c4.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py'\n# learning policy\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_poly_1x_coco_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    rpn_head=dict(\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),\n    roi_head=dict(\n        bbox_roi_extractor=dict(\n            roi_layer=dict(\n                type='RoIAlign',\n                output_size=7,\n                sampling_ratio=2,\n                aligned=False)),\n        bbox_head=dict(\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),\n        mask_roi_extractor=dict(\n            roi_layer=dict(\n                type='RoIAlign',\n                output_size=14,\n                sampling_ratio=2,\n                aligned=False))))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_wandb_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# Set evaluation interval\nevaluation = dict(interval=2)\n# Set checkpoint interval\ncheckpoint_config = dict(interval=4)\n\n# yapf:disable\nlog_config = dict(\n    interval=50,\n    hooks=[\n        dict(type='TextLoggerHook'),\n        dict(type='MMDetWandbHook',\n             init_kwargs={\n                'project': 'mmdetection',\n                'group': 'maskrcnn-r50-fpn-1x-coco'\n             },\n             interval=50,\n             log_checkpoint=True,\n             log_checkpoint_metadata=True,\n             num_eval_images=100)\n        ])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_1x_coco.py'\n# fp16 settings\nfp16 = dict(loss_scale=512.)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_poly_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py",
    "content": "_base_ = './mask_rcnn_r101_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=8,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))\n\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675],\n    std=[57.375, 57.120, 58.395],\n    to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py",
    "content": "_base_ = './mask_rcnn_r101_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=8,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))\n\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675],\n    std=[57.375, 57.120, 58.395],\n    to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=8,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnext101_32x8d')))\n\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675],\n    std=[57.375, 57.120, 58.395],\n    to_rgb=False)\n\n# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],\n# multiscale_mode='range'\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_x101_32x4d_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py",
    "content": "_base_ = './mask_rcnn_x101_32x4d_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/metafile.yml",
    "content": "Collections:\n  - Name: Mask R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Softmax\n        - RPN\n        - Convolution\n        - Dense Connections\n        - FPN\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: https://arxiv.org/abs/1703.06870v3\n      Title: \"Mask R-CNN\"\n    README: configs/mask_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/mask_rcnn.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: mask_rcnn_r50_caffe_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.3\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 34.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco/mask_rcnn_r50_caffe_fpn_1x_coco_bbox_mAP-0.38__segm_mAP-0.344_20200504_231812-0ebd1859.pth\n\n  - Name: mask_rcnn_r50_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.4\n      inference time (ms/im):\n        - value: 62.11\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 34.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth\n\n  - Name: mask_rcnn_r50_fpn_fp16_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.6\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n        - Mixed Precision Training\n      inference time (ms/im):\n        - value: 41.49\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP16\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 34.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205-59faf7e4.pth\n\n  - Name: mask_rcnn_r50_fpn_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 4.4\n      inference time (ms/im):\n        - value: 62.11\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 35.4\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth\n\n  - Name: mask_rcnn_r101_caffe_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_1x_coco/mask_rcnn_r101_caffe_fpn_1x_coco_20200601_095758-805e06c1.pth\n\n  - Name: mask_rcnn_r101_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      inference time (ms/im):\n        - value: 74.07\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth\n\n  - Name: mask_rcnn_r101_fpn_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      inference time (ms/im):\n        - value: 74.07\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_bbox_mAP-0.408__segm_mAP-0.366_20200505_071027-14b391c7.pth\n\n  - Name: mask_rcnn_x101_32x4d_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      inference time (ms/im):\n        - value: 88.5\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth\n\n  - Name: mask_rcnn_x101_32x4d_fpn_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      inference time (ms/im):\n        - value: 88.5\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.2\n 
     - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.422__segm_mAP-0.378_20200506_004702-faef898c.pth\n\n  - Name: mask_rcnn_x101_64x4d_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.7\n      inference time (ms/im):\n        - value: 125\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201-9352eb0d.pth\n\n  - Name: mask_rcnn_x101_64x4d_fpn_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 10.7\n      inference time (ms/im):\n        - value: 125\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208-39d6f70c.pth\n\n  - Name: mask_rcnn_x101_32x8d_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.6\n      Epochs: 12\n    Results:\n    - Task: Object Detection\n      Dataset: COCO\n      Metrics:\n        box AP: 42.8\n    - Task: Instance Segmentation\n      Dataset: COCO\n      Metrics:\n        mask AP: 38.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco/mask_rcnn_x101_32x8d_fpn_1x_coco_20220630_173841-0aaf329e.pth\n\n  - Name: mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco.py\n    Metadata:\n      Training Memory (GB): 4.3\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_2x_coco_bbox_mAP-0.403__segm_mAP-0.365_20200504_231822-a75c98ce.pth\n\n  - Name: mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 4.3\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.0\n    
Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth\n\n  - Name: mask_rcnn_r50_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 4.1\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_fpn_mstrain-poly_3x_coco_20210524_201154-21b550bb.pth\n\n  - Name: mask_rcnn_r101_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 6.1\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_fpn_mstrain-poly_3x_coco_20210524_200244-5675c317.pth\n\n  - Name: mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.9\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r101_caffe_fpn_mstrain-poly_3x_coco_20210526_132339-3c33ce02.pth\n\n  - Name: mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.3\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x4d_fpn_mstrain-poly_3x_coco_20210524_201410-abcd7859.pth\n\n  - Name: mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.4\n      Epochs: 12\n    Results:\n    - Task: Object Detection\n      Dataset: COCO\n      Metrics:\n        box AP: 43.4\n    - Task: Instance Segmentation\n      Dataset: COCO\n      Metrics:\n        mask AP: 39.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_1x_coco_20220630_170346-b4637974.pth\n\n  - Name: mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n     
 Training Memory (GB): 10.3\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_32x8d_fpn_mstrain-poly_3x_coco_20210607_161042-8bd2c639.pth\n\n  - Name: mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Epochs: 36\n      Training Memory (GB): 10.4\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco/mask_rcnn_x101_64x4d_fpn_mstrain-poly_3x_coco_20210526_120447-c376f129.pth\n"
  },
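  {
    "path": "DLTA_AI_app/mmdetection/configs/mask_rcnn/_read_metafile_sketch.py",
    "content": "# NOTE: illustrative sketch only -- this helper is not part of upstream mmdetection.\n# The metafile.yml files in these config folders are plain YAML; this shows one way to\n# list every Mask R-CNN entry with its COCO box/mask AP and checkpoint URL, assuming\n# PyYAML is installed and the script is run from the mmdetection root directory.\nimport yaml\n\nwith open('configs/mask_rcnn/metafile.yml') as f:\n    metafile = yaml.safe_load(f)\n\nfor model in metafile['Models']:\n    # Each Results entry carries a Task plus a Metrics mapping (box AP / mask AP).\n    metrics = {}\n    for result in model.get('Results', []):\n        metrics.update(result.get('Metrics', {}))\n    print(model['Name'], metrics, model.get('Weights'))\n"
  },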
  {
    "path": "DLTA_AI_app/mmdetection/configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_panoptic.py', '../_base_/default_runtime.py'\n]\nnum_things_classes = 80\nnum_stuff_classes = 53\nnum_classes = num_things_classes + num_stuff_classes\nmodel = dict(\n    type='MaskFormer',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=-1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    panoptic_head=dict(\n        type='MaskFormerHead',\n        in_channels=[256, 512, 1024, 2048],  # pass to pixel_decoder inside\n        feat_channels=256,\n        out_channels=256,\n        num_things_classes=num_things_classes,\n        num_stuff_classes=num_stuff_classes,\n        num_queries=100,\n        pixel_decoder=dict(\n            type='TransformerEncoderPixelDecoder',\n            norm_cfg=dict(type='GN', num_groups=32),\n            act_cfg=dict(type='ReLU'),\n            encoder=dict(\n                type='DetrTransformerEncoder',\n                num_layers=6,\n                transformerlayers=dict(\n                    type='BaseTransformerLayer',\n                    attn_cfgs=dict(\n                        type='MultiheadAttention',\n                        embed_dims=256,\n                        num_heads=8,\n                        attn_drop=0.1,\n                        proj_drop=0.1,\n                        dropout_layer=None,\n                        batch_first=False),\n                    ffn_cfgs=dict(\n                        embed_dims=256,\n                        feedforward_channels=2048,\n                        num_fcs=2,\n                        act_cfg=dict(type='ReLU', inplace=True),\n                        ffn_drop=0.1,\n                        dropout_layer=None,\n                        add_identity=True),\n                    operation_order=('self_attn', 'norm', 'ffn', 'norm'),\n                    norm_cfg=dict(type='LN'),\n                    init_cfg=None,\n                    batch_first=False),\n                init_cfg=None),\n            positional_encoding=dict(\n                type='SinePositionalEncoding', num_feats=128, normalize=True)),\n        enforce_decoder_input_project=False,\n        positional_encoding=dict(\n            type='SinePositionalEncoding', num_feats=128, normalize=True),\n        transformer_decoder=dict(\n            type='DetrTransformerDecoder',\n            return_intermediate=True,\n            num_layers=6,\n            transformerlayers=dict(\n                type='DetrTransformerDecoderLayer',\n                attn_cfgs=dict(\n                    type='MultiheadAttention',\n                    embed_dims=256,\n                    num_heads=8,\n                    attn_drop=0.1,\n                    proj_drop=0.1,\n                    dropout_layer=None,\n                    batch_first=False),\n                ffn_cfgs=dict(\n                    embed_dims=256,\n                    feedforward_channels=2048,\n                    num_fcs=2,\n                    act_cfg=dict(type='ReLU', inplace=True),\n                    ffn_drop=0.1,\n                    dropout_layer=None,\n                    add_identity=True),\n                # the following parameter was not used,\n                # just make current api happy\n                feedforward_channels=2048,\n                operation_order=('self_attn', 'norm', 
'cross_attn', 'norm',\n                                 'ffn', 'norm')),\n            init_cfg=None),\n        loss_cls=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            loss_weight=1.0,\n            reduction='mean',\n            class_weight=[1.0] * num_classes + [0.1]),\n        loss_mask=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            reduction='mean',\n            loss_weight=20.0),\n        loss_dice=dict(\n            type='DiceLoss',\n            use_sigmoid=True,\n            activate=True,\n            reduction='mean',\n            naive_dice=True,\n            eps=1.0,\n            loss_weight=1.0)),\n    panoptic_fusion_head=dict(\n        type='MaskFormerFusionHead',\n        num_things_classes=num_things_classes,\n        num_stuff_classes=num_stuff_classes,\n        loss_panoptic=None,\n        init_cfg=None),\n    train_cfg=dict(\n        assigner=dict(\n            type='MaskHungarianAssigner',\n            cls_cost=dict(type='ClassificationCost', weight=1.0),\n            mask_cost=dict(\n                type='FocalLossCost', weight=20.0, binary_input=True),\n            dice_cost=dict(\n                type='DiceCost', weight=1.0, pred_act=True, eps=1.0)),\n        sampler=dict(type='MaskPseudoSampler')),\n    test_cfg=dict(\n        panoptic_on=True,\n        # For now, the dataset does not support\n        # evaluating semantic segmentation metric.\n        semantic_on=False,\n        instance_on=False,\n        # max_per_image is for instance segmentation.\n        max_per_image=100,\n        object_mask_thr=0.8,\n        iou_thr=0.8,\n        # In MaskFormer's panoptic postprocessing,\n        # it will not filter masks whose score is smaller than 0.5 .\n        filter_low_score=False),\n    init_cfg=None)\n\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadPanopticAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        with_seg=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='AutoAugment',\n        policies=[[\n            dict(\n                type='Resize',\n                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),\n                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),\n                           (736, 1333), (768, 1333), (800, 1333)],\n                multiscale_mode='value',\n                keep_ratio=True)\n        ],\n                  [\n                      dict(\n                          type='Resize',\n                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],\n                          multiscale_mode='value',\n                          keep_ratio=True),\n                      dict(\n                          type='RandomCrop',\n                          crop_type='absolute_range',\n                          crop_size=(384, 600),\n                          allow_negative_crop=True),\n                      dict(\n                          type='Resize',\n                          img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                                     (576, 1333), (608, 1333), (640, 1333),\n                                     (672, 1333), (704, 1333), (736, 1333),\n                                     (768, 1333), (800, 1333)],\n                         
 multiscale_mode='value',\n                          override=True,\n                          keep_ratio=True)\n                  ]]),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=1),\n    dict(type='DefaultFormatBundle'),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=1),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=1,\n    workers_per_gpu=1,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=0.0001,\n    weight_decay=0.0001,\n    eps=1e-8,\n    betas=(0.9, 0.999),\n    paramwise_cfg=dict(\n        custom_keys={\n            'backbone': dict(lr_mult=0.1, decay_mult=1.0),\n            'query_embed': dict(lr_mult=1.0, decay_mult=0.0)\n        },\n        norm_decay_mult=0.0))\noptimizer_config = dict(grad_clip=dict(max_norm=0.01, norm_type=2))\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    gamma=0.1,\n    by_epoch=True,\n    step=[50],\n    warmup='linear',\n    warmup_by_epoch=False,\n    warmup_ratio=1.0,  # no warmup\n    warmup_iters=10)\nrunner = dict(type='EpochBasedRunner', max_epochs=75)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py",
    "content": "_base_ = './maskformer_r50_mstrain_16x1_75e_coco.py'\n\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_large_patch4_window12_384_22k.pth'  # noqa\ndepths = [2, 2, 18, 2]\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='SwinTransformer',\n        pretrain_img_size=384,\n        embed_dims=192,\n        patch_size=4,\n        window_size=12,\n        mlp_ratio=4,\n        depths=depths,\n        num_heads=[6, 12, 24, 48],\n        qkv_bias=True,\n        qk_scale=None,\n        drop_rate=0.,\n        attn_drop_rate=0.,\n        drop_path_rate=0.3,\n        patch_norm=True,\n        out_indices=(0, 1, 2, 3),\n        with_cp=False,\n        convert_weights=True,\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    panoptic_head=dict(\n        in_channels=[192, 384, 768, 1536],  # pass to pixel_decoder inside\n        pixel_decoder=dict(\n            _delete_=True,\n            type='PixelDecoder',\n            norm_cfg=dict(type='GN', num_groups=32),\n            act_cfg=dict(type='ReLU')),\n        enforce_decoder_input_project=True))\n\n# weight_decay = 0.01\n# norm_weight_decay = 0.0\n# embed_weight_decay = 0.0\nembed_multi = dict(lr_mult=1.0, decay_mult=0.0)\nnorm_multi = dict(lr_mult=1.0, decay_mult=0.0)\ncustom_keys = {\n    'norm': norm_multi,\n    'absolute_pos_embed': embed_multi,\n    'relative_position_bias_table': embed_multi,\n    'query_embed': embed_multi\n}\n\n# optimizer\noptimizer = dict(\n    type='AdamW',\n    lr=6e-5,\n    weight_decay=0.01,\n    eps=1e-8,\n    betas=(0.9, 0.999),\n    paramwise_cfg=dict(custom_keys=custom_keys, norm_decay_mult=0.0))\noptimizer_config = dict(grad_clip=dict(max_norm=0.01, norm_type=2))\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    gamma=0.1,\n    by_epoch=True,\n    step=[250],\n    warmup='linear',\n    warmup_by_epoch=False,\n    warmup_ratio=1e-6,\n    warmup_iters=1500)\nrunner = dict(type='EpochBasedRunner', max_epochs=300)\n"
  },
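  {
    "path": "DLTA_AI_app/mmdetection/configs/maskformer/_config_inheritance_sketch.py",
    "content": "# NOTE: illustrative sketch only -- this file is not part of upstream mmdetection.\n# It shows how the `_base_` / `_delete_=True` mechanism used by\n# maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py is resolved by mmcv's Config class,\n# assuming mmcv 1.x is installed and the script is run from the mmdetection root.\nfrom mmcv import Config\n\n# Load the child config; mmcv merges it on top of its `_base_` file\n# (maskformer_r50_mstrain_16x1_75e_coco.py) before returning the result.\ncfg = Config.fromfile(\n    'configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py')\n\n# Because the child backbone dict sets `_delete_=True`, the ResNet keys from the base\n# config are dropped entirely and replaced by the SwinTransformer settings.\nprint(cfg.model.backbone.type)              # SwinTransformer\nprint(cfg.model.panoptic_head.in_channels)  # [192, 384, 768, 1536]\n\n# The fully merged config can be inspected or dumped for debugging.\nprint(cfg.pretty_text[:500])\n"
  },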
  {
    "path": "DLTA_AI_app/mmdetection/configs/maskformer/metafile.yml",
    "content": "Collections:\n  - Name: MaskFormer\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n        - Weight Decay\n      Training Resources: 16x V100 GPUs\n      Architecture:\n        - MaskFormer\n    Paper:\n      URL: https://arxiv.org/pdf/2107.06278\n      Title: 'Per-Pixel Classification is Not All You Need for Semantic Segmentation'\n    README: configs/maskformer/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/mmdet/models/detectors/maskformer.py#L7\n      Version: v2.22.0\n\nModels:\n  - Name: maskformer_r50_mstrain_16x1_75e_coco\n    In Collection: MaskFormer\n    Config: configs/maskformer/maskformer_r50_mstrain_16x1_75e_coco.py\n    Metadata:\n      Training Memory (GB): 16.2\n      Epochs: 75\n    Results:\n    - Task: Panoptic Segmentation\n      Dataset: COCO\n      Metrics:\n        PQ: 46.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_r50_mstrain_16x1_75e_coco/maskformer_r50_mstrain_16x1_75e_coco_20220221_141956-bc2699cb.pth\n  - Name: maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco\n    In Collection: MaskFormer\n    Config: configs/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco.py\n    Metadata:\n      Training Memory (GB): 27.2\n      Epochs: 300\n    Results:\n    - Task: Panoptic Segmentation\n      Dataset: COCO\n      Metrics:\n        PQ: 53.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/maskformer/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco/maskformer_swin-l-p4-w12_mstrain_64x1_300e_coco_20220326_221612-061b4eb8.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/metafile.yml",
    "content": "Collections:\n  - Name: Mask Scoring R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RPN\n        - FPN\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: https://arxiv.org/abs/1903.00241\n      Title: 'Mask Scoring R-CNN'\n    README: configs/ms_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/mask_scoring_rcnn.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: ms_rcnn_r50_caffe_fpn_1x_coco\n    In Collection: Mask Scoring R-CNN\n    Config: configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.5\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco/ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth\n\n  - Name: ms_rcnn_r50_caffe_fpn_2x_coco\n    In Collection: Mask Scoring R-CNN\n    Config: configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco/ms_rcnn_r50_caffe_fpn_2x_coco_bbox_mAP-0.388__segm_mAP-0.363_20200506_004738-ee87b137.pth\n\n  - Name: ms_rcnn_r101_caffe_fpn_1x_coco\n    In Collection: Mask Scoring R-CNN\n    Config: configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.5\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco/ms_rcnn_r101_caffe_fpn_1x_coco_bbox_mAP-0.404__segm_mAP-0.376_20200506_004755-b9b12a37.pth\n\n  - Name: ms_rcnn_r101_caffe_fpn_2x_coco\n    In Collection: Mask Scoring R-CNN\n    Config: configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco/ms_rcnn_r101_caffe_fpn_2x_coco_bbox_mAP-0.411__segm_mAP-0.381_20200506_011134-5f3cc74f.pth\n\n  - Name: ms_rcnn_x101_32x4d_fpn_1x_coco\n    In Collection: Mask Scoring R-CNN\n    Config: configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.9\n      inference time (ms/im):\n        - value: 90.91\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        
Metrics:\n          mask AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206-81fd1740.pth\n\n  - Name: ms_rcnn_x101_64x4d_fpn_1x_coco\n    In Collection: Mask Scoring R-CNN\n    Config: configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 11.0\n      inference time (ms/im):\n        - value: 125\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206-86ba88d2.pth\n\n  - Name: ms_rcnn_x101_64x4d_fpn_2x_coco\n    In Collection: Mask Scoring R-CNN\n    Config: configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 11.0\n      inference time (ms/im):\n        - value: 125\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308-02a445e2.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './ms_rcnn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/ms_rcnn_r101_caffe_fpn_2x_coco.py",
    "content": "_base_ = './ms_rcnn_r101_caffe_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    type='MaskScoringRCNN',\n    roi_head=dict(\n        type='MaskScoringRoIHead',\n        mask_iou_head=dict(\n            type='MaskIoUHead',\n            num_convs=4,\n            num_fcs=2,\n            roi_feat_size=14,\n            in_channels=256,\n            conv_out_channels=256,\n            fc_out_channels=1024,\n            num_classes=80)),\n    # model training and testing settings\n    train_cfg=dict(rcnn=dict(mask_thr_binary=0.5)))\n"
  },
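  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/_inference_sketch.py",
    "content": "# NOTE: illustrative sketch only -- not part of upstream mmdetection.\n# Minimal example of running ms_rcnn_r50_caffe_fpn_1x_coco for instance segmentation\n# with the checkpoint listed in configs/ms_rcnn/metafile.yml, assuming mmdet 2.x and\n# mmcv 1.x are installed, a CUDA device is available, and a test image 'demo.jpg' exists.\nfrom mmdet.apis import init_detector, inference_detector\n\nCONFIG = 'configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_1x_coco.py'\nCHECKPOINT = ('https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/'\n              'ms_rcnn_r50_caffe_fpn_1x_coco/'\n              'ms_rcnn_r50_caffe_fpn_1x_coco_20200702_180848-61c9355e.pth')\n\n# Build the MaskScoringRCNN model defined above and load the pretrained weights.\nmodel = init_detector(CONFIG, CHECKPOINT, device='cuda:0')\n\n# For instance-segmentation models the result is a (bbox_results, segm_results) pair\n# with one list entry per COCO class.\nbbox_results, segm_results = inference_detector(model, 'demo.jpg')\n\n# Visualise detections above a confidence threshold and save the overlay.\nmodel.show_result('demo.jpg', (bbox_results, segm_results), score_thr=0.3,\n                  out_file='demo_ms_rcnn.jpg')\n"
  },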
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/ms_rcnn_r50_caffe_fpn_2x_coco.py",
    "content": "_base_ = './ms_rcnn_r50_caffe_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    type='MaskScoringRCNN',\n    roi_head=dict(\n        type='MaskScoringRoIHead',\n        mask_iou_head=dict(\n            type='MaskIoUHead',\n            num_convs=4,\n            num_fcs=2,\n            roi_feat_size=14,\n            in_channels=256,\n            conv_out_channels=256,\n            fc_out_channels=1024,\n            num_classes=80)),\n    # model training and testing settings\n    train_cfg=dict(rcnn=dict(mask_thr_binary=0.5)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './ms_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './ms_rcnn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py",
    "content": "_base_ = './ms_rcnn_x101_64x4d_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/nas_fcos/metafile.yml",
    "content": "Collections:\n  - Name: NAS-FCOS\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 4x V100 GPUs\n      Architecture:\n        - FPN\n        - NAS-FCOS\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1906.04423\n      Title: 'NAS-FCOS: Fast Neural Architecture Search for Object Detection'\n    README: configs/nas_fcos/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/detectors/nasfcos.py#L6\n      Version: v2.1.0\n\nModels:\n  - Name: nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco\n    In Collection: NAS-FCOS\n    Config: configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200520-1bdba3ce.pth\n\n  - Name: nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco\n    In Collection: NAS-FCOS\n    Config: configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco_20200521-7fdcbce0.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/nas_fcos/nas_fcos_fcoshead_r50_caffe_fpn_gn-head_4x4_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    type='NASFCOS',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False, eps=0),\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    neck=dict(\n        type='NASFCOS_FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5,\n        norm_cfg=dict(type='BN'),\n        conv_cfg=dict(type='DCNv2', deform_groups=2)),\n    bbox_head=dict(\n        type='FCOSHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128],\n        norm_cfg=dict(type='GN', num_groups=32),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='IoULoss', loss_weight=1.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\n\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\noptimizer = dict(\n    lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/nas_fcos/nas_fcos_nashead_r50_caffe_fpn_gn-head_4x4_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    type='NASFCOS',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False, eps=0),\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    neck=dict(\n        type='NASFCOS_FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5,\n        norm_cfg=dict(type='BN'),\n        conv_cfg=dict(type='DCNv2', deform_groups=2)),\n    bbox_head=dict(\n        type='NASFCOSHead',\n        num_classes=80,\n        in_channels=256,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128],\n        norm_cfg=dict(type='GN', num_groups=32),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='IoULoss', loss_weight=1.0),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\n\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\n\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=4,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\noptimizer = dict(\n    lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/nas_fpn/metafile.yml",
    "content": "Collections:\n  - Name: NAS-FPN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - NAS-FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1904.07392\n      Title: 'NAS-FPN: Learning Scalable Feature Pyramid Architecture for Object Detection'\n    README: configs/nas_fpn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/necks/nas_fpn.py#L67\n      Version: v2.0.0\n\nModels:\n  - Name: retinanet_r50_fpn_crop640_50e_coco\n    In Collection: NAS-FPN\n    Config: configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py\n    Metadata:\n      Training Memory (GB): 12.9\n      inference time (ms/im):\n        - value: 43.67\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_fpn_crop640_50e_coco/retinanet_r50_fpn_crop640_50e_coco-9b953d76.pth\n\n  - Name: retinanet_r50_nasfpn_crop640_50e_coco\n    In Collection: NAS-FPN\n    Config: configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py\n    Metadata:\n      Training Memory (GB): 13.2\n      inference time (ms/im):\n        - value: 43.48\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 50\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco/retinanet_r50_nasfpn_crop640_50e_coco-0ad1f644.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/nas_fpn/retinanet_r50_fpn_crop640_50e_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'\n]\ncudnn_benchmark = True\nnorm_cfg = dict(type='BN', requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=False,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        relu_before_extra_convs=True,\n        no_norm_on_lateral=True,\n        norm_cfg=norm_cfg),\n    bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg),\n    # training and testing settings\n    train_cfg=dict(assigner=dict(neg_iou_thr=0.5)))\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=(640, 640),\n        ratio_range=(0.8, 1.2),\n        keep_ratio=True),\n    dict(type='RandomCrop', crop_size=(640, 640)),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=(640, 640)),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(640, 640),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=64),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(\n    type='SGD',\n    lr=0.08,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True))\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.1,\n    step=[30, 40])\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=50)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/nas_fpn/retinanet_r50_nasfpn_crop640_50e_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'\n]\ncudnn_benchmark = True\n# model settings\nnorm_cfg = dict(type='BN', requires_grad=True)\nmodel = dict(\n    type='RetinaNet',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=False,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(type='NASFPN', stack_times=7, norm_cfg=norm_cfg),\n    bbox_head=dict(type='RetinaSepBNHead', num_ins=5, norm_cfg=norm_cfg),\n    # training and testing settings\n    train_cfg=dict(assigner=dict(neg_iou_thr=0.5)))\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=(640, 640),\n        ratio_range=(0.8, 1.2),\n        keep_ratio=True),\n    dict(type='RandomCrop', crop_size=(640, 640)),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=(640, 640)),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(640, 640),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=128),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(\n    type='SGD',\n    lr=0.08,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(norm_decay_mult=0, bypass_duplicate=True))\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.1,\n    step=[30, 40])\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=50)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/openimages_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(roi_head=dict(bbox_head=dict(num_classes=601)))\n\n# Using 32 GPUS while training\noptimizer = dict(type='SGD', lr=0.08, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=26000,\n    warmup_ratio=1.0 / 64,\n    step=[8, 11])\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (32 GPUs) x (2 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py",
    "content": "_base_ = ['faster_rcnn_r50_fpn_32x2_1x_openimages.py']\n\nmodel = dict(\n    roi_head=dict(bbox_head=dict(num_classes=500)),\n    test_cfg=dict(rcnn=dict(score_thr=0.01)))\n\n# dataset settings\ndataset_type = 'OpenImagesChallengeDataset'\ndata_root = 'data/OpenImages/'\ndata = dict(\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root +\n        'challenge2019/challenge-2019-train-detection-bbox.txt',\n        img_prefix=data_root + 'OpenImages/',\n        label_file=data_root + 'challenge2019/cls-label-description.csv',\n        hierarchy_file=data_root + 'challenge2019/class_label_tree.np'),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root +\n        'challenge2019/challenge-2019-validation-detection-bbox.txt',\n        img_prefix=data_root + 'OpenImages/',\n        label_file=data_root + 'challenge2019/cls-label-description.csv',\n        hierarchy_file=data_root + 'challenge2019/class_label_tree.np',\n        meta_file=data_root +\n        'challenge2019/challenge-2019-validation-metas.pkl',\n        image_level_ann_file=data_root +\n        'challenge2019/challenge-2019-validation-detection-'\n        'human-imagelabels.csv'),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root +\n        'challenge2019/challenge-2019-validation-detection-bbox.txt',\n        img_prefix=data_root + 'OpenImages/',\n        label_file=data_root + 'challenge2019/cls-label-description.csv',\n        hierarchy_file=data_root + 'challenge2019/class_label_tree.np',\n        meta_file=data_root +\n        'challenge2019/challenge-2019-validation-metas.pkl',\n        image_level_ann_file=data_root +\n        'challenge2019/challenge-2019-validation-detection-'\n        'human-imagelabels.csv'))\nevaluation = dict(interval=1, metric='mAP')\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (32 GPUs) x (2 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages.py",
    "content": "_base_ = ['faster_rcnn_r50_fpn_32x2_1x_openimages.py']\n\n# Use ClassAwareSampler\ndata = dict(\n    train_dataloader=dict(class_aware_sampler=dict(num_sample_class=1)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge.py",
    "content": "_base_ = ['faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py']\n\n# Use ClassAwareSampler\ndata = dict(\n    train_dataloader=dict(class_aware_sampler=dict(num_sample_class=1)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/openimages/metafile.yml",
    "content": "Models:\n  - Name: faster_rcnn_r50_fpn_32x2_1x_openimages\n    In Collection: Faster R-CNN\n    Config: configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages.py\n    Metadata:\n      Training Memory (GB): 7.7\n      Epochs: 12\n      Training Data: Open Images v6\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n    Results:\n      - Task: Object Detection\n        Dataset: Open Images v6\n        Metrics:\n          box AP: 51.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_20211130_231159-e87ab7ce.pth\n\n  - Name: retinanet_r50_fpn_32x2_1x_openimages\n    In Collection: RetinaNet\n    Config: configs/openimages/retinanet_r50_fpn_32x2_1x_openimages.py\n    Metadata:\n      Training Memory (GB): 6.6\n      Epochs: 12\n      Training Data: Open Images v6\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n    Results:\n      - Task: Object Detection\n        Dataset: Open Images v6\n        Metrics:\n          box AP: 61.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/retinanet_r50_fpn_32x2_1x_openimages/retinanet_r50_fpn_32x2_1x_openimages_20211223_071954-d2ae5462.pth\n\n  - Name: ssd300_32x8_36e_openimages\n    In Collection: SSD\n    Config: configs/openimages/ssd300_32x8_36e_openimages.py\n    Metadata:\n      Training Memory (GB): 10.8\n      Epochs: 36\n      Training Data: Open Images v6\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n    Results:\n      - Task: Object Detection\n        Dataset: Open Images v6\n        Metrics:\n          box AP: 35.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/ssd300_32x8_36e_openimages/ssd300_32x8_36e_openimages_20211224_000232-dce93846.pth\n\n  - Name: faster_rcnn_r50_fpn_32x2_1x_openimages_challenge\n    In Collection: Faster R-CNN\n    Config: configs/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge.py\n    Metadata:\n      Training Memory (GB): 7.7\n      Epochs: 12\n      Training Data: Open Images Challenge 2019\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n    Results:\n      - Task: Object Detection\n        Dataset: Open Images Challenge 2019\n        Metrics:\n          box AP: 54.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge/faster_rcnn_r50_fpn_32x2_1x_openimages_challenge_20220114_045100-0e79e5df.pth\n\n  - Name: faster_rcnn_r50_fpn_32x2_cas_1x_openimages\n    In Collection: Faster R-CNN\n    Config: configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages.py\n    Metadata:\n      Training Memory (GB): 7.7\n      Epochs: 12\n      Training Data: Open Images Challenge 2019\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n    Results:\n      - Task: Object Detection\n        Dataset: Open Images Challenge 2019\n        Metrics:\n          box AP: 60.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_20220306_202424-98c630e5.pth\n\n  - Name: faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge\n    In Collection: Faster R-CNN\n    Config: configs/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge.py\n    Metadata:\n      Training Memory (GB): 7.1\n      Epochs: 12\n      Training Data: 
Open Images Challenge 2019\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n    Results:\n      - Task: Object Detection\n        Dataset: Open Images Challenge 2019\n        Metrics:\n          box AP: 65.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/openimages/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge/faster_rcnn_r50_fpn_32x2_cas_1x_openimages_challenge_20220221_192021-34c402d9.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/openimages/retinanet_r50_fpn_32x2_1x_openimages.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/openimages_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(bbox_head=dict(num_classes=601))\n\noptimizer = dict(type='SGD', lr=0.08, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=26000,\n    warmup_ratio=1.0 / 64,\n    step=[8, 11])\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (32 GPUs) x (2 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/openimages/ssd300_32x8_36e_openimages.py",
    "content": "_base_ = [\n    '../_base_/models/ssd300.py', '../_base_/datasets/openimages_detection.py',\n    '../_base_/default_runtime.py', '../_base_/schedules/schedule_1x.py'\n]\nmodel = dict(\n    bbox_head=dict(\n        num_classes=601,\n        anchor_generator=dict(basesize_ratio_range=(0.2, 0.9))))\n# dataset settings\ndataset_type = 'OpenImagesDataset'\ndata_root = 'data/OpenImages/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True, normed_bbox=True),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(300, 300),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,  # using 32 GPUS while training.\n    workers_per_gpu=0,  # workers_per_gpu > 0 may occur out of memory\n    train=dict(\n        _delete_=True,\n        type='RepeatDataset',\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root +\n            'annotations/oidv6-train-annotations-bbox.csv',\n            img_prefix=data_root + 'OpenImages/train/',\n            label_file=data_root +\n            'annotations/class-descriptions-boxable.csv',\n            hierarchy_file=data_root +\n            'annotations/bbox_labels_600_hierarchy.json',\n            pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=0.04, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict()\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=20000,\n    warmup_ratio=0.001,\n    step=[8, 11])\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (32 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=256)\n"
  },
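  {
    "path": "DLTA_AI_app/mmdetection/configs/openimages/_auto_scale_lr_sketch.py",
    "content": "# NOTE: illustrative sketch only -- not part of upstream mmdetection.\n# The `auto_scale_lr = dict(base_batch_size=...)` blocks in the configs above follow the\n# linear scaling rule: when a run uses a different total batch size than the one the\n# config was tuned for, the learning rate is scaled by the same factor. The helper below\n# only reproduces that arithmetic for inspection; the actual scaling is applied by\n# mmdet's training tools when the feature is enabled.\n\ndef scaled_lr(base_lr, num_gpus, samples_per_gpu, base_batch_size):\n    # Total batch size actually used for this run.\n    total_batch_size = num_gpus * samples_per_gpu\n    # Linear scaling rule: lr grows or shrinks proportionally to batch size.\n    return base_lr * total_batch_size / base_batch_size\n\n# ssd300_32x8_36e_openimages: lr=0.04 was tuned for 32 GPUs x 8 samples = 256.\nprint(scaled_lr(0.04, num_gpus=8, samples_per_gpu=8, base_batch_size=256))  # 0.01\n\n# faster_rcnn_r50_fpn_32x2_1x_openimages: lr=0.08 was tuned for 32 GPUs x 2 samples = 64.\nprint(scaled_lr(0.08, num_gpus=8, samples_per_gpu=2, base_batch_size=64))   # 0.02\n"
  },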
  {
    "path": "DLTA_AI_app/mmdetection/configs/paa/metafile.yml",
    "content": "Collections:\n  - Name: PAA\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - Probabilistic Anchor Assignment\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/2007.08103\n      Title: 'Probabilistic Anchor Assignment with IoU Prediction for Object Detection'\n    README: configs/paa/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.4.0/mmdet/models/detectors/paa.py#L6\n      Version: v2.4.0\n\nModels:\n  - Name: paa_r50_fpn_1x_coco\n    In Collection: PAA\n    Config: configs/paa/paa_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.7\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1x_coco/paa_r50_fpn_1x_coco_20200821-936edec3.pth\n\n  - Name: paa_r50_fpn_1.5x_coco\n    In Collection: PAA\n    Config: configs/paa/paa_r50_fpn_1.5x_coco.py\n    Metadata:\n      Training Memory (GB): 3.7\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_1.5x_coco/paa_r50_fpn_1.5x_coco_20200823-805d6078.pth\n\n  - Name: paa_r50_fpn_2x_coco\n    In Collection: PAA\n    Config: configs/paa/paa_r50_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 3.7\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_2x_coco/paa_r50_fpn_2x_coco_20200821-c98bfc4e.pth\n\n  - Name: paa_r50_fpn_mstrain_3x_coco\n    In Collection: PAA\n    Config: configs/paa/paa_r50_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 3.7\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r50_fpn_mstrain_3x_coco/paa_r50_fpn_mstrain_3x_coco_20210121_145722-06a6880b.pth\n\n  - Name: paa_r101_fpn_1x_coco\n    In Collection: PAA\n    Config: configs/paa/paa_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_1x_coco/paa_r101_fpn_1x_coco_20200821-0a1825a4.pth\n\n  - Name: paa_r101_fpn_2x_coco\n    In Collection: PAA\n    Config: configs/paa/paa_r101_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 6.2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_2x_coco/paa_r101_fpn_2x_coco_20200821-6829f96b.pth\n\n  - Name: paa_r101_fpn_mstrain_3x_coco\n    In Collection: PAA\n    Config: configs/paa/paa_r101_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 6.2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/paa/paa_r101_fpn_mstrain_3x_coco/paa_r101_fpn_mstrain_3x_coco_20210122_084202-83250d22.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/paa/paa_r101_fpn_1x_coco.py",
    "content": "_base_ = './paa_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/paa/paa_r101_fpn_2x_coco.py",
    "content": "_base_ = './paa_r101_fpn_1x_coco.py'\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/paa/paa_r101_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './paa_r50_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/paa/paa_r50_fpn_1.5x_coco.py",
    "content": "_base_ = './paa_r50_fpn_1x_coco.py'\nlr_config = dict(step=[12, 16])\nrunner = dict(type='EpochBasedRunner', max_epochs=18)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/paa/paa_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='PAA',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    bbox_head=dict(\n        type='PAAHead',\n        reg_decoded_bbox=True,\n        score_voting=True,\n        topk=9,\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.1,\n            neg_iou_thr=0.1,\n            min_pos_iou=0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/paa/paa_r50_fpn_2x_coco.py",
    "content": "_base_ = './paa_r50_fpn_1x_coco.py'\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/paa/paa_r50_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './paa_r50_fpn_1x_coco.py'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    neck=dict(\n        type='PAFPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pafpn/metafile.yml",
    "content": "Collections:\n  - Name: PAFPN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - PAFPN\n    Paper:\n      URL: https://arxiv.org/abs/1803.01534\n      Title: 'Path Aggregation Network for Instance Segmentation'\n    README: configs/pafpn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/necks/pafpn.py#L11\n      Version: v2.0.0\n\nModels:\n  - Name: faster_rcnn_r50_pafpn_1x_coco\n    In Collection: PAFPN\n    Config: configs/pafpn/faster_rcnn_r50_pafpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.0\n      inference time (ms/im):\n        - value: 58.14\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pafpn/faster_rcnn_r50_pafpn_1x_coco/faster_rcnn_r50_pafpn_1x_coco_bbox_mAP-0.375_20200503_105836-b7b4b9bd.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/panoptic_fpn/metafile.yml",
    "content": "Collections:\n  - Name: PanopticFPN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - PanopticFPN\n    Paper:\n      URL: https://arxiv.org/pdf/1901.02446\n      Title: 'Panoptic feature pyramid networks'\n    README: configs/panoptic_fpn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/detectors/panoptic_fpn.py#L7\n      Version: v2.16.0\n\nModels:\n  - Name: panoptic_fpn_r50_fpn_1x_coco\n    In Collection: PanopticFPN\n    Config: configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.6\n      Epochs: 12\n    Results:\n    - Task: Panoptic Segmentation\n      Dataset: COCO\n      Metrics:\n        PQ: 40.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco/panoptic_fpn_r50_fpn_1x_coco_20210821_101153-9668fd13.pth\n\n  - Name: panoptic_fpn_r50_fpn_mstrain_3x_coco\n    In Collection: PanopticFPN\n    Config: configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 4.6\n      Epochs: 36\n    Results:\n    - Task: Panoptic Segmentation\n      Dataset: COCO\n      Metrics:\n        PQ: 42.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco/panoptic_fpn_r50_fpn_mstrain_3x_coco_20210824_171155-5650f98b.pth\n\n  - Name: panoptic_fpn_r101_fpn_1x_coco\n    In Collection: PanopticFPN\n    Config: configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.5\n      Epochs: 12\n    Results:\n    - Task: Panoptic Segmentation\n      Dataset: COCO\n      Metrics:\n        PQ: 42.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco/panoptic_fpn_r101_fpn_1x_coco_20210820_193950-ab9157a2.pth\n\n  - Name: panoptic_fpn_r101_fpn_mstrain_3x_coco\n    In Collection: PanopticFPN\n    Config: configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 6.5\n      Epochs: 36\n    Results:\n    - Task: Panoptic Segmentation\n      Dataset: COCO\n      Metrics:\n        PQ: 44.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco/panoptic_fpn_r101_fpn_mstrain_3x_coco_20210823_114712-9c99acc4.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/panoptic_fpn/panoptic_fpn_r101_fpn_1x_coco.py",
    "content": "_base_ = './panoptic_fpn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/panoptic_fpn/panoptic_fpn_r101_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './panoptic_fpn_r50_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_panoptic.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='PanopticFPN',\n    semantic_head=dict(\n        type='PanopticFPNHead',\n        num_things_classes=80,\n        num_stuff_classes=53,\n        in_channels=256,\n        inner_channels=128,\n        start_level=0,\n        end_level=4,\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n        conv_cfg=None,\n        loss_seg=dict(\n            type='CrossEntropyLoss', ignore_index=255, loss_weight=0.5)),\n    panoptic_fusion_head=dict(\n        type='HeuristicFusionHead',\n        num_things_classes=80,\n        num_stuff_classes=53),\n    test_cfg=dict(\n        panoptic=dict(\n            score_thr=0.6,\n            max_per_img=100,\n            mask_thr_binary=0.5,\n            mask_overlap=0.5,\n            nms=dict(type='nms', iou_threshold=0.5, class_agnostic=True),\n            stuff_area_limit=4096)))\n\ncustom_hooks = []\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/panoptic_fpn/panoptic_fpn_r50_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './panoptic_fpn_r50_fpn_1x_coco.py'\n\n# dataset settings\ndataset_type = 'CocoPanopticDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# In mstrain 3x config, img_scale=[(1333, 640), (1333, 800)],\n# multiscale_mode='range'\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadPanopticAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        with_seg=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='SegRescale', scale_factor=1 / 4),\n    dict(type='DefaultFormatBundle'),\n    dict(\n        type='Collect',\n        keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks', 'gt_semantic_seg']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    train=dict(\n        _delete_=True,\n        type='RepeatDataset',\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/panoptic_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            seg_prefix=data_root + 'annotations/panoptic_train2017/',\n            pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pascal_voc/faster_rcnn_r50_caffe_c4_mstrain_18k_voc0712.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_caffe_c4.py',\n    '../_base_/default_runtime.py'\n]\nmodel = dict(roi_head=dict(bbox_head=dict(num_classes=20)))\n\n# dataset settings\ndataset_type = 'VOCDataset'\ndata_root = 'data/VOCdevkit/'\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 480), (1333, 512), (1333, 544), (1333, 576),\n                   (1333, 608), (1333, 640), (1333, 672), (1333, 704),\n                   (1333, 736), (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=[\n            data_root + 'VOC2007/ImageSets/Main/trainval.txt',\n            data_root + 'VOC2012/ImageSets/Main/trainval.txt'\n        ],\n        img_prefix=[data_root + 'VOC2007/', data_root + 'VOC2012/'],\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'VOC2007/ImageSets/Main/test.txt',\n        img_prefix=data_root + 'VOC2007/',\n        pipeline=test_pipeline))\n\n# optimizer\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=100,\n    warmup_ratio=0.001,\n    step=[12000, 16000])\n\n# Runner type\nrunner = dict(type='IterBasedRunner', max_iters=18000)\n\ncheckpoint_config = dict(interval=3000)\nevaluation = dict(interval=3000, metric='mAP')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/voc0712.py',\n    '../_base_/default_runtime.py'\n]\nmodel = dict(roi_head=dict(bbox_head=dict(num_classes=20)))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\n# actual epoch = 3 * 3 = 9\nlr_config = dict(policy='step', step=[3])\n# runtime settings\nrunner = dict(\n    type='EpochBasedRunner', max_epochs=4)  # actual epoch = 4 * 3 = 12\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pascal_voc/faster_rcnn_r50_fpn_1x_voc0712_cocofmt.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py', '../_base_/datasets/voc0712.py',\n    '../_base_/default_runtime.py'\n]\nmodel = dict(roi_head=dict(bbox_head=dict(num_classes=20)))\n\nCLASSES = ('aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car',\n           'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',\n           'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor')\n\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/VOCdevkit/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1000, 600), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1000, 600),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type='RepeatDataset',\n        times=3,\n        dataset=dict(\n            type=dataset_type,\n            ann_file='data/voc0712_trainval.json',\n            img_prefix='data/VOCdevkit',\n            pipeline=train_pipeline,\n            classes=CLASSES)),\n    val=dict(\n        type=dataset_type,\n        ann_file='data/voc07_test.json',\n        img_prefix='data/VOCdevkit',\n        pipeline=test_pipeline,\n        classes=CLASSES),\n    test=dict(\n        type=dataset_type,\n        ann_file='data/voc07_test.json',\n        img_prefix='data/VOCdevkit',\n        pipeline=test_pipeline,\n        classes=CLASSES))\nevaluation = dict(interval=1, metric='bbox')\n\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\n# actual epoch = 3 * 3 = 9\nlr_config = dict(policy='step', step=[3])\n# runtime settings\nrunner = dict(\n    type='EpochBasedRunner', max_epochs=4)  # actual epoch = 4 * 3 = 12\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pascal_voc/retinanet_r50_fpn_1x_voc0712.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py', '../_base_/datasets/voc0712.py',\n    '../_base_/default_runtime.py'\n]\nmodel = dict(bbox_head=dict(num_classes=20))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(grad_clip=None)\n# learning policy\n# actual epoch = 3 * 3 = 9\nlr_config = dict(policy='step', step=[3])\n# runtime settings\nrunner = dict(\n    type='EpochBasedRunner', max_epochs=4)  # actual epoch = 4 * 3 = 12\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pascal_voc/ssd300_voc0712.py",
    "content": "_base_ = [\n    '../_base_/models/ssd300.py', '../_base_/datasets/voc0712.py',\n    '../_base_/default_runtime.py'\n]\nmodel = dict(\n    bbox_head=dict(\n        num_classes=20, anchor_generator=dict(basesize_ratio_range=(0.2,\n                                                                    0.9))))\n# dataset settings\ndataset_type = 'VOCDataset'\ndata_root = 'data/VOCdevkit/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(300, 300),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=3,\n    train=dict(\n        type='RepeatDataset', times=10, dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict()\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[16, 20])\ncheckpoint_config = dict(interval=1)\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pascal_voc/ssd512_voc0712.py",
    "content": "_base_ = 'ssd300_voc0712.py'\ninput_size = 512\nmodel = dict(\n    neck=dict(\n        out_channels=(512, 1024, 512, 256, 256, 256, 256),\n        level_strides=(2, 2, 2, 2, 1),\n        level_paddings=(1, 1, 1, 1, 1),\n        last_kernel_size=4),\n    bbox_head=dict(\n        in_channels=(512, 1024, 512, 256, 256, 256, 256),\n        anchor_generator=dict(\n            input_size=input_size,\n            strides=[8, 16, 32, 64, 128, 256, 512],\n            basesize_ratio_range=(0.15, 0.9),\n            ratios=([2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]))))\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(512, 512), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(512, 512),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/metafile.yml",
    "content": "Collections:\n  - Name: PISA\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - PISA\n        - RPN\n        - ResNet\n        - RoIPool\n    Paper:\n      URL: https://arxiv.org/abs/1904.04821\n      Title: 'Prime Sample Attention in Object Detection'\n    README: configs/pisa/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/roi_heads/pisa_roi_head.py#L8\n      Version: v2.1.0\n\nModels:\n  - Name: pisa_faster_rcnn_r50_fpn_1x_coco\n    In Collection: PISA\n    Config: configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_r50_fpn_1x_coco/pisa_faster_rcnn_r50_fpn_1x_coco-dea93523.pth\n\n  - Name: pisa_faster_rcnn_x101_32x4d_fpn_1x_coco\n    In Collection: PISA\n    Config: configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco-e4accec4.pth\n\n  - Name: pisa_mask_rcnn_r50_fpn_1x_coco\n    In Collection: PISA\n    Config: configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 35.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_mask_rcnn_r50_fpn_1x_coco/pisa_mask_rcnn_r50_fpn_1x_coco-dfcedba6.pth\n\n  - Name: pisa_retinanet_r50_fpn_1x_coco\n    In Collection: PISA\n    Config: configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_r50_fpn_1x_coco/pisa_retinanet_r50_fpn_1x_coco-76409952.pth\n\n  - Name: pisa_retinanet_x101_32x4d_fpn_1x_coco\n    In Collection: PISA\n    Config: configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco/pisa_retinanet_x101_32x4d_fpn_1x_coco-a0c13c73.pth\n\n  - Name: pisa_ssd300_coco\n    In Collection: PISA\n    Config: configs/pisa/pisa_ssd300_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 27.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd300_coco/pisa_ssd300_coco-710e3ac9.pth\n\n  - Name: pisa_ssd512_coco\n    In Collection: PISA\n    Config: configs/pisa/pisa_ssd512_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 31.8\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/pisa/pisa_ssd512_coco/pisa_ssd512_coco-247addee.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/pisa_faster_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    roi_head=dict(\n        type='PISARoIHead',\n        bbox_head=dict(\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n    train_cfg=dict(\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            sampler=dict(\n                type='ScoreHLRSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True,\n                k=0.5,\n                bias=0.),\n            isr=dict(k=2, bias=0),\n            carl=dict(k=1, bias=0.2))),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/pisa_faster_rcnn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_x101_32x4d_fpn_1x_coco.py'\n\nmodel = dict(\n    roi_head=dict(\n        type='PISARoIHead',\n        bbox_head=dict(\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n    train_cfg=dict(\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            sampler=dict(\n                type='ScoreHLRSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True,\n                k=0.5,\n                bias=0.),\n            isr=dict(k=2, bias=0),\n            carl=dict(k=1, bias=0.2))),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/pisa_mask_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    roi_head=dict(\n        type='PISARoIHead',\n        bbox_head=dict(\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n    train_cfg=dict(\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            sampler=dict(\n                type='ScoreHLRSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True,\n                k=0.5,\n                bias=0.),\n            isr=dict(k=2, bias=0),\n            carl=dict(k=1, bias=0.2))),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/pisa_mask_rcnn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py'\n\nmodel = dict(\n    roi_head=dict(\n        type='PISARoIHead',\n        bbox_head=dict(\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))),\n    train_cfg=dict(\n        rpn_proposal=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0),\n        rcnn=dict(\n            sampler=dict(\n                type='ScoreHLRSampler',\n                num=512,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True,\n                k=0.5,\n                bias=0.),\n            isr=dict(k=2, bias=0),\n            carl=dict(k=1, bias=0.2))),\n    test_cfg=dict(\n        rpn=dict(\n            nms_pre=2000,\n            max_per_img=2000,\n            nms=dict(type='nms', iou_threshold=0.7),\n            min_bbox_size=0)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/pisa_retinanet_r50_fpn_1x_coco.py",
    "content": "_base_ = '../retinanet/retinanet_r50_fpn_1x_coco.py'\n\nmodel = dict(\n    bbox_head=dict(\n        type='PISARetinaHead',\n        loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)),\n    train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/pisa_retinanet_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = '../retinanet/retinanet_x101_32x4d_fpn_1x_coco.py'\n\nmodel = dict(\n    bbox_head=dict(\n        type='PISARetinaHead',\n        loss_bbox=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0)),\n    train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/pisa_ssd300_coco.py",
    "content": "_base_ = '../ssd/ssd300_coco.py'\n\nmodel = dict(\n    bbox_head=dict(type='PISASSDHead'),\n    train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2)))\n\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pisa/pisa_ssd512_coco.py",
    "content": "_base_ = '../ssd/ssd512_coco.py'\n\nmodel = dict(\n    bbox_head=dict(type='PISASSDHead'),\n    train_cfg=dict(isr=dict(k=2., bias=0.), carl=dict(k=1., bias=0.2)))\n\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/point_rend/metafile.yml",
    "content": "Collections:\n  - Name: PointRend\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - PointRend\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1912.08193\n      Title: 'PointRend: Image Segmentation as Rendering'\n    README: configs/point_rend/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.2.0/mmdet/models/detectors/point_rend.py#L6\n      Version: v2.2.0\n\nModels:\n  - Name: point_rend_r50_caffe_fpn_mstrain_1x_coco\n    In Collection: PointRend\n    Config: configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.6\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco/point_rend_r50_caffe_fpn_mstrain_1x_coco-1bcb5fb4.pth\n\n  - Name: point_rend_r50_caffe_fpn_mstrain_3x_coco\n    In Collection: PointRend\n    Config: configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 4.6\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco/point_rend_r50_caffe_fpn_mstrain_3x_coco-e0ebb6b7.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py'\n# model settings\nmodel = dict(\n    type='PointRend',\n    roi_head=dict(\n        type='PointRendRoIHead',\n        mask_roi_extractor=dict(\n            type='GenericRoIExtractor',\n            aggregation='concat',\n            roi_layer=dict(\n                _delete_=True, type='SimpleRoIAlign', output_size=14),\n            out_channels=256,\n            featmap_strides=[4]),\n        mask_head=dict(\n            _delete_=True,\n            type='CoarseMaskHead',\n            num_fcs=2,\n            in_channels=256,\n            conv_out_channels=256,\n            fc_out_channels=1024,\n            num_classes=80,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),\n        point_head=dict(\n            type='MaskPointHead',\n            num_fcs=3,\n            in_channels=256,\n            fc_channels=256,\n            num_classes=80,\n            coarse_pred_each_layer=True,\n            loss_point=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),\n    # model training and testing settings\n    train_cfg=dict(\n        rcnn=dict(\n            mask_size=7,\n            num_points=14 * 14,\n            oversample_ratio=3,\n            importance_sample_ratio=0.75)),\n    test_cfg=dict(\n        rcnn=dict(\n            subdivision_steps=5,\n            subdivision_num_points=28 * 28,\n            scale_factor=2)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/point_rend/point_rend_r50_caffe_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './point_rend_r50_caffe_fpn_mstrain_1x_coco.py'\n# learning policy\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/metafile.yml",
    "content": "Models:\n  - Name: retinanet_pvt-t_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/pvt/retinanet_pvt-t_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.5\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformer\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-t_fpn_1x_coco/retinanet_pvt-t_fpn_1x_coco_20210831_103110-17b566bd.pth\n    Paper:\n      URL: https://arxiv.org/abs/2102.12122\n      Title: \"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L315\n      Version: 2.17.0\n\n  - Name: retinanet_pvt-s_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/pvt/retinanet_pvt-s_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 14.5\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformer\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-s_fpn_1x_coco/retinanet_pvt-s_fpn_1x_coco_20210906_142921-b6c94a5b.pth\n    Paper:\n      URL: https://arxiv.org/abs/2102.12122\n      Title: \"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L315\n      Version: 2.17.0\n\n  - Name: retinanet_pvt-m_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/pvt/retinanet_pvt-m_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 20.9\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformer\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvt-m_fpn_1x_coco/retinanet_pvt-m_fpn_1x_coco_20210831_103243-55effa1b.pth\n    Paper:\n      URL: https://arxiv.org/abs/2102.12122\n      Title: \"Pyramid Vision Transformer: A Versatile Backbone for Dense Prediction without Convolutions\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L315\n      Version: 2.17.0\n\n  - Name: retinanet_pvtv2-b0_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.4\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformerV2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 
37.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b0_fpn_1x_coco/retinanet_pvtv2-b0_fpn_1x_coco_20210831_103157-13e9aabe.pth\n    Paper:\n      URL: https://arxiv.org/abs/2106.13797\n      Title: \"PVTv2: Improved Baselines with Pyramid Vision Transformer\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543\n      Version: 2.17.0\n\n  - Name: retinanet_pvtv2-b1_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 9.5\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformerV2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b1_fpn_1x_coco/retinanet_pvtv2-b1_fpn_1x_coco_20210831_103318-7e169a7d.pth\n    Paper:\n      URL: https://arxiv.org/abs/2106.13797\n      Title: \"PVTv2: Improved Baselines with Pyramid Vision Transformer\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543\n      Version: 2.17.0\n\n  - Name: retinanet_pvtv2-b2_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 16.2\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformerV2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b2_fpn_1x_coco/retinanet_pvtv2-b2_fpn_1x_coco_20210901_174843-529f0b9a.pth\n    Paper:\n      URL: https://arxiv.org/abs/2106.13797\n      Title: \"PVTv2: Improved Baselines with Pyramid Vision Transformer\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543\n      Version: 2.17.0\n\n  - Name: retinanet_pvtv2-b3_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 23.0\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformerV2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b3_fpn_1x_coco/retinanet_pvtv2-b3_fpn_1x_coco_20210903_151512-8357deff.pth\n    Paper:\n      URL: https://arxiv.org/abs/2106.13797\n      Title: \"PVTv2: Improved Baselines with Pyramid Vision Transformer\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543\n      Version: 2.17.0\n\n  - Name: retinanet_pvtv2-b4_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: 
configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 17.0\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformerV2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b4_fpn_1x_coco/retinanet_pvtv2-b4_fpn_1x_coco_20210901_170151-83795c86.pth\n    Paper:\n      URL: https://arxiv.org/abs/2106.13797\n      Title: \"PVTv2: Improved Baselines with Pyramid Vision Transformer\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543\n      Version: 2.17.0\n\n  - Name: retinanet_pvtv2-b5_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 18.7\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x NVIDIA V100 GPUs\n      Architecture:\n        - PyramidVisionTransformerV2\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/pvt/retinanet_pvtv2-b5_fpn_1x_coco/retinanet_pvtv2-b5_fpn_1x_coco_20210902_201800-3420eb57.pth\n    Paper:\n      URL: https://arxiv.org/abs/2106.13797\n      Title: \"PVTv2: Improved Baselines with Pyramid Vision Transformer\"\n    README: configs/pvt/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.17.0/mmdet/models/backbones/pvt.py#L543\n      Version: 2.17.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvt-l_fpn_1x_coco.py",
    "content": "_base_ = 'retinanet_pvt-t_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        num_layers=[3, 8, 27, 3],\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_large.pth')))\nfp16 = dict(loss_scale=dict(init_scale=512))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvt-m_fpn_1x_coco.py",
    "content": "_base_ = 'retinanet_pvt-t_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        num_layers=[3, 4, 18, 3],\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_medium.pth')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvt-s_fpn_1x_coco.py",
    "content": "_base_ = 'retinanet_pvt-t_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        num_layers=[3, 4, 6, 3],\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_small.pth')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvt-t_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='RetinaNet',\n    backbone=dict(\n        _delete_=True,\n        type='PyramidVisionTransformer',\n        num_layers=[2, 2, 2, 2],\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_tiny.pth')),\n    neck=dict(in_channels=[64, 128, 320, 512]))\n# optimizer\noptimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvtv2-b0_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='RetinaNet',\n    backbone=dict(\n        _delete_=True,\n        type='PyramidVisionTransformerV2',\n        embed_dims=32,\n        num_layers=[2, 2, 2, 2],\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_v2_b0.pth')),\n    neck=dict(in_channels=[32, 64, 160, 256]))\n# optimizer\noptimizer = dict(_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvtv2-b1_fpn_1x_coco.py",
    "content": "_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        embed_dims=64,\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_v2_b1.pth')),\n    neck=dict(in_channels=[64, 128, 320, 512]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvtv2-b2_fpn_1x_coco.py",
    "content": "_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        embed_dims=64,\n        num_layers=[3, 4, 6, 3],\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_v2_b2.pth')),\n    neck=dict(in_channels=[64, 128, 320, 512]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvtv2-b3_fpn_1x_coco.py",
    "content": "_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        embed_dims=64,\n        num_layers=[3, 4, 18, 3],\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_v2_b3.pth')),\n    neck=dict(in_channels=[64, 128, 320, 512]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvtv2-b4_fpn_1x_coco.py",
    "content": "_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        embed_dims=64,\n        num_layers=[3, 8, 27, 3],\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_v2_b4.pth')),\n    neck=dict(in_channels=[64, 128, 320, 512]))\n# optimizer\noptimizer = dict(\n    _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001)\n# dataset settings\ndata = dict(samples_per_gpu=1, workers_per_gpu=1)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (1 samples per GPU)\nauto_scale_lr = dict(base_batch_size=8)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/pvt/retinanet_pvtv2-b5_fpn_1x_coco.py",
    "content": "_base_ = 'retinanet_pvtv2-b0_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        embed_dims=64,\n        num_layers=[3, 6, 40, 3],\n        mlp_ratios=(4, 4, 4, 4),\n        init_cfg=dict(checkpoint='https://github.com/whai362/PVT/'\n                      'releases/download/v2/pvt_v2_b5.pth')),\n    neck=dict(in_channels=[64, 128, 320, 512]))\n# optimizer\noptimizer = dict(\n    _delete_=True, type='AdamW', lr=0.0001 / 1.4, weight_decay=0.0001)\n# dataset settings\ndata = dict(samples_per_gpu=1, workers_per_gpu=1)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (1 samples per GPU)\nauto_scale_lr = dict(base_batch_size=8)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/queryinst/metafile.yml",
    "content": "Collections:\n  - Name: QueryInst\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n        - QueryInst\n    Paper:\n      URL: https://openaccess.thecvf.com/content/ICCV2021/papers/Fang_Instances_As_Queries_ICCV_2021_paper.pdf\n      Title: 'Instances as Queries'\n    README: configs/queryinst/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/detectors/queryinst.py\n      Version: v2.18.0\n\nModels:\n  - Name: queryinst_r50_fpn_1x_coco\n    In Collection: QueryInst\n    Config: configs/queryinst/queryinst_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916-5a8f1998.pth\n\n  - Name: queryinst_r50_fpn_mstrain_480-800_3x_coco\n    In Collection: QueryInst\n    Config: configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643-7837af86.pth\n\n  - Name: queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco\n    In Collection: QueryInst\n    Config: configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802-85cffbd8.pth\n\n  - Name: queryinst_r101_fpn_mstrain_480-800_3x_coco\n    In Collection: QueryInst\n    Config: configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048-91f9995b.pth\n\n  - Name: queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco\n    In Collection: QueryInst\n    Config: configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 49.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 42.9\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621-76cce59f.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py",
    "content": "_base_ = './queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py",
    "content": "_base_ = './queryinst_r50_fpn_mstrain_480-800_3x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/queryinst/queryinst_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nnum_stages = 6\nnum_proposals = 100\nmodel = dict(\n    type='QueryInst',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=0,\n        add_extra_convs='on_input',\n        num_outs=4),\n    rpn_head=dict(\n        type='EmbeddingRPNHead',\n        num_proposals=num_proposals,\n        proposal_feature_channel=256),\n    roi_head=dict(\n        type='SparseRoIHead',\n        num_stages=num_stages,\n        stage_loss_weights=[1] * num_stages,\n        proposal_feature_channel=256,\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='DIIHead',\n                num_classes=80,\n                num_ffn_fcs=2,\n                num_heads=8,\n                num_cls_fcs=1,\n                num_reg_fcs=3,\n                feedforward_channels=2048,\n                in_channels=256,\n                dropout=0.0,\n                ffn_act_cfg=dict(type='ReLU', inplace=True),\n                dynamic_conv_cfg=dict(\n                    type='DynamicConv',\n                    in_channels=256,\n                    feat_channels=64,\n                    out_channels=256,\n                    input_feat_shape=7,\n                    act_cfg=dict(type='ReLU', inplace=True),\n                    norm_cfg=dict(type='LN')),\n                loss_bbox=dict(type='L1Loss', loss_weight=5.0),\n                loss_iou=dict(type='GIoULoss', loss_weight=2.0),\n                loss_cls=dict(\n                    type='FocalLoss',\n                    use_sigmoid=True,\n                    gamma=2.0,\n                    alpha=0.25,\n                    loss_weight=2.0),\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    clip_border=False,\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.5, 0.5, 1., 1.])) for _ in range(num_stages)\n        ],\n        mask_head=[\n            dict(\n                type='DynamicMaskHead',\n                dynamic_conv_cfg=dict(\n                    type='DynamicConv',\n                    in_channels=256,\n                    feat_channels=64,\n                    out_channels=256,\n                    input_feat_shape=14,\n                    with_proj=False,\n                    act_cfg=dict(type='ReLU', inplace=True),\n                    norm_cfg=dict(type='LN')),\n                num_convs=4,\n                num_classes=80,\n                roi_feat_size=14,\n                in_channels=256,\n                conv_kernel_size=3,\n                conv_out_channels=256,\n     
           class_agnostic=False,\n                norm_cfg=dict(type='BN'),\n                upsample_cfg=dict(type='deconv', scale_factor=2),\n                loss_mask=dict(\n                    type='DiceLoss',\n                    loss_weight=8.0,\n                    use_sigmoid=True,\n                    activate=False,\n                    eps=1e-5)) for _ in range(num_stages)\n        ]),\n    # training and testing settings\n    train_cfg=dict(\n        rpn=None,\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='HungarianAssigner',\n                    cls_cost=dict(type='FocalLossCost', weight=2.0),\n                    reg_cost=dict(type='BBoxL1Cost', weight=5.0),\n                    iou_cost=dict(type='IoUCost', iou_mode='giou',\n                                  weight=2.0)),\n                sampler=dict(type='PseudoSampler'),\n                pos_weight=1,\n                mask_size=28,\n            ) for _ in range(num_stages)\n        ]),\n    test_cfg=dict(\n        rpn=None, rcnn=dict(max_per_img=num_proposals, mask_thr_binary=0.5)))\n\n# optimizer\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=0.0001,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(\n        custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}))\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=0.1, norm_type=2))\n# learning policy\nlr_config = dict(policy='step', step=[8, 11], warmup_iters=1000)\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py",
    "content": "_base_ = './queryinst_r50_fpn_mstrain_480-800_3x_coco.py'\nnum_proposals = 300\nmodel = dict(\n    rpn_head=dict(num_proposals=num_proposals),\n    test_cfg=dict(\n        _delete_=True,\n        rpn=None,\n        rcnn=dict(max_per_img=num_proposals, mask_thr_binary=0.5)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# augmentation strategy originates from DETR.\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='AutoAugment',\n        policies=[[\n            dict(\n                type='Resize',\n                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),\n                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),\n                           (736, 1333), (768, 1333), (800, 1333)],\n                multiscale_mode='value',\n                keep_ratio=True)\n        ],\n                  [\n                      dict(\n                          type='Resize',\n                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],\n                          multiscale_mode='value',\n                          keep_ratio=True),\n                      dict(\n                          type='RandomCrop',\n                          crop_type='absolute_range',\n                          crop_size=(384, 600),\n                          allow_negative_crop=True),\n                      dict(\n                          type='Resize',\n                          img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                                     (576, 1333), (608, 1333), (640, 1333),\n                                     (672, 1333), (704, 1333), (736, 1333),\n                                     (768, 1333), (800, 1333)],\n                          multiscale_mode='value',\n                          override=True,\n                          keep_ratio=True)\n                  ]]),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py",
    "content": "_base_ = './queryinst_r50_fpn_1x_coco.py'\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nmin_values = (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, value) for value in min_values],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])\n]\n\ndata = dict(train=dict(pipeline=train_pipeline))\nlr_config = dict(policy='step', step=[27, 33])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_1.6gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[72, 168, 408, 912],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain_3x_coco_instance.py',\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py'\n]\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_3.2gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[96, 192, 432, 1008],\n        out_channels=256,\n        num_outs=5))\nimg_norm_cfg = dict(\n    # The mean and std are used in PyCls when training RegNets\n    mean=[103.53, 116.28, 123.675],\n    std=[57.375, 57.12, 58.395],\n    to_rgb=False)\ntrain_pipeline = [\n    # Images are converted to float32 directly after loading in PyCls\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\noptimizer = dict(weight_decay=0.00005)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_400mf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_400mf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[32, 64, 160, 384],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_4.0gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_4.0gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[80, 240, 560, 1360],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_800mf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_800mf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[64, 128, 288, 672],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_1.6gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[72, 168, 408, 912],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_3.2gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[96, 192, 432, 1008],\n        out_channels=256,\n        num_outs=5))\nimg_norm_cfg = dict(\n    # The mean and std are used in PyCls when training RegNets\n    mean=[103.53, 116.28, 123.675],\n    std=[57.375, 57.12, 58.395],\n    to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py",
    "content": "_base_ = './faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py'\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain_3x_coco.py', '../_base_/models/faster_rcnn_r50_fpn.py'\n]\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_3.2gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[96, 192, 432, 1008],\n        out_channels=256,\n        num_outs=5))\nimg_norm_cfg = dict(\n    # The mean and std are used in PyCls when training RegNets\n    mean=[103.53, 116.28, 123.675],\n    std=[57.375, 57.12, 58.395],\n    to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\noptimizer = dict(weight_decay=0.00005)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_400mf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_400mf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[32, 64, 160, 384],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_4.0gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_4.0gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[80, 240, 560, 1360],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = 'faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_800mf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_800mf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[64, 128, 288, 672],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_1.6gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[72, 168, 408, 912],\n        out_channels=256,\n        num_outs=5))\n\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_12gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_12gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[224, 448, 896, 2240],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_3.2gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[96, 192, 432, 1008],\n        out_channels=256,\n        num_outs=5))\nimg_norm_cfg = dict(\n    # The mean and std are used in PyCls when training RegNets\n    mean=[103.53, 116.28, 123.675],\n    std=[57.375, 57.12, 58.395],\n    to_rgb=False)\ntrain_pipeline = [\n    # Images are converted to float32 directly after loading in PyCls\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py",
    "content": "_base_ = 'mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_3.2gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[96, 192, 432, 1008],\n        out_channels=256,\n        num_outs=5))\nimg_norm_cfg = dict(\n    # The mean and std are used in PyCls when training RegNets\n    mean=[103.53, 116.28, 123.675],\n    std=[57.375, 57.12, 58.395],\n    to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_400mf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_400mf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[32, 64, 160, 384],\n        out_channels=256,\n        num_outs=5))\n\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_4.0gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_4.0gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[80, 240, 560, 1360],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_4.0gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_4.0gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[80, 240, 560, 1360],\n        out_channels=256,\n        num_outs=5))\n\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_6.4gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_6.4gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[168, 392, 784, 1624],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py",
    "content": "_base_ = [\n    '../common/mstrain-poly_3x_coco_instance.py',\n    '../_base_/models/mask_rcnn_r50_fpn.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_800mf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_800mf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[64, 128, 288, 672],\n        out_channels=256,\n        num_outs=5))\n\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py",
    "content": "_base_ = './mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_8.0gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_8.0gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[80, 240, 720, 1920],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/metafile.yml",
    "content": "Models:\n  - Name: mask_rcnn_regnetx-3.2GF_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.0\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_1x_coco_20200520_163141-2a9d1814.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-4GF_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.5\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_1x_coco/mask_rcnn_regnetx-4GF_fpn_1x_coco_20200517_180217-32e9c92d.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-6.4GF_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.1\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-6.4GF_fpn_1x_coco/mask_rcnn_regnetx-6.4GF_fpn_1x_coco_20200517_180439-3a7aae83.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-8GF_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x 
V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-8GF_fpn_1x_coco/mask_rcnn_regnetx-8GF_fpn_1x_coco_20200517_180515-09daa87e.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-12GF_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.4\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-12GF_fpn_1x_coco/mask_rcnn_regnetx-12GF_fpn_1x_coco_20200517_180552-b538bd8b.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.0\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco/mask_rcnn_regnetx-3.2GF_fpn_mdconv_c3-c5_1x_coco_20200520_172726-75f40794.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: faster_rcnn_regnetx-3.2GF_fpn_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.5\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.9\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_1x_coco/faster_rcnn_regnetx-3.2GF_fpn_1x_coco_20200517_175927-126fd9bf.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: faster_rcnn_regnetx-3.2GF_fpn_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 4.5\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_2x_coco/faster_rcnn_regnetx-3.2GF_fpn_2x_coco_20200520_223955-e2081918.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: retinanet_regnetx-800MF_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 2.5\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 35.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-800MF_fpn_1x_coco/retinanet_regnetx-800MF_fpn_1x_coco_20200517_191403-f6f91d10.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: retinanet_regnetx-1.6GF_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.3\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco/retinanet_regnetx-1.6GF_fpn_1x_coco_20200517_191403-37009a9d.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: retinanet_regnetx-3.2GF_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.2\n      Epochs: 12\n      
Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco/retinanet_regnetx-3.2GF_fpn_1x_coco_20200520_163141-cb1509e8.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 2.3\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210526_095112-e1967c37.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 2.8\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210526_095118-a2c70b20.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 3.4\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-1_20210526_095325-94aa46cc.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n   
 README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 4.4\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-3_20210526_095152-e16a5227.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco\n    In Collection: Faster R-CNN\n    Config: configs/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 4.9\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/faster_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210526_095201-65eaf841.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.0\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 2.5\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with 
Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 34.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-400MF_fpn_mstrain-poly_3x_coco_20210601_235443-8aac57a4.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 2.9\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-800MF_fpn_mstrain-poly_3x_coco_20210602_210641-715d51f5.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 3.6\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.9\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-1.6GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-1_20210602_210641-6764cff5.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.0\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.1\n      - Task: 
Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco_20200521_202221-99879813.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.1\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco/mask_rcnn_regnetx-4GF_fpn_mstrain-poly_3x_coco_20210602_032621-00f0331c.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 4.3\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-400MF_fpn_mstrain_3x_coco_20210715_211619-5142f449.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 4.8\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.6\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-800MF_fpn_mstrain_3x_coco_20210715_211616-dcbd13f4.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.4\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-1.6GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-1_20210715_211616-75f29a61.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 6.4\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-3.2GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-3_20210715_211616-b9c2c58b.pth\n    Paper:\n      URL: https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n\n  - Name: cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco.py\n    Metadata:\n      Training Memory (GB): 6.9\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - RegNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/regnet/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco/cascade_mask_rcnn_regnetx-4GF_fpn_mstrain_3x_coco_20210715_212034-cbb1be4c.pth\n    Paper:\n      URL: 
https://arxiv.org/abs/2003.13678\n      Title: 'Designing Network Design Spaces'\n    README: configs/regnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/regnet.py#L11\n      Version: v2.1.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/retinanet_regnetx-1.6GF_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_regnetx-3.2GF_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_1.6gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_1.6gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[72, 168, 408, 912],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/retinanet_regnetx-3.2GF_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='RegNet',\n        arch='regnetx_3.2gf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_3.2gf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[96, 192, 432, 1008],\n        out_channels=256,\n        num_outs=5))\nimg_norm_cfg = dict(\n    # The mean and std are used in PyCls when training RegNets\n    mean=[103.53, 116.28, 123.675],\n    std=[57.375, 57.12, 58.395],\n    to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\noptimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.00005)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/regnet/retinanet_regnetx-800MF_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_regnetx-3.2GF_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='RegNet',\n        arch='regnetx_800mf',\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://regnetx_800mf')),\n    neck=dict(\n        type='FPN',\n        in_channels=[64, 128, 288, 672],\n        out_channels=256,\n        num_outs=5))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py'\nmodel = dict(bbox_head=dict(transform_method='minmax', use_grid_points=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py'\nmodel = dict(\n    bbox_head=dict(transform_method='minmax', use_grid_points=True),\n    # training and testing settings\n    train_cfg=dict(\n        init=dict(\n            assigner=dict(\n                _delete_=True,\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.4,\n                min_pos_iou=0,\n                ignore_iof_thr=-1))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/metafile.yml",
    "content": "Collections:\n  - Name: RepPoints\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Group Normalization\n        - FPN\n        - RepPoints\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1904.11490\n      Title: 'RepPoints: Point Set Representation for Object Detection'\n    README: configs/reppoints/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/reppoints_detector.py#L9\n      Version: v2.0.0\n\nModels:\n  - Name: bbox_r50_grid_fpn_gn-neck+head_1x_coco\n    In Collection: RepPoints\n    Config: configs/reppoints/bbox_r50_grid_fpn_gn-neck+head_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.9\n      inference time (ms/im):\n        - value: 62.89\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco_20200329_145916-0eedf8d1.pth\n\n  - Name: bbox_r50_grid_center_fpn_gn-neck+head_1x_coco\n    In Collection: RepPoints\n    Config: configs/reppoints/bbox_r50_grid_center_fpn_gn-neck+head_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.9\n      inference time (ms/im):\n        - value: 64.94\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/reppoints/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco/bbox_r50_grid_fpn_gn-neck%2Bhead_1x_coco_20200329_145916-0eedf8d1.pth\n\n  - Name: reppoints_moment_r50_fpn_1x_coco\n    In Collection: RepPoints\n    Config: configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.3\n      inference time (ms/im):\n        - value: 54.05\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_1x_coco/reppoints_moment_r50_fpn_1x_coco_20200330-b73db8d1.pth\n\n  - Name: reppoints_moment_r50_fpn_gn-neck+head_1x_coco\n    In Collection: RepPoints\n    Config: configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.9\n      inference time (ms/im):\n        - value: 57.14\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_1x_coco_20200329_145952-3e51b550.pth\n\n  - Name: reppoints_moment_r50_fpn_gn-neck+head_2x_coco\n    In 
Collection: RepPoints\n    Config: configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py\n    Metadata:\n      Training Memory (GB): 3.9\n      inference time (ms/im):\n        - value: 57.14\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r50_fpn_gn-neck%2Bhead_2x_coco_20200329-91babaa2.pth\n\n  - Name: reppoints_moment_r101_fpn_gn-neck+head_2x_coco\n    In Collection: RepPoints\n    Config: configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py\n    Metadata:\n      Training Memory (GB): 5.8\n      inference time (ms/im):\n        - value: 72.99\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_gn-neck%2Bhead_2x_coco_20200329-4fbc7310.pth\n\n  - Name: reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco\n    In Collection: RepPoints\n    Config: configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py\n    Metadata:\n      Training Memory (GB): 5.9\n      inference time (ms/im):\n        - value: 82.64\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329-3309fbf2.pth\n\n  - Name: reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco\n    In Collection: RepPoints\n    Config: configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.1\n      inference time (ms/im):\n        - value: 107.53\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck%2Bhead_2x_coco_20200329-f87da1ea.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/reppoints_minmax_r50_fpn_gn-neck+head_1x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py'\nmodel = dict(bbox_head=dict(transform_method='minmax'))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/reppoints_moment_r101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/reppoints_moment_r101_fpn_gn-neck+head_2x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/reppoints_moment_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='RepPointsDetector',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_input',\n        num_outs=5),\n    bbox_head=dict(\n        type='RepPointsHead',\n        num_classes=80,\n        in_channels=256,\n        feat_channels=256,\n        point_feat_channels=256,\n        stacked_convs=3,\n        num_points=9,\n        gradient_mul=0.1,\n        point_strides=[8, 16, 32, 64, 128],\n        point_base_scale=4,\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox_init=dict(type='SmoothL1Loss', beta=0.11, loss_weight=0.5),\n        loss_bbox_refine=dict(type='SmoothL1Loss', beta=0.11, loss_weight=1.0),\n        transform_method='moment'),\n    # training and testing settings\n    train_cfg=dict(\n        init=dict(\n            assigner=dict(type='PointAssigner', scale=4, pos_num=1),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False),\n        refine=dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.4,\n                min_pos_iou=0,\n                ignore_iof_thr=-1),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False)),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.5),\n        max_per_img=100))\noptimizer = dict(lr=0.01)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_1x_coco.py'\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(neck=dict(norm_cfg=norm_cfg), bbox_head=dict(norm_cfg=norm_cfg))\noptimizer = dict(lr=0.01)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py'\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/reppoints_moment_x101_fpn_dconv_c3-c5_gn-neck+head_2x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_gn-neck+head_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/reppoints/reppoints_partial_minmax_r50_fpn_gn-neck+head_1x_coco.py",
    "content": "_base_ = './reppoints_moment_r50_fpn_gn-neck+head_1x_coco.py'\nmodel = dict(bbox_head=dict(transform_method='partial_minmax'))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='Res2Net',\n        depth=101,\n        scales=4,\n        base_width=26,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://res2net101_v1d_26w_4s')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='Res2Net',\n        depth=101,\n        scales=4,\n        base_width=26,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://res2net101_v1d_26w_4s')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='Res2Net',\n        depth=101,\n        scales=4,\n        base_width=26,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://res2net101_v1d_26w_4s')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/res2net/htc_r2_101_fpn_20e_coco.py",
    "content": "_base_ = '../htc/htc_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='Res2Net',\n        depth=101,\n        scales=4,\n        base_width=26,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://res2net101_v1d_26w_4s')))\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='Res2Net',\n        depth=101,\n        scales=4,\n        base_width=26,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://res2net101_v1d_26w_4s')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/res2net/metafile.yml",
    "content": "Models:\n  - Name: faster_rcnn_r2_101_fpn_2x_coco\n    In Collection: Faster R-CNN\n    Config: configs/res2net/faster_rcnn_r2_101_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.4\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Res2Net\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/faster_rcnn_r2_101_fpn_2x_coco/faster_rcnn_r2_101_fpn_2x_coco-175f1da6.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.01169\n      Title: 'Res2Net for object detection and instance segmentation'\n    README: configs/res2net/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239\n      Version: v2.1.0\n\n  - Name: mask_rcnn_r2_101_fpn_2x_coco\n    In Collection: Mask R-CNN\n    Config: configs/res2net/mask_rcnn_r2_101_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.9\n      Epochs: 24\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Res2Net\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/mask_rcnn_r2_101_fpn_2x_coco/mask_rcnn_r2_101_fpn_2x_coco-17f061e8.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.01169\n      Title: 'Res2Net for object detection and instance segmentation'\n    README: configs/res2net/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239\n      Version: v2.1.0\n\n  - Name: cascade_rcnn_r2_101_fpn_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/res2net/cascade_rcnn_r2_101_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 7.8\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Res2Net\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_rcnn_r2_101_fpn_20e_coco/cascade_rcnn_r2_101_fpn_20e_coco-f4b7b7db.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.01169\n      Title: 'Res2Net for object detection and instance segmentation'\n    README: configs/res2net/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239\n      Version: v2.1.0\n\n  - Name: cascade_mask_rcnn_r2_101_fpn_20e_coco\n    In Collection: Cascade R-CNN\n    Config: configs/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 9.5\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Res2Net\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.4\n      - Task: Instance Segmentation\n     
   Dataset: COCO\n        Metrics:\n          mask AP: 40.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/cascade_mask_rcnn_r2_101_fpn_20e_coco/cascade_mask_rcnn_r2_101_fpn_20e_coco-8a7b41e1.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.01169\n      Title: 'Res2Net for object detection and instance segmentation'\n    README: configs/res2net/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239\n      Version: v2.1.0\n\n  - Name: htc_r2_101_fpn_20e_coco\n    In Collection: HTC\n    Config: configs/res2net/htc_r2_101_fpn_20e_coco.py\n    Metadata:\n      Epochs: 20\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Res2Net\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/res2net/htc_r2_101_fpn_20e_coco/htc_r2_101_fpn_20e_coco-3a8d2112.pth\n    Paper:\n      URL: https://arxiv.org/abs/1904.01169\n      Title: 'Res2Net for object detection and instance segmentation'\n    README: configs/res2net/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.1.0/mmdet/models/backbones/res2net.py#L239\n      Version: v2.1.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py",
    "content": "_base_ = './cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        stem_channels=128,\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='open-mmlab://resnest101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py'\nnorm_cfg = dict(type='SyncBN', requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNeSt',\n        stem_channels=64,\n        depth=50,\n        radix=2,\n        reduction_factor=4,\n        avg_down_stride=True,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=False,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnest50')),\n    roi_head=dict(\n        bbox_head=[\n            dict(\n                type='Shared4Conv1FCBBoxHead',\n                in_channels=256,\n                conv_out_channels=256,\n                fc_out_channels=1024,\n                norm_cfg=norm_cfg,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared4Conv1FCBBoxHead',\n                in_channels=256,\n                conv_out_channels=256,\n                fc_out_channels=1024,\n                norm_cfg=norm_cfg,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared4Conv1FCBBoxHead',\n                in_channels=256,\n                conv_out_channels=256,\n                fc_out_channels=1024,\n                norm_cfg=norm_cfg,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ],\n        mask_head=dict(norm_cfg=norm_cfg)))\n# # use ResNeSt img_norm\nimg_norm_cfg = dict(\n    mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', 
flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py",
    "content": "_base_ = './cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        stem_channels=128,\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='open-mmlab://resnest101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py",
    "content": "_base_ = '../cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py'\nnorm_cfg = dict(type='SyncBN', requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNeSt',\n        stem_channels=64,\n        depth=50,\n        radix=2,\n        reduction_factor=4,\n        avg_down_stride=True,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=False,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnest50')),\n    roi_head=dict(\n        bbox_head=[\n            dict(\n                type='Shared4Conv1FCBBoxHead',\n                in_channels=256,\n                conv_out_channels=256,\n                fc_out_channels=1024,\n                norm_cfg=norm_cfg,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared4Conv1FCBBoxHead',\n                in_channels=256,\n                conv_out_channels=256,\n                fc_out_channels=1024,\n                norm_cfg=norm_cfg,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared4Conv1FCBBoxHead',\n                in_channels=256,\n                conv_out_channels=256,\n                fc_out_channels=1024,\n                norm_cfg=norm_cfg,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ], ))\n# # use ResNeSt img_norm\nimg_norm_cfg = dict(\n    mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=False,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    
dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py",
    "content": "_base_ = './faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        stem_channels=128,\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='open-mmlab://resnest101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py",
    "content": "_base_ = '../faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\nnorm_cfg = dict(type='SyncBN', requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNeSt',\n        stem_channels=64,\n        depth=50,\n        radix=2,\n        reduction_factor=4,\n        avg_down_stride=True,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=False,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnest50')),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=norm_cfg)))\n# # use ResNeSt img_norm\nimg_norm_cfg = dict(\n    mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=False,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py",
    "content": "_base_ = './mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        stem_channels=128,\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='open-mmlab://resnest101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py",
    "content": "_base_ = '../mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py'\nnorm_cfg = dict(type='SyncBN', requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        type='ResNeSt',\n        stem_channels=64,\n        depth=50,\n        radix=2,\n        reduction_factor=4,\n        avg_down_stride=True,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=norm_cfg,\n        norm_eval=False,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://resnest50')),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=norm_cfg),\n        mask_head=dict(norm_cfg=norm_cfg)))\n# # use ResNeSt img_norm\nimg_norm_cfg = dict(\n    mean=[123.68, 116.779, 103.939], std=[58.393, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='LoadAnnotations',\n        with_bbox=True,\n        with_mask=True,\n        poly2mask=False),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnest/metafile.yml",
    "content": "Models:\n  - Name: faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/resnest/faster_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.8\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNeSt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20200926_125502-20289c16.pth\n    Paper:\n      URL: https://arxiv.org/abs/2004.08955\n      Title: 'ResNeSt: Split-Attention Networks'\n    README: configs/resnest/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273\n      Version: v2.7.0\n\n  - Name: faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/resnest/faster_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.1\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNeSt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/faster_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201006_021058-421517f1.pth\n    Paper:\n      URL: https://arxiv.org/abs/2004.08955\n      Title: 'ResNeSt: Split-Attention Networks'\n    README: configs/resnest/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273\n      Version: v2.7.0\n\n  - Name: mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/resnest/mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.5\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNeSt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.6\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20200926_125503-8a2c3d47.pth\n    Paper:\n      URL: https://arxiv.org/abs/2004.08955\n      Title: 'ResNeSt: Split-Attention Networks'\n    README: configs/resnest/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273\n      Version: v2.7.0\n\n  - Name: mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/resnest/mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py\n    Metadata:\n      
Training Memory (GB): 7.8\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNeSt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_215831-af60cdf9.pth\n    Paper:\n      URL: https://arxiv.org/abs/2004.08955\n      Title: 'ResNeSt: Split-Attention Networks'\n    README: configs/resnest/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273\n      Version: v2.7.0\n\n  - Name: cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/resnest/cascade_rcnn_s50_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py\n    Metadata:\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNeSt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201122_213640-763cc7b5.pth\n    Paper:\n      URL: https://arxiv.org/abs/2004.08955\n      Title: 'ResNeSt: Split-Attention Networks'\n    README: configs/resnest/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273\n      Version: v2.7.0\n\n  - Name: cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/resnest/cascade_rcnn_s101_fpn_syncbn-backbone+head_mstrain-range_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.4\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNeSt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco/cascade_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain-range_1x_coco_20201005_113242-b9459f8f.pth\n    Paper:\n      URL: https://arxiv.org/abs/2004.08955\n      Title: 'ResNeSt: Split-Attention Networks'\n    README: configs/resnest/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273\n      Version: v2.7.0\n\n  - Name: cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone+head_mstrain_1x_coco.py\n    Metadata:\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNeSt\n    Results:\n      - Task: Object 
Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.4\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s50_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201122_104428-99eca4c7.pth\n    Paper:\n      URL: https://arxiv.org/abs/2004.08955\n      Title: 'ResNeSt: Split-Attention Networks'\n    README: configs/resnest/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273\n      Version: v2.7.0\n\n  - Name: cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone+head_mstrain_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.5\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNeSt\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnest/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco/cascade_mask_rcnn_s101_fpn_syncbn-backbone%2Bhead_mstrain_1x_coco_20201005_113243-42607475.pth\n    Paper:\n      URL: https://arxiv.org/abs/2004.08955\n      Title: 'ResNeSt: Split-Attention Networks'\n    README: configs/resnest/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.7.0/mmdet/models/backbones/resnest.py#L273\n      Version: v2.7.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\ncheckpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth'  # noqa\nmodel = dict(\n    backbone=dict(\n        init_cfg=dict(\n            type='Pretrained', prefix='backbone.', checkpoint=checkpoint)))\n\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=0.0002,\n    weight_decay=0.05,\n    paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\ncheckpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth'  # noqa\nmodel = dict(\n    backbone=dict(\n        init_cfg=dict(\n            type='Pretrained', prefix='backbone.', checkpoint=checkpoint)))\n\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=0.0002,\n    weight_decay=0.05,\n    paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\ncheckpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth'  # noqa\nmodel = dict(\n    backbone=dict(\n        init_cfg=dict(\n            type='Pretrained', prefix='backbone.', checkpoint=checkpoint)))\n\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=0.0002,\n    weight_decay=0.05,\n    paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnet_strikes_back/metafile.yml",
    "content": "Models:\n  - Name: faster_rcnn_r50_fpn_rsb-pretrain_1x_coco\n    In Collection: Faster R-CNN\n    Config: configs/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.9\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco/faster_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_162229-32ae82a9.pth\n    Paper:\n      URL: https://arxiv.org/abs/2110.00476\n      Title: 'ResNet strikes back: An improved training procedure in timm'\n    README: configs/resnet_strikes_back/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/configs/resnet_strikes_back/README.md\n      Version: v2.22.0\n\n  - Name: cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco\n    In Collection: Cascade R-CNN\n    Config: configs/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py\n    Metadata:\n      Training Memory (GB): 6.2\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco/cascade_mask_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_193636-8b9ad50f.pth\n    Paper:\n      URL: https://arxiv.org/abs/2110.00476\n      Title: 'ResNet strikes back: An improved training procedure in timm'\n    README: configs/resnet_strikes_back/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/configs/resnet_strikes_back/README.md\n      Version: v2.22.0\n\n  - Name: retinanet_r50_fpn_rsb-pretrain_1x_coco\n    In Collection: RetinaNet\n    Config: configs/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.8\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco/retinanet_r50_fpn_rsb-pretrain_1x_coco_20220113_175432-bd24aae9.pth\n    Paper:\n      URL: https://arxiv.org/abs/2110.00476\n      Title: 'ResNet strikes back: An improved training procedure in timm'\n    README: configs/resnet_strikes_back/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/configs/resnet_strikes_back/README.md\n      Version: v2.22.0\n\n  - Name: mask_rcnn_r50_fpn_rsb-pretrain_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.5\n      Epochs: 12\n      Training Data: 
COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNet\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 38.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/resnet_strikes_back/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco/mask_rcnn_r50_fpn_rsb-pretrain_1x_coco_20220113_174054-06ce8ba0.pth\n    Paper:\n      URL: https://arxiv.org/abs/2110.00476\n      Title: 'ResNet strikes back: An improved training procedure in timm'\n    README: configs/resnet_strikes_back/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.22.0/configs/resnet_strikes_back/README.md\n      Version: v2.22.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/resnet_strikes_back/retinanet_r50_fpn_rsb-pretrain_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\ncheckpoint = 'https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb256-rsb-a1-600e_in1k_20211228-20e21305.pth'  # noqa\nmodel = dict(\n    backbone=dict(\n        init_cfg=dict(\n            type='Pretrained', prefix='backbone.', checkpoint=checkpoint)))\n\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=0.0001,\n    weight_decay=0.05,\n    paramwise_cfg=dict(norm_decay_mult=0., bypass_duplicate=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/metafile.yml",
    "content": "Collections:\n  - Name: RetinaNet\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Focal Loss\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1708.02002\n      Title: \"Focal Loss for Dense Object Detection\"\n    README: configs/retinanet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/mmdet/models/detectors/retinanet.py#L6\n      Version: v2.0.0\n\nModels:\n  - Name: retinanet_r18_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r18_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 1.7\n      Training Resources: 8x V100 GPUs\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 31.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r18_fpn_1x_coco/retinanet_r18_fpn_1x_coco_20220407_171055-614fd399.pth\n\n  - Name: retinanet_r18_fpn_1x8_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r18_fpn_1x8_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.0\n      Training Resources:  1x V100 GPUs\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 31.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r18_fpn_1x8_1x_coco/retinanet_r18_fpn_1x8_1x_coco_20220407_171255-4ea310d7.pth\n\n  - Name: retinanet_r50_caffe_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.5\n      inference time (ms/im):\n        - value: 53.76\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_caffe_fpn_1x_coco/retinanet_r50_caffe_fpn_1x_coco_20200531-f11027c5.pth\n\n  - Name: retinanet_r50_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 3.8\n      inference time (ms/im):\n        - value: 52.63\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_1x_coco/retinanet_r50_fpn_1x_coco_20200130-c2398f9e.pth\n\n  - Name: retinanet_r50_fpn_fp16_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r50_fpn_fp16_1x_coco.py\n    Metadata:\n      Training Memory (GB): 2.8\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n        - Mixed Precision Training\n      inference time (ms/im):\n        - value: 31.65\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP16\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 36.4\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/fp16/retinanet_r50_fpn_fp16_1x_coco/retinanet_r50_fpn_fp16_1x_coco_20200702-0dbfb212.pth\n\n  - Name: retinanet_r50_fpn_2x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r50_fpn_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_2x_coco/retinanet_r50_fpn_2x_coco_20200131-fdb43119.pth\n\n  - Name: retinanet_r50_fpn_mstrain_640-800_3x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r50_fpn_mstrain_640-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r50_fpn_mstrain_3x_coco/retinanet_r50_fpn_mstrain_3x_coco_20210718_220633-88476508.pth\n\n  - Name: retinanet_r101_caffe_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.5\n      inference time (ms/im):\n        - value: 68.03\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_1x_coco/retinanet_r101_caffe_fpn_1x_coco_20200531-b428fa0f.pth\n\n  - Name: retinanet_r101_caffe_fpn_mstrain_3x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco/retinanet_r101_caffe_fpn_mstrain_3x_coco_20210721_063439-88a8a944.pth\n\n  - Name: retinanet_r101_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r101_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.7\n      inference time (ms/im):\n        - value: 66.67\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_1x_coco/retinanet_r101_fpn_1x_coco_20200130-7a93545f.pth\n\n  - Name: retinanet_r101_fpn_2x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r101_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 5.7\n      inference time (ms/im):\n        - value: 66.67\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_2x_coco/retinanet_r101_fpn_2x_coco_20200131-5560aee8.pth\n\n  - Name: retinanet_r101_fpn_mstrain_640-800_3x_coco\n    In 
Collection: RetinaNet\n    Config: configs/retinanet/retinanet_r101_fpn_mstrain_640-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_r101_fpn_mstrain_3x_coco/retinanet_r101_fpn_mstrain_3x_coco_20210720_214650-7ee888e0.pth\n\n  - Name: retinanet_x101_32x4d_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 82.64\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_1x_coco/retinanet_x101_32x4d_fpn_1x_coco_20200130-5c8b7ec4.pth\n\n  - Name: retinanet_x101_32x4d_fpn_2x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 82.64\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_32x4d_fpn_2x_coco/retinanet_x101_32x4d_fpn_2x_coco_20200131-237fc5e1.pth\n\n  - Name: retinanet_x101_64x4d_fpn_1x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 10.0\n      inference time (ms/im):\n        - value: 114.94\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_1x_coco/retinanet_x101_64x4d_fpn_1x_coco_20200130-366f5af1.pth\n\n  - Name: retinanet_x101_64x4d_fpn_2x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py\n    Metadata:\n      Training Memory (GB): 10.0\n      inference time (ms/im):\n        - value: 114.94\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_2x_coco/retinanet_x101_64x4d_fpn_2x_coco_20200131-bca068ab.pth\n\n  - Name: retinanet_x101_64x4d_fpn_mstrain_640-800_3x_coco\n    In Collection: RetinaNet\n    Config: configs/retinanet/retinanet_x101_64x4d_fpn_mstrain_640-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.6\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/retinanet/retinanet_x101_64x4d_fpn_mstrain_3x_coco/retinanet_x101_64x4d_fpn_mstrain_3x_coco_20210719_051838-022c2187.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r101_caffe_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './retinanet_r50_caffe_fpn_mstrain_1x_coco.py'\n# learning policy\nmodel = dict(\n    pretrained='open-mmlab://detectron2/resnet101_caffe',\n    backbone=dict(depth=101))\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r101_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r101_fpn_2x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r101_fpn_mstrain_640-800_3x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py', '../common/mstrain_3x_coco.py'\n]\n# optimizer\nmodel = dict(pretrained='torchvision://resnet101', backbone=dict(depth=101))\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r18_fpn_1x8_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# data\ndata = dict(samples_per_gpu=8)\n\n# optimizer\nmodel = dict(\n    backbone=dict(\n        depth=18,\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),\n    neck=dict(in_channels=[64, 128, 256, 512]))\n\n# Note: If the learning rate is set to 0.0025, the mAP will be 32.4.\noptimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0001)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (1 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=8)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r18_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# optimizer\nmodel = dict(\n    backbone=dict(\n        depth=18,\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet18')),\n    neck=dict(in_channels=[64, 128, 256, 512]))\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (2 samples per GPU)\nauto_scale_lr = dict(base_batch_size=16)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_1x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './retinanet_r50_caffe_fpn_mstrain_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 23])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_caffe_fpn_mstrain_3x_coco.py",
    "content": "_base_ = './retinanet_r50_caffe_fpn_mstrain_1x_coco.py'\n# learning policy\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_fpn_2x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_fpn_90k_coco.py",
    "content": "_base_ = 'retinanet_r50_fpn_1x_coco.py'\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    step=[60000, 80000])\n\n# Runner type\nrunner = dict(_delete_=True, type='IterBasedRunner', max_iters=90000)\n\ncheckpoint_config = dict(interval=10000)\nevaluation = dict(interval=10000, metric='bbox')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_fpn_fp16_1x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_1x_coco.py'\n# fp16 settings\nfp16 = dict(loss_scale=512.)\n\n# set grad_norm for stability during mixed-precision training\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_r50_fpn_mstrain_640-800_3x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py', '../common/mstrain_3x_coco.py'\n]\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_x101_32x4d_fpn_2x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_x101_64x4d_fpn_2x_coco.py",
    "content": "_base_ = './retinanet_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/retinanet/retinanet_x101_64x4d_fpn_mstrain_640-800_3x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py', '../common/mstrain_3x_coco.py'\n]\n# optimizer\nmodel = dict(\n    pretrained='open-mmlab://resnext101_64x4d',\n    backbone=dict(type='ResNeXt', depth=101, groups=64, base_width=4))\noptimizer = dict(type='SGD', lr=0.01)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_r101_caffe_fpn_1x_coco.py",
    "content": "_base_ = './rpn_r50_caffe_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet101_caffe')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_r101_fpn_1x_coco.py",
    "content": "_base_ = './rpn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_r101_fpn_2x_coco.py",
    "content": "_base_ = './rpn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_r50_caffe_c4_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/rpn_r50_caffe_c4.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_label=False),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\nevaluation = dict(interval=1, metric='proposal_fast')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_r50_caffe_fpn_1x_coco.py",
    "content": "_base_ = './rpn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        norm_cfg=dict(requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')))\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_label=False),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/rpn_r50_fpn.py', '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_label=False),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\nevaluation = dict(interval=1, metric='proposal_fast')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_r50_fpn_2x_coco.py",
    "content": "_base_ = './rpn_r50_fpn_1x_coco.py'\n\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_x101_32x4d_fpn_1x_coco.py",
    "content": "_base_ = './rpn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_x101_32x4d_fpn_2x_coco.py",
    "content": "_base_ = './rpn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_x101_64x4d_fpn_1x_coco.py",
    "content": "_base_ = './rpn_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/rpn/rpn_x101_64x4d_fpn_2x_coco.py",
    "content": "_base_ = './rpn_r50_fpn_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/metafile.yml",
    "content": "Collections:\n  - Name: SABL\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n        - SABL\n    Paper:\n      URL: https://arxiv.org/abs/1912.04260\n      Title: 'Side-Aware Boundary Localization for More Precise Object Detection'\n    README: configs/sabl/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.4.0/mmdet/models/roi_heads/bbox_heads/sabl_head.py#L14\n      Version: v2.4.0\n\nModels:\n  - Name: sabl_faster_rcnn_r50_fpn_1x_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r50_fpn_1x_coco/sabl_faster_rcnn_r50_fpn_1x_coco-e867595b.pth\n\n  - Name: sabl_faster_rcnn_r101_fpn_1x_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_faster_rcnn_r101_fpn_1x_coco/sabl_faster_rcnn_r101_fpn_1x_coco-f804c6c1.pth\n\n  - Name: sabl_cascade_rcnn_r50_fpn_1x_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco/sabl_cascade_rcnn_r50_fpn_1x_coco-e1748e5e.pth\n\n  - Name: sabl_cascade_rcnn_r101_fpn_1x_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco/sabl_cascade_rcnn_r101_fpn_1x_coco-2b83e87c.pth\n\n  - Name: sabl_retinanet_r50_fpn_1x_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_1x_coco/sabl_retinanet_r50_fpn_1x_coco-6c54fd4f.pth\n\n  - Name: sabl_retinanet_r50_fpn_gn_1x_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 38.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r50_fpn_gn_1x_coco/sabl_retinanet_r50_fpn_gn_1x_coco-e16dfcf1.pth\n\n  - Name: sabl_retinanet_r101_fpn_1x_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 39.7\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_1x_coco/sabl_retinanet_r101_fpn_1x_coco-42026904.pth\n\n  - Name: sabl_retinanet_r101_fpn_gn_1x_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_1x_coco/sabl_retinanet_r101_fpn_gn_1x_coco-40a893e8.pth\n\n  - Name: sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco-1e63382c.pth\n\n  - Name: sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco\n    In Collection: SABL\n    Config: configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco-5342f857.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_cascade_rcnn_r101_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    roi_head=dict(bbox_head=[\n        dict(\n            type='SABLHead',\n            num_classes=80,\n            cls_in_channels=256,\n            reg_in_channels=256,\n            roi_feat_size=7,\n            reg_feat_up_ratio=2,\n            reg_pre_kernel=3,\n            reg_post_kernel=3,\n            reg_pre_num=2,\n            reg_post_num=1,\n            cls_out_channels=1024,\n            reg_offset_out_channels=256,\n            reg_cls_out_channels=256,\n            num_cls_fcs=1,\n            num_reg_fcs=0,\n            reg_class_agnostic=True,\n            norm_cfg=None,\n            bbox_coder=dict(\n                type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1,\n                               loss_weight=1.0)),\n        dict(\n            type='SABLHead',\n            num_classes=80,\n            cls_in_channels=256,\n            reg_in_channels=256,\n            roi_feat_size=7,\n            reg_feat_up_ratio=2,\n            reg_pre_kernel=3,\n            reg_post_kernel=3,\n            reg_pre_num=2,\n            reg_post_num=1,\n            cls_out_channels=1024,\n            reg_offset_out_channels=256,\n            reg_cls_out_channels=256,\n            num_cls_fcs=1,\n            num_reg_fcs=0,\n            reg_class_agnostic=True,\n            norm_cfg=None,\n            bbox_coder=dict(\n                type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.5),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1,\n                               loss_weight=1.0)),\n        dict(\n            type='SABLHead',\n            num_classes=80,\n            cls_in_channels=256,\n            reg_in_channels=256,\n            roi_feat_size=7,\n            reg_feat_up_ratio=2,\n            reg_pre_kernel=3,\n            reg_post_kernel=3,\n            reg_pre_num=2,\n            reg_post_num=1,\n            cls_out_channels=1024,\n            reg_offset_out_channels=256,\n            reg_cls_out_channels=256,\n            num_cls_fcs=1,\n            num_reg_fcs=0,\n            reg_class_agnostic=True,\n            norm_cfg=None,\n            bbox_coder=dict(\n                type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.3),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, loss_weight=1.0))\n    ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_cascade_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    roi_head=dict(bbox_head=[\n        dict(\n            type='SABLHead',\n            num_classes=80,\n            cls_in_channels=256,\n            reg_in_channels=256,\n            roi_feat_size=7,\n            reg_feat_up_ratio=2,\n            reg_pre_kernel=3,\n            reg_post_kernel=3,\n            reg_pre_num=2,\n            reg_post_num=1,\n            cls_out_channels=1024,\n            reg_offset_out_channels=256,\n            reg_cls_out_channels=256,\n            num_cls_fcs=1,\n            num_reg_fcs=0,\n            reg_class_agnostic=True,\n            norm_cfg=None,\n            bbox_coder=dict(\n                type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1,\n                               loss_weight=1.0)),\n        dict(\n            type='SABLHead',\n            num_classes=80,\n            cls_in_channels=256,\n            reg_in_channels=256,\n            roi_feat_size=7,\n            reg_feat_up_ratio=2,\n            reg_pre_kernel=3,\n            reg_post_kernel=3,\n            reg_pre_num=2,\n            reg_post_num=1,\n            cls_out_channels=1024,\n            reg_offset_out_channels=256,\n            reg_cls_out_channels=256,\n            num_cls_fcs=1,\n            num_reg_fcs=0,\n            reg_class_agnostic=True,\n            norm_cfg=None,\n            bbox_coder=dict(\n                type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.5),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1,\n                               loss_weight=1.0)),\n        dict(\n            type='SABLHead',\n            num_classes=80,\n            cls_in_channels=256,\n            reg_in_channels=256,\n            roi_feat_size=7,\n            reg_feat_up_ratio=2,\n            reg_pre_kernel=3,\n            reg_post_kernel=3,\n            reg_pre_num=2,\n            reg_post_num=1,\n            cls_out_channels=1024,\n            reg_offset_out_channels=256,\n            reg_cls_out_channels=256,\n            num_cls_fcs=1,\n            num_reg_fcs=0,\n            reg_class_agnostic=True,\n            norm_cfg=None,\n            bbox_coder=dict(\n                type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.3),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1, loss_weight=1.0))\n    ]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_faster_rcnn_r101_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    roi_head=dict(\n        bbox_head=dict(\n            _delete_=True,\n            type='SABLHead',\n            num_classes=80,\n            cls_in_channels=256,\n            reg_in_channels=256,\n            roi_feat_size=7,\n            reg_feat_up_ratio=2,\n            reg_pre_kernel=3,\n            reg_post_kernel=3,\n            reg_pre_num=2,\n            reg_post_num=1,\n            cls_out_channels=1024,\n            reg_offset_out_channels=256,\n            reg_cls_out_channels=256,\n            num_cls_fcs=1,\n            num_reg_fcs=0,\n            reg_class_agnostic=True,\n            norm_cfg=None,\n            bbox_coder=dict(\n                type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1,\n                               loss_weight=1.0))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_faster_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(\n            _delete_=True,\n            type='SABLHead',\n            num_classes=80,\n            cls_in_channels=256,\n            reg_in_channels=256,\n            roi_feat_size=7,\n            reg_feat_up_ratio=2,\n            reg_pre_kernel=3,\n            reg_post_kernel=3,\n            reg_pre_num=2,\n            reg_post_num=1,\n            cls_out_channels=1024,\n            reg_offset_out_channels=256,\n            reg_cls_out_channels=256,\n            num_cls_fcs=1,\n            num_reg_fcs=0,\n            reg_class_agnostic=True,\n            norm_cfg=None,\n            bbox_coder=dict(\n                type='BucketingBBoxCoder', num_buckets=14, scale_factor=1.7),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n            loss_bbox_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox_reg=dict(type='SmoothL1Loss', beta=0.1,\n                               loss_weight=1.0))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_retinanet_r101_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    bbox_head=dict(\n        _delete_=True,\n        type='SABLRetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5),\n        loss_bbox_reg=dict(\n            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='ApproxMaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_retinanet_r101_fpn_gn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    bbox_head=dict(\n        _delete_=True,\n        type='SABLRetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        norm_cfg=norm_cfg,\n        bbox_coder=dict(\n            type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5),\n        loss_bbox_reg=dict(\n            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='ApproxMaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_480_960_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    bbox_head=dict(\n        _delete_=True,\n        type='SABLRetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        norm_cfg=norm_cfg,\n        bbox_coder=dict(\n            type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5),\n        loss_bbox_reg=dict(\n            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='ApproxMaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 480), (1333, 960)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_retinanet_r101_fpn_gn_2x_ms_640_800_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    bbox_head=dict(\n        _delete_=True,\n        type='SABLRetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        norm_cfg=norm_cfg,\n        bbox_coder=dict(\n            type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5),\n        loss_bbox_reg=dict(\n            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='ApproxMaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_retinanet_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    bbox_head=dict(\n        _delete_=True,\n        type='SABLRetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5),\n        loss_bbox_reg=dict(\n            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='ApproxMaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sabl/sabl_retinanet_r50_fpn_gn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    bbox_head=dict(\n        _delete_=True,\n        type='SABLRetinaHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]),\n        norm_cfg=norm_cfg,\n        bbox_coder=dict(\n            type='BucketingBBoxCoder', num_buckets=14, scale_factor=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.5),\n        loss_bbox_reg=dict(\n            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='ApproxMaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.0,\n            ignore_iof_thr=-1),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scnet/metafile.yml",
    "content": "Collections:\n  - Name: SCNet\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n        - SCNet\n    Paper:\n      URL: https://arxiv.org/abs/2012.10150\n      Title: 'SCNet: Training Inference Sample Consistency for Instance Segmentation'\n    README: configs/scnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.9.0/mmdet/models/detectors/scnet.py#L6\n      Version: v2.9.0\n\nModels:\n  - Name: scnet_r50_fpn_1x_coco\n    In Collection: SCNet\n    Config: configs/scnet/scnet_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 161.29\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_1x_coco/scnet_r50_fpn_1x_coco-c3f09857.pth\n\n  - Name: scnet_r50_fpn_20e_coco\n    In Collection: SCNet\n    Config: configs/scnet/scnet_r50_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 7.0\n      inference time (ms/im):\n        - value: 161.29\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_20e_coco/scnet_r50_fpn_20e_coco-a569f645.pth\n\n  - Name: scnet_r101_fpn_20e_coco\n    In Collection: SCNet\n    Config: configs/scnet/scnet_r101_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 8.9\n      inference time (ms/im):\n        - value: 172.41\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r101_fpn_20e_coco/scnet_r101_fpn_20e_coco-294e312c.pth\n\n  - Name: scnet_x101_64x4d_fpn_20e_coco\n    In Collection: SCNet\n    Config: configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py\n    Metadata:\n      Training Memory (GB): 13.2\n      inference time (ms/im):\n        - value: 204.08\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (800, 1333)\n      Epochs: 20\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 42.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_x101_64x4d_fpn_20e_coco/scnet_x101_64x4d_fpn_20e_coco-fb09dec9.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scnet/scnet_r101_fpn_20e_coco.py",
    "content": "_base_ = './scnet_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scnet/scnet_r50_fpn_1x_coco.py",
    "content": "_base_ = '../htc/htc_r50_fpn_1x_coco.py'\n# model settings\nmodel = dict(\n    type='SCNet',\n    roi_head=dict(\n        _delete_=True,\n        type='SCNetRoIHead',\n        num_stages=3,\n        stage_loss_weights=[1, 0.5, 0.25],\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='SCNetBBoxHead',\n                num_shared_fcs=2,\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='SCNetBBoxHead',\n                num_shared_fcs=2,\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='SCNetBBoxHead',\n                num_shared_fcs=2,\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=80,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                loss_cls=dict(\n                    type='CrossEntropyLoss',\n                    use_sigmoid=False,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ],\n        mask_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        mask_head=dict(\n            type='SCNetMaskHead',\n            num_convs=12,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            conv_to_res=True,\n            loss_mask=dict(\n                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0)),\n        semantic_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),\n            out_channels=256,\n            featmap_strides=[8]),\n        semantic_head=dict(\n            type='SCNetSemanticHead',\n            
num_ins=5,\n            fusion_level=1,\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=183,\n            loss_seg=dict(\n                type='CrossEntropyLoss', ignore_index=255, loss_weight=0.2),\n            conv_to_res=True),\n        glbctx_head=dict(\n            type='GlobalContextHead',\n            num_convs=4,\n            in_channels=256,\n            conv_out_channels=256,\n            num_classes=80,\n            loss_weight=3.0,\n            conv_to_res=True),\n        feat_relay_head=dict(\n            type='FeatureRelayHead',\n            in_channels=1024,\n            out_conv_channels=256,\n            roi_feat_size=7,\n            scale_factor=2)))\n\n# uncomment below code to enable test time augmentations\n# img_norm_cfg = dict(\n#     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n# test_pipeline = [\n#     dict(type='LoadImageFromFile'),\n#     dict(\n#         type='MultiScaleFlipAug',\n#         img_scale=[(600, 900), (800, 1200), (1000, 1500), (1200, 1800),\n#                    (1400, 2100)],\n#         flip=True,\n#         transforms=[\n#             dict(type='Resize', keep_ratio=True),\n#             dict(type='RandomFlip', flip_ratio=0.5),\n#             dict(type='Normalize', **img_norm_cfg),\n#             dict(type='Pad', size_divisor=32),\n#             dict(type='ImageToTensor', keys=['img']),\n#             dict(type='Collect', keys=['img']),\n#         ])\n# ]\n# data = dict(\n#     val=dict(pipeline=test_pipeline),\n#     test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scnet/scnet_r50_fpn_20e_coco.py",
    "content": "_base_ = './scnet_r50_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 19])\nrunner = dict(type='EpochBasedRunner', max_epochs=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py",
    "content": "_base_ = './scnet_r50_fpn_20e_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scnet/scnet_x101_64x4d_fpn_8x1_20e_coco.py",
    "content": "_base_ = './scnet_x101_64x4d_fpn_20e_coco.py'\ndata = dict(samples_per_gpu=1, workers_per_gpu=1)\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (1 samples per GPU)\nauto_scale_lr = dict(base_batch_size=8)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        frozen_stages=-1,\n        zero_init_residual=False,\n        norm_cfg=norm_cfg,\n        init_cfg=None),\n    neck=dict(norm_cfg=norm_cfg),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=norm_cfg)))\n# optimizer\noptimizer = dict(paramwise_cfg=dict(norm_decay_mult=0))\noptimizer_config = dict(_delete_=True, grad_clip=None)\n# learning policy\nlr_config = dict(warmup_ratio=0.1, step=[65, 71])\nrunner = dict(type='EpochBasedRunner', max_epochs=73)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nnorm_cfg = dict(type='GN', num_groups=32, requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        frozen_stages=-1,\n        zero_init_residual=False,\n        norm_cfg=norm_cfg,\n        init_cfg=None),\n    neck=dict(norm_cfg=norm_cfg),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=norm_cfg),\n        mask_head=dict(norm_cfg=norm_cfg)))\n# optimizer\noptimizer = dict(paramwise_cfg=dict(norm_decay_mult=0))\noptimizer_config = dict(_delete_=True, grad_clip=None)\n# learning policy\nlr_config = dict(warmup_ratio=0.1, step=[65, 71])\nrunner = dict(type='EpochBasedRunner', max_epochs=73)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/scratch/metafile.yml",
    "content": "Collections:\n  - Name: Rethinking ImageNet Pre-training\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - RPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1811.08883\n      Title: 'Rethinking ImageNet Pre-training'\n    README: configs/scratch/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.0.0/configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py\n      Version: v2.0.0\n\nModels:\n  - Name: faster_rcnn_r50_fpn_gn-all_scratch_6x_coco\n    In Collection: Rethinking ImageNet Pre-training\n    Config: configs/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco.py\n    Metadata:\n      Epochs: 72\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/scratch/faster_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_faster_rcnn_r50_fpn_gn_6x_bbox_mAP-0.407_20200201_193013-90813d01.pth\n\n  - Name: mask_rcnn_r50_fpn_gn-all_scratch_6x_coco\n    In Collection: Rethinking ImageNet Pre-training\n    Config: configs/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco.py\n    Metadata:\n      Epochs: 72\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/scratch/mask_rcnn_r50_fpn_gn-all_scratch_6x_coco/scratch_mask_rcnn_r50_fpn_gn_6x_bbox_mAP-0.412__segm_mAP-0.374_20200201_193051-1e190a40.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    roi_head=dict(\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=1203,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),\n                loss_cls=dict(\n                    type='SeesawLoss',\n                    p=0.8,\n                    q=2.0,\n                    num_classes=1203,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=1203,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),\n                loss_cls=dict(\n                    type='SeesawLoss',\n                    p=0.8,\n                    q=2.0,\n                    num_classes=1203,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=1203,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),\n                loss_cls=dict(\n                    type='SeesawLoss',\n                    p=0.8,\n                    q=2.0,\n                    num_classes=1203,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ],\n        mask_head=dict(num_classes=1203)),\n    test_cfg=dict(\n        rcnn=dict(\n            score_thr=0.0001,\n            # LVIS allows up to 300\n            max_per_img=300)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', 
flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndataset_type = 'LVISV1Dataset'\ndata_root = 'data/lvis_v1/'\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v1_train.json',\n        img_prefix=data_root,\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v1_val.json',\n        img_prefix=data_root,\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v1_val.json',\n        img_prefix=data_root,\n        pipeline=test_pipeline))\nevaluation = dict(interval=24, metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py",
    "content": "_base_ = './cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py'  # noqa: E501\nmodel = dict(\n    roi_head=dict(\n        mask_head=dict(\n            predictor_cfg=dict(type='NormedConv2d', tempearture=20))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py",
    "content": "_base_ = [\n    '../_base_/models/cascade_mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/lvis_v1_instance.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')),\n    roi_head=dict(\n        bbox_head=[\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=1203,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.1, 0.1, 0.2, 0.2]),\n                reg_class_agnostic=True,\n                cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),\n                loss_cls=dict(\n                    type='SeesawLoss',\n                    p=0.8,\n                    q=2.0,\n                    num_classes=1203,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=1203,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.05, 0.05, 0.1, 0.1]),\n                reg_class_agnostic=True,\n                cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),\n                loss_cls=dict(\n                    type='SeesawLoss',\n                    p=0.8,\n                    q=2.0,\n                    num_classes=1203,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                               loss_weight=1.0)),\n            dict(\n                type='Shared2FCBBoxHead',\n                in_channels=256,\n                fc_out_channels=1024,\n                roi_feat_size=7,\n                num_classes=1203,\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.033, 0.033, 0.067, 0.067]),\n                reg_class_agnostic=True,\n                cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),\n                loss_cls=dict(\n                    type='SeesawLoss',\n                    p=0.8,\n                    q=2.0,\n                    num_classes=1203,\n                    loss_weight=1.0),\n                loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0))\n        ],\n        mask_head=dict(num_classes=1203)),\n    test_cfg=dict(\n        rcnn=dict(\n            score_thr=0.0001,\n            # LVIS allows up to 300\n            max_per_img=300)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', 
flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(dataset=dict(pipeline=train_pipeline)))\nevaluation = dict(interval=24, metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py",
    "content": "_base_ = './cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py'  # noqa: E501\nmodel = dict(\n    roi_head=dict(\n        mask_head=dict(\n            predictor_cfg=dict(type='NormedConv2d', tempearture=20))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(\n            num_classes=1203,\n            cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),\n            loss_cls=dict(\n                type='SeesawLoss',\n                p=0.8,\n                q=2.0,\n                num_classes=1203,\n                loss_weight=1.0)),\n        mask_head=dict(num_classes=1203)),\n    test_cfg=dict(\n        rcnn=dict(\n            score_thr=0.0001,\n            # LVIS allows up to 300\n            max_per_img=300)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndataset_type = 'LVISV1Dataset'\ndata_root = 'data/lvis_v1/'\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v1_train.json',\n        img_prefix=data_root,\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v1_val.json',\n        img_prefix=data_root,\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/lvis_v1_val.json',\n        img_prefix=data_root,\n        pipeline=test_pipeline))\nevaluation = dict(interval=24, metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py'\nmodel = dict(\n    roi_head=dict(\n        mask_head=dict(\n            predictor_cfg=dict(type='NormedConv2d', tempearture=20))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/lvis_v1_instance.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    roi_head=dict(\n        bbox_head=dict(\n            num_classes=1203,\n            cls_predictor_cfg=dict(type='NormedLinear', tempearture=20),\n            loss_cls=dict(\n                type='SeesawLoss',\n                p=0.8,\n                q=2.0,\n                num_classes=1203,\n                loss_weight=1.0)),\n        mask_head=dict(num_classes=1203)),\n    test_cfg=dict(\n        rcnn=dict(\n            score_thr=0.0001,\n            # LVIS allows up to 300\n            max_per_img=300)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(dataset=dict(pipeline=train_pipeline)))\nevaluation = dict(interval=12, metric=['bbox', 'segm'])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py",
    "content": "_base_ = './mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py'\nmodel = dict(\n    roi_head=dict(\n        mask_head=dict(\n            predictor_cfg=dict(type='NormedConv2d', tempearture=20))))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/seesaw_loss/metafile.yml",
    "content": "Collections:\n  - Name: Seesaw Loss\n    Metadata:\n      Training Data: LVIS\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Softmax\n        - RPN\n        - Convolution\n        - Dense Connections\n        - FPN\n        - ResNet\n        - RoIAlign\n        - Seesaw Loss\n    Paper:\n      URL: https://arxiv.org/abs/2008.10032\n      Title: 'Seesaw Loss for Long-Tailed Instance Segmentation'\n    README: configs/seesaw_loss/README.md\n\nModels:\n  - Name: mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 25.6\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 25.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-a698dd3d.pth\n  - Name: mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 25.6\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 25.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-a1c11314.pth\n  - Name: mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 27.4\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 26.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-8e6e6dd5.pth\n  - Name: mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 27.2\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 27.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-a0b59c42.pth\n  - Name: mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 27.6\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 26.4\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-392a804b.pth\n  - Name: mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: configs/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 27.6\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 26.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r50_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-cd0f6a12.pth\n  - Name: mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 28.9\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 27.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-e68eb464.pth\n  - Name: mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: configs/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 28.9\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 28.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-1d817139.pth\n  - Name: cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 33.1\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 29.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_mstrain_2x_lvis_v1-71e2215e.pth\n  - Name: cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 33.0\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 30.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-8b5a6745.pth\n  - Name: cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: 
Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 30.0\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 29.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_mstrain_2x_lvis_v1-5d8ca2a4.pth\n  - Name: cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1\n    In Collection: Seesaw Loss\n    Config: configs/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: LVIS v1\n        Metrics:\n          box AP: 32.8\n      - Task: Instance Segmentation\n        Dataset: LVIS v1\n        Metrics:\n          mask AP: 30.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/seesaw_loss/cascade_mask_rcnn_r101_fpn_sample1e-3_seesaw_loss_normed_mask_mstrain_2x_lvis_v1-c8551505.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        frozen_stages=0,\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='./mocov2_r50_800ep_pretrain.pth')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/selfsup_pretrain/mask_rcnn_r50_fpn_mocov2-pretrain_ms-2x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        frozen_stages=0,\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='./mocov2_r50_800ep_pretrain.pth')))\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])\n]\n\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        frozen_stages=0,\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='./swav_800ep_pretrain.pth.tar')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/selfsup_pretrain/mask_rcnn_r50_fpn_swav-pretrain_ms-2x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    backbone=dict(\n        frozen_stages=0,\n        norm_cfg=dict(type='SyncBN', requires_grad=True),\n        norm_eval=False,\n        init_cfg=dict(\n            type='Pretrained', checkpoint='./swav_800ep_pretrain.pth.tar')))\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks'])\n]\n\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    # 270k iterations with batch_size 64 is roughly equivalent to 144 epochs\n    '../common/ssj_270k_coco_instance.py',\n]\n\nnorm_cfg = dict(type='SyncBN', requires_grad=True)\n# Use MMSyncBN that handles empty tensor in head. It can be changed to\n# SyncBN after https://github.com/pytorch/pytorch/issues/36530 is fixed.\nhead_norm_cfg = dict(type='MMSyncBN', requires_grad=True)\nmodel = dict(\n    backbone=dict(frozen_stages=-1, norm_eval=False, norm_cfg=norm_cfg),\n    neck=dict(norm_cfg=norm_cfg),\n    rpn_head=dict(num_convs=2),  # leads to 0.1+ mAP\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=head_norm_cfg),\n        mask_head=dict(norm_cfg=head_norm_cfg)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco.py",
    "content": "_base_ = 'mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py'\n\n# lr steps at [0.9, 0.95, 0.975] of the maximum iterations\nlr_config = dict(\n    warmup_iters=500, warmup_ratio=0.067, step=[81000, 85500, 87750])\n# 90k iterations with batch_size 64 is roughly equivalent to 48 epochs\nrunner = dict(type='IterBasedRunner', max_iters=90000)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    # 270k iterations with batch_size 64 is roughly equivalent to 144 epochs\n    '../common/ssj_scp_270k_coco_instance.py'\n]\n\nnorm_cfg = dict(type='SyncBN', requires_grad=True)\n# Use MMSyncBN that handles empty tensor in head. It can be changed to\n# SyncBN after https://github.com/pytorch/pytorch/issues/36530 is fixed.\nhead_norm_cfg = dict(type='MMSyncBN', requires_grad=True)\nmodel = dict(\n    backbone=dict(frozen_stages=-1, norm_eval=False, norm_cfg=norm_cfg),\n    neck=dict(norm_cfg=norm_cfg),\n    rpn_head=dict(num_convs=2),  # leads to 0.1+ mAP\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=head_norm_cfg),\n        mask_head=dict(norm_cfg=head_norm_cfg)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco.py",
    "content": "_base_ = 'mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py'\n\n# lr steps at [0.9, 0.95, 0.975] of the maximum iterations\nlr_config = dict(\n    warmup_iters=500, warmup_ratio=0.067, step=[81000, 85500, 87750])\n# 90k iterations with batch_size 64 is roughly equivalent to 48 epochs\nrunner = dict(type='IterBasedRunner', max_iters=90000)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/simple_copy_paste/metafile.yml",
    "content": "Collections:\n  - Name: SimpleCopyPaste\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 32x A100 GPUs\n      Architecture:\n        - Softmax\n        - RPN\n        - Convolution\n        - Dense Connections\n        - FPN\n        - ResNet\n        - RoIAlign\n    Paper:\n      URL: https://arxiv.org/abs/2012.07177\n      Title: \"Simple Copy-Paste is a Strong Data Augmentation Method for Instance Segmentation\"\n    README: configs/simple_copy_paste/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.25.0/mmdet/datasets/pipelines/transforms.py#L2762\n      Version: v2.25.0\n\nModels:\n  - Name: mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco\n    In Collection: SimpleCopyPaste\n    Config: configs/simplecopypaste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco.py\n    Metadata:\n      Training Memory (GB): 7.2\n      Iterations: 270000\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.5\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_270k_coco_20220324_182940-33a100c5.pth\n\n  - Name: mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco\n    In Collection: SimpleCopyPaste\n    Config: configs/simplecopypaste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco.py\n    Metadata:\n      Training Memory (GB): 7.2\n      Iterations: 90000\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.3\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_32x2_90k_coco_20220316_181409-f79c84c5.pth\n\n  - Name: mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco\n    In Collection: SimpleCopyPaste\n    Config: configs/simplecopypaste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco.py\n    Metadata:\n      Training Memory (GB): 7.2\n      Iterations: 270000\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.1\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 40.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_270k_coco_20220324_201229-80ee90b7.pth\n\n  - Name: mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco\n    In Collection: SimpleCopyPaste\n    Config: configs/simplecopypaste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco.py\n    Metadata:\n      Training Memory (GB): 7.2\n      Iterations: 90000\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.8\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.2\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/simple_copy_paste/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_ssj_scp_32x2_90k_coco_20220316_181307-6bc5726f.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solo/decoupled_solo_light_r50_fpn_3x_coco.py",
    "content": "_base_ = './decoupled_solo_r50_fpn_3x_coco.py'\n\n# model settings\nmodel = dict(\n    mask_head=dict(\n        type='DecoupledSOLOLightHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=4,\n        feat_channels=256,\n        strides=[8, 8, 16, 32, 32],\n        scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),\n        pos_scale=0.2,\n        num_grids=[40, 36, 24, 16, 12],\n        cls_down_index=0,\n        loss_mask=dict(\n            type='DiceLoss', use_sigmoid=True, activate=False,\n            loss_weight=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)))\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(852, 512), (852, 480), (852, 448), (852, 416), (852, 384),\n                   (852, 352)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(852, 512),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solo/decoupled_solo_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    './solo_r50_fpn_1x_coco.py',\n]\n# model settings\nmodel = dict(\n    mask_head=dict(\n        type='DecoupledSOLOHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=7,\n        feat_channels=256,\n        strides=[8, 8, 16, 32, 32],\n        scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),\n        pos_scale=0.2,\n        num_grids=[40, 36, 24, 16, 12],\n        cls_down_index=0,\n        loss_mask=dict(\n            type='DiceLoss', use_sigmoid=True, activate=False,\n            loss_weight=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)))\n\noptimizer = dict(type='SGD', lr=0.01)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solo/decoupled_solo_r50_fpn_3x_coco.py",
    "content": "_base_ = './solo_r50_fpn_3x_coco.py'\n\n# model settings\nmodel = dict(\n    mask_head=dict(\n        type='DecoupledSOLOHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=7,\n        feat_channels=256,\n        strides=[8, 8, 16, 32, 32],\n        scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),\n        pos_scale=0.2,\n        num_grids=[40, 36, 24, 16, 12],\n        cls_down_index=0,\n        loss_mask=dict(\n            type='DiceLoss', use_sigmoid=True, activate=False,\n            loss_weight=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solo/metafile.yml",
    "content": "Collections:\n  - Name: SOLO\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - Convolution\n        - ResNet\n    Paper: https://arxiv.org/abs/1912.04488\n    README: configs/solo/README.md\n\nModels:\n  - Name: decoupled_solo_r50_fpn_1x_coco\n    In Collection: SOLO\n    Config: configs/solo/decoupled_solo_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.8\n      Epochs: 12\n    inference time (ms/im):\n      - value: 116.4\n        hardware: V100\n        backend: PyTorch\n        batch size: 1\n        mode: FP32\n        resolution: (1333, 800)\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 33.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_1x_coco/decoupled_solo_r50_fpn_1x_coco_20210820_233348-6337c589.pth\n\n  - Name: decoupled_solo_r50_fpn_3x_coco\n    In Collection: SOLO\n    Config: configs/solo/decoupled_solo_r50_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.9\n      Epochs: 36\n    inference time (ms/im):\n      - value: 117.2\n        hardware: V100\n        backend: PyTorch\n        batch size: 1\n        mode: FP32\n        resolution: (1333, 800)\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 36.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_3x_coco/decoupled_solo_r50_fpn_3x_coco_20210821_042504-7b3301ec.pth\n\n  - Name: decoupled_solo_light_r50_fpn_3x_coco\n    In Collection: SOLO\n    Config: configs/solo/decoupled_solo_light_r50_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 2.2\n      Epochs: 36\n    inference time (ms/im):\n      - value: 35.0\n        hardware: V100\n        backend: PyTorch\n        batch size: 1\n        mode: FP32\n        resolution: (852, 512)\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 32.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_light_r50_fpn_3x_coco/decoupled_solo_light_r50_fpn_3x_coco_20210906_142703-e70e226f.pth\n\n  - Name: solo_r50_fpn_3x_coco\n    In Collection: SOLO\n    Config: configs/solo/solo_r50_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.4\n      Epochs: 36\n    inference time (ms/im):\n      - value: 94.2\n        hardware: V100\n        backend: PyTorch\n        batch size: 1\n        mode: FP32\n        resolution: (1333, 800)\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 35.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth\n\n  - Name: solo_r50_fpn_1x_coco\n    In Collection: SOLO\n    Config: configs/solo/solo_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.0\n      Epochs: 12\n    inference time (ms/im):\n      - value: 95.1\n        hardware: V100\n        backend: PyTorch\n        batch size: 1\n        mode: FP32\n        resolution: (1333, 800)\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 33.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_1x_coco/solo_r50_fpn_1x_coco_20210821_035055-2290a6b8.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solo/solo_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# model settings\nmodel = dict(\n    type='SOLO',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=0,\n        num_outs=5),\n    mask_head=dict(\n        type='SOLOHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=7,\n        feat_channels=256,\n        strides=[8, 8, 16, 32, 32],\n        scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),\n        pos_scale=0.2,\n        num_grids=[40, 36, 24, 16, 12],\n        cls_down_index=0,\n        loss_mask=dict(type='DiceLoss', use_sigmoid=True, loss_weight=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)),\n    # model training and testing settings\n    test_cfg=dict(\n        nms_pre=500,\n        score_thr=0.1,\n        mask_thr=0.5,\n        filter_thr=0.05,\n        kernel='gaussian',  # gaussian/linear\n        sigma=2.0,\n        max_per_img=100))\n\n# optimizer\noptimizer = dict(type='SGD', lr=0.01)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solo/solo_r50_fpn_3x_coco.py",
    "content": "_base_ = './solo_r50_fpn_1x_coco.py'\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 800), (1333, 768), (1333, 736), (1333, 704),\n                   (1333, 672), (1333, 640)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[27, 33])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/metafile.yml",
    "content": "Collections:\n  - Name: SOLOv2\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x A100 GPUs\n      Architecture:\n        - FPN\n        - Convolution\n        - ResNet\n    Paper: https://arxiv.org/abs/2003.10152\n    README: configs/solov2/README.md\n\nModels:\n  - Name: solov2_r50_fpn_1x_coco\n    In Collection: SOLOv2\n    Config: configs/solov2/solov2_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 5.1\n      Epochs: 12\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 34.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco_20220512_125858-a357fa23.pth\n\n  - Name: solov2_r50_fpn_3x_coco\n    In Collection: SOLOv2\n    Config: configs/solov2/solov2_r50_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 5.1\n      Epochs: 36\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_3x_coco/solov2_r50_fpn_3x_coco_20220512_125856-fed092d4.pth\n\n  - Name: solov2_r101_fpn_3x_coco\n    In Collection: SOLOv2\n    Config: configs/solov2/solov2_r101_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 6.9\n      Epochs: 36\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_fpn_3x_coco/solov2_r101_fpn_3x_coco_20220511_095119-c559a076.pth\n\n  - Name: solov2_r101_dcn_fpn_3x_coco\n    In Collection: SOLOv2\n    Config: configs/solov2/solov2_r101_dcn_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.1\n      Epochs: 36\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_dcn_fpn_3x_coco/solov2_r101_dcn_fpn_3x_coco_20220513_214734-16c966cb.pth\n\n  - Name: solov2_x101_dcn_fpn_3x_coco\n    In Collection: SOLOv2\n    Config: configs/solov2/solov2_x101_dcn_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 11.3\n      Epochs: 36\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 42.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_x101_dcn_fpn_3x_coco/solov2_x101_dcn_fpn_3x_coco_20220513_214337-aef41095.pth\n\n  - Name: solov2_light_r18_fpn_3x_coco\n    In Collection: SOLOv2\n    Config: configs/solov2/solov2_light_r18_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 9.1\n      Epochs: 36\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 29.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r18_fpn_3x_coco/solov2_light_r18_fpn_3x_coco_20220511_083717-75fa355b.pth\n\n  - Name: solov2_light_r34_fpn_3x_coco\n    In Collection: SOLOv2\n    Config: configs/solov2/solov2_light_r34_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 9.3\n      Epochs: 36\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 31.9\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r34_fpn_3x_coco/solov2_light_r34_fpn_3x_coco_20220511_091839-e51659d3.pth\n\n  - Name: solov2_light_r50_fpn_3x_coco\n    In Collection: SOLOv2\n    Config: configs/solov2/solov2_light_r50_fpn_3x_coco.py\n    Metadata:\n      Training Memory (GB): 9.9\n      Epochs: 36\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 33.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r50_fpn_3x_coco/solov2_light_r50_fpn_3x_coco_20220512_165256-c93a6074.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_light_r18_fpn_3x_coco.py",
    "content": "_base_ = 'solov2_light_r50_fpn_3x_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(\n        depth=18, init_cfg=dict(checkpoint='torchvision://resnet18')),\n    neck=dict(in_channels=[64, 128, 256, 512]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_light_r34_fpn_3x_coco.py",
    "content": "_base_ = 'solov2_light_r50_fpn_3x_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(\n        depth=34, init_cfg=dict(checkpoint='torchvision://resnet34')),\n    neck=dict(in_channels=[64, 128, 256, 512]))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_light_r50_dcn_fpn_3x_coco.py",
    "content": "_base_ = 'solov2_r50_fpn_3x_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    mask_head=dict(\n        feat_channels=256,\n        stacked_convs=3,\n        scale_ranges=((1, 64), (32, 128), (64, 256), (128, 512), (256, 2048)),\n        mask_feature_head=dict(out_channels=128),\n        dcn_cfg=dict(type='DCNv2'),\n        dcn_apply_to_all_conv=False))  # light solov2 head\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[27, 33])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n\n# data\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(768, 512), (768, 480), (768, 448), (768, 416), (768, 384),\n                   (768, 352)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(448, 768),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_light_r50_fpn_3x_coco.py",
    "content": "_base_ = 'solov2_r50_fpn_1x_coco.py'\n\n# model settings\nmodel = dict(\n    mask_head=dict(\n        stacked_convs=2,\n        feat_channels=256,\n        scale_ranges=((1, 56), (28, 112), (56, 224), (112, 448), (224, 896)),\n        mask_feature_head=dict(out_channels=128)))\n\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[27, 33])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n\n# data\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(768, 512), (768, 480), (768, 448), (768, 416), (768, 384),\n                   (768, 352)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(448, 768),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_r101_dcn_fpn_3x_coco.py",
    "content": "_base_ = 'solov2_r50_fpn_3x_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(checkpoint='torchvision://resnet101'),\n        dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    mask_head=dict(\n        mask_feature_head=dict(conv_cfg=dict(type='DCNv2')),\n        dcn_cfg=dict(type='DCNv2'),\n        dcn_apply_to_all_conv=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_r101_fpn_3x_coco.py",
    "content": "_base_ = 'solov2_r50_fpn_3x_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(\n        depth=101, init_cfg=dict(checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# model settings\nmodel = dict(\n    type='SOLOv2',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),\n        style='pytorch'),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=0,\n        num_outs=5),\n    mask_head=dict(\n        type='SOLOV2Head',\n        num_classes=80,\n        in_channels=256,\n        feat_channels=512,\n        stacked_convs=4,\n        strides=[8, 8, 16, 32, 32],\n        scale_ranges=((1, 96), (48, 192), (96, 384), (192, 768), (384, 2048)),\n        pos_scale=0.2,\n        num_grids=[40, 36, 24, 16, 12],\n        cls_down_index=0,\n        mask_feature_head=dict(\n            feat_channels=128,\n            start_level=0,\n            end_level=3,\n            out_channels=256,\n            mask_stride=4,\n            norm_cfg=dict(type='GN', num_groups=32, requires_grad=True)),\n        loss_mask=dict(type='DiceLoss', use_sigmoid=True, loss_weight=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0)),\n    # model training and testing settings\n    test_cfg=dict(\n        nms_pre=500,\n        score_thr=0.1,\n        mask_thr=0.5,\n        filter_thr=0.05,\n        kernel='gaussian',  # gaussian/linear\n        sigma=2.0,\n        max_per_img=100))\n\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\noptimizer_config = dict(\n    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_r50_fpn_3x_coco.py",
    "content": "_base_ = 'solov2_r50_fpn_1x_coco.py'\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 800), (1333, 768), (1333, 736), (1333, 704),\n                   (1333, 672), (1333, 640)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=1.0 / 3,\n    step=[27, 33])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/solov2/solov2_x101_dcn_fpn_3x_coco.py",
    "content": "_base_ = 'solov2_r50_fpn_3x_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')),\n    mask_head=dict(\n        mask_feature_head=dict(conv_cfg=dict(type='DCNv2')),\n        dcn_cfg=dict(type='DCNv2'),\n        dcn_apply_to_all_conv=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sparse_rcnn/metafile.yml",
    "content": "Collections:\n  - Name: Sparse R-CNN\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n        - Sparse R-CNN\n    Paper:\n      URL: https://arxiv.org/abs/2011.12450\n      Title: 'Sparse R-CNN: End-to-End Object Detection with Learnable Proposals'\n    README: configs/sparse_rcnn/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.9.0/mmdet/models/detectors/sparse_rcnn.py#L6\n      Version: v2.9.0\n\nModels:\n  - Name: sparse_rcnn_r50_fpn_1x_coco\n    In Collection: Sparse R-CNN\n    Config: configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco/sparse_rcnn_r50_fpn_1x_coco_20201222_214453-dc79b137.pth\n\n  - Name: sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco\n    In Collection: Sparse R-CNN\n    Config: configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco_20201218_154234-7bc5c054.pth\n\n  - Name: sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco\n    In Collection: Sparse R-CNN\n    Config: configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 45.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_024605-9fe92701.pth\n\n  - Name: sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco\n    In Collection: Sparse R-CNN\n    Config: configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco_20201223_121552-6c46c9d6.pth\n\n  - Name: sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco\n    In Collection: Sparse R-CNN\n    Config: configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20201223_023452-c23c3564.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sparse_rcnn/sparse_rcnn_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py",
    "content": "_base_ = './sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sparse_rcnn/sparse_rcnn_r101_fpn_mstrain_480-800_3x_coco.py",
    "content": "_base_ = './sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nnum_stages = 6\nnum_proposals = 100\nmodel = dict(\n    type='SparseRCNN',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=0,\n        add_extra_convs='on_input',\n        num_outs=4),\n    rpn_head=dict(\n        type='EmbeddingRPNHead',\n        num_proposals=num_proposals,\n        proposal_feature_channel=256),\n    roi_head=dict(\n        type='SparseRoIHead',\n        num_stages=num_stages,\n        stage_loss_weights=[1] * num_stages,\n        proposal_feature_channel=256,\n        bbox_roi_extractor=dict(\n            type='SingleRoIExtractor',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n            out_channels=256,\n            featmap_strides=[4, 8, 16, 32]),\n        bbox_head=[\n            dict(\n                type='DIIHead',\n                num_classes=80,\n                num_ffn_fcs=2,\n                num_heads=8,\n                num_cls_fcs=1,\n                num_reg_fcs=3,\n                feedforward_channels=2048,\n                in_channels=256,\n                dropout=0.0,\n                ffn_act_cfg=dict(type='ReLU', inplace=True),\n                dynamic_conv_cfg=dict(\n                    type='DynamicConv',\n                    in_channels=256,\n                    feat_channels=64,\n                    out_channels=256,\n                    input_feat_shape=7,\n                    act_cfg=dict(type='ReLU', inplace=True),\n                    norm_cfg=dict(type='LN')),\n                loss_bbox=dict(type='L1Loss', loss_weight=5.0),\n                loss_iou=dict(type='GIoULoss', loss_weight=2.0),\n                loss_cls=dict(\n                    type='FocalLoss',\n                    use_sigmoid=True,\n                    gamma=2.0,\n                    alpha=0.25,\n                    loss_weight=2.0),\n                bbox_coder=dict(\n                    type='DeltaXYWHBBoxCoder',\n                    clip_border=False,\n                    target_means=[0., 0., 0., 0.],\n                    target_stds=[0.5, 0.5, 1., 1.])) for _ in range(num_stages)\n        ]),\n    # training and testing settings\n    train_cfg=dict(\n        rpn=None,\n        rcnn=[\n            dict(\n                assigner=dict(\n                    type='HungarianAssigner',\n                    cls_cost=dict(type='FocalLossCost', weight=2.0),\n                    reg_cost=dict(type='BBoxL1Cost', weight=5.0),\n                    iou_cost=dict(type='IoUCost', iou_mode='giou',\n                                  weight=2.0)),\n                sampler=dict(type='PseudoSampler'),\n                pos_weight=1) for _ in range(num_stages)\n        ]),\n    test_cfg=dict(rpn=None, rcnn=dict(max_per_img=num_proposals)))\n\n# optimizer\noptimizer = dict(_delete_=True, type='AdamW', lr=0.000025, weight_decay=0.0001)\noptimizer_config = dict(_delete_=True, grad_clip=dict(max_norm=1, norm_type=2))\n# learning policy\nlr_config = dict(policy='step', step=[8, 11])\nrunner = 
dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sparse_rcnn/sparse_rcnn_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py",
    "content": "_base_ = './sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py'\nnum_proposals = 300\nmodel = dict(\n    rpn_head=dict(num_proposals=num_proposals),\n    test_cfg=dict(\n        _delete_=True, rpn=None, rcnn=dict(max_per_img=num_proposals)))\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# augmentation strategy originates from DETR.\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='AutoAugment',\n        policies=[[\n            dict(\n                type='Resize',\n                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),\n                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),\n                           (736, 1333), (768, 1333), (800, 1333)],\n                multiscale_mode='value',\n                keep_ratio=True)\n        ],\n                  [\n                      dict(\n                          type='Resize',\n                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],\n                          multiscale_mode='value',\n                          keep_ratio=True),\n                      dict(\n                          type='RandomCrop',\n                          crop_type='absolute_range',\n                          crop_size=(384, 600),\n                          allow_negative_crop=True),\n                      dict(\n                          type='Resize',\n                          img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                                     (576, 1333), (608, 1333), (640, 1333),\n                                     (672, 1333), (704, 1333), (736, 1333),\n                                     (768, 1333), (800, 1333)],\n                          multiscale_mode='value',\n                          override=True,\n                          keep_ratio=True)\n                  ]]),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/sparse_rcnn/sparse_rcnn_r50_fpn_mstrain_480-800_3x_coco.py",
    "content": "_base_ = './sparse_rcnn_r50_fpn_1x_coco.py'\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\nmin_values = (480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, value) for value in min_values],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\n\ndata = dict(train=dict(pipeline=train_pipeline))\nlr_config = dict(policy='step', step=[27, 33])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ssd/metafile.yml",
    "content": "Collections:\n  - Name: SSD\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - VGG\n    Paper:\n      URL: https://arxiv.org/abs/1512.02325\n      Title: 'SSD: Single Shot MultiBox Detector'\n    README: configs/ssd/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.14.0/mmdet/models/dense_heads/ssd_head.py#L16\n      Version: v2.14.0\n\nModels:\n  - Name: ssd300_coco\n    In Collection: SSD\n    Config: configs/ssd/ssd300_coco.py\n    Metadata:\n      Training Memory (GB): 9.9\n      inference time (ms/im):\n        - value: 22.88\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (300, 300)\n      Epochs: 120\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 25.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd300_coco/ssd300_coco_20210803_015428-d231a06e.pth\n\n  - Name: ssd512_coco\n    In Collection: SSD\n    Config: configs/ssd/ssd512_coco.py\n    Metadata:\n      Training Memory (GB): 19.4\n      inference time (ms/im):\n        - value: 32.57\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (512, 512)\n      Epochs: 120\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 29.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ssd/ssd512_coco/ssd512_coco_20210803_022849-0a47a1ca.pth\n\n  - Name: ssdlite_mobilenetv2_scratch_600e_coco\n    In Collection: SSD\n    Config: configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py\n    Metadata:\n      Training Memory (GB): 4.0\n      inference time (ms/im):\n        - value: 14.3\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (320, 320)\n      Epochs: 600\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 21.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/ssd/ssdlite_mobilenetv2_scratch_600e_coco/ssdlite_mobilenetv2_scratch_600e_coco_20210629_110627-974d9307.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ssd/ssd300_coco.py",
    "content": "_base_ = [\n    '../_base_/models/ssd300.py', '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_2x.py', '../_base_/default_runtime.py'\n]\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(300, 300), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(300, 300),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=3,\n    train=dict(\n        _delete_=True,\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict(_delete_=True)\ncustom_hooks = [\n    dict(type='NumClassCheckHook'),\n    dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')\n]\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ssd/ssd300_fp16_coco.py",
    "content": "_base_ = ['./ssd300_coco.py']\n\nfp16 = dict(loss_scale='dynamic')\n\n# learning policy\n# In order to avoid non-convergence in the early stage of\n# mixed-precision training, the warmup in the lr_config is set to linear,\n# warmup_iters increases and warmup_ratio decreases.\nlr_config = dict(warmup='linear', warmup_iters=1000, warmup_ratio=1.0 / 10)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ssd/ssd512_coco.py",
    "content": "_base_ = 'ssd300_coco.py'\ninput_size = 512\nmodel = dict(\n    neck=dict(\n        out_channels=(512, 1024, 512, 256, 256, 256, 256),\n        level_strides=(2, 2, 2, 2, 1),\n        level_paddings=(1, 1, 1, 1, 1),\n        last_kernel_size=4),\n    bbox_head=dict(\n        in_channels=(512, 1024, 512, 256, 256, 256, 256),\n        anchor_generator=dict(\n            type='SSDAnchorGenerator',\n            scale_major=False,\n            input_size=input_size,\n            basesize_ratio_range=(0.1, 0.9),\n            strides=[8, 16, 32, 64, 128, 256, 512],\n            ratios=[[2], [2, 3], [2, 3], [2, 3], [2, 3], [2], [2]])))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[1, 1, 1], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(512, 512), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(512, 512),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=3,\n    train=dict(\n        _delete_=True,\n        type='RepeatDataset',\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=2e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict(_delete_=True)\ncustom_hooks = [\n    dict(type='NumClassCheckHook'),\n    dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')\n]\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ssd/ssd512_fp16_coco.py",
    "content": "_base_ = ['./ssd512_coco.py']\n# fp16 settings\nfp16 = dict(loss_scale='dynamic')\n\n# learning policy\n# In order to avoid non-convergence in the early stage of\n# mixed-precision training, the warmup in the lr_config is set to linear,\n# warmup_iters increases and warmup_ratio decreases.\nlr_config = dict(warmup='linear', warmup_iters=1000, warmup_ratio=1.0 / 10)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/ssd/ssdlite_mobilenetv2_scratch_600e_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    type='SingleStageDetector',\n    backbone=dict(\n        type='MobileNetV2',\n        out_indices=(4, 7),\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),\n        init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),\n    neck=dict(\n        type='SSDNeck',\n        in_channels=(96, 1280),\n        out_channels=(96, 1280, 512, 256, 256, 128),\n        level_strides=(2, 2, 2, 2),\n        level_paddings=(1, 1, 1, 1),\n        l2_norm_scale=None,\n        use_depthwise=True,\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),\n        act_cfg=dict(type='ReLU6'),\n        init_cfg=dict(type='TruncNormal', layer='Conv2d', std=0.03)),\n    bbox_head=dict(\n        type='SSDHead',\n        in_channels=(96, 1280, 512, 256, 256, 128),\n        num_classes=80,\n        use_depthwise=True,\n        norm_cfg=dict(type='BN', eps=0.001, momentum=0.03),\n        act_cfg=dict(type='ReLU6'),\n        init_cfg=dict(type='Normal', layer='Conv2d', std=0.001),\n\n        # set anchor size manually instead of using the predefined\n        # SSD300 setting.\n        anchor_generator=dict(\n            type='SSDAnchorGenerator',\n            scale_major=False,\n            strides=[16, 32, 64, 107, 160, 320],\n            ratios=[[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]],\n            min_sizes=[48, 100, 150, 202, 253, 304],\n            max_sizes=[100, 150, 202, 253, 304, 320]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2])),\n    # model training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0.,\n            ignore_iof_thr=-1,\n            gt_max_assign_all=False),\n        smoothl1_beta=1.,\n        allowed_border=-1,\n        pos_weight=-1,\n        neg_pos_ratio=3,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        nms=dict(type='nms', iou_threshold=0.45),\n        min_bbox_size=0,\n        score_thr=0.02,\n        max_per_img=200))\ncudnn_benchmark = True\n\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(320, 320), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='PhotoMetricDistortion',\n        brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=320),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(320, 320),\n        flip=False,\n        transforms=[\n            dict(type='Resize', 
keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=320),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=24,\n    workers_per_gpu=4,\n    train=dict(\n        _delete_=True,\n        type='RepeatDataset',  # use RepeatDataset to speed up training\n        times=5,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\n# optimizer\noptimizer = dict(type='SGD', lr=0.015, momentum=0.9, weight_decay=4.0e-5)\noptimizer_config = dict(grad_clip=None)\n\n# learning policy\nlr_config = dict(\n    policy='CosineAnnealing',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.001,\n    min_lr=0)\nrunner = dict(type='EpochBasedRunner', max_epochs=120)\n\n# Avoid evaluation and saving weights too frequently\nevaluation = dict(interval=5, metric='bbox')\ncheckpoint_config = dict(interval=5)\ncustom_hooks = [\n    dict(type='NumClassCheckHook'),\n    dict(type='CheckInvalidLossHook', interval=50, priority='VERY_LOW')\n]\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (24 samples per GPU)\nauto_scale_lr = dict(base_batch_size=192)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/strong_baselines/mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../common/lsj_100e_coco_instance.py'\n]\n\nnorm_cfg = dict(type='SyncBN', requires_grad=True)\n# Use MMSyncBN that handles empty tensor in head. It can be changed to\n# SyncBN after https://github.com/pytorch/pytorch/issues/36530 is fixed\n# Requires MMCV-full after  https://github.com/open-mmlab/mmcv/pull/1205.\nhead_norm_cfg = dict(type='MMSyncBN', requires_grad=True)\nmodel = dict(\n    backbone=dict(\n        frozen_stages=-1,\n        norm_eval=False,\n        norm_cfg=norm_cfg,\n        init_cfg=None,\n        style='caffe'),\n    neck=dict(norm_cfg=norm_cfg),\n    rpn_head=dict(num_convs=2),\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=head_norm_cfg),\n        mask_head=dict(norm_cfg=head_norm_cfg)))\n\nfile_client_args = dict(backend='disk')\n# file_client_args = dict(\n#     backend='petrel',\n#     path_mapping=dict({\n#         './data/': 's3://openmmlab/datasets/detection/',\n#         'data/': 's3://openmmlab/datasets/detection/'\n#     }))\n\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\nimage_size = (1024, 1024)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(\n        type='Resize',\n        img_scale=image_size,\n        ratio_range=(0.1, 2.0),\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(\n        type='RandomCrop',\n        crop_type='absolute_range',\n        crop_size=image_size,\n        recompute_bbox=True,\n        allow_negative_crop=True),\n    dict(type='FilterAnnotations', min_gt_bbox_wh=(1e-2, 1e-2)),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size=image_size),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile', file_client_args=file_client_args),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\n\n# Use RepeatDataset to speed up training\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/strong_baselines/mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_100e_fp16_coco.py",
    "content": "_base_ = 'mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py'\nfp16 = dict(loss_scale=512.)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/strong_baselines/mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_400e_coco.py",
    "content": "_base_ = './mask_rcnn_r50_caffe_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py'\n\n# Use RepeatDataset to speed up training\n# change repeat time from 4 (for 100 epochs) to 16 (for 400 epochs)\ndata = dict(train=dict(times=4 * 4))\nlr_config = dict(warmup_iters=500 * 4)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/strong_baselines/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../common/lsj_100e_coco_instance.py'\n]\n\nnorm_cfg = dict(type='SyncBN', requires_grad=True)\n# Use MMSyncBN that handles empty tensor in head. It can be changed to\n# SyncBN after https://github.com/pytorch/pytorch/issues/36530 is fixed\n# Requires MMCV-full after  https://github.com/open-mmlab/mmcv/pull/1205.\nhead_norm_cfg = dict(type='MMSyncBN', requires_grad=True)\nmodel = dict(\n    # the model is trained from scratch, so init_cfg is None\n    backbone=dict(\n        frozen_stages=-1, norm_eval=False, norm_cfg=norm_cfg, init_cfg=None),\n    neck=dict(norm_cfg=norm_cfg),\n    rpn_head=dict(num_convs=2),  # leads to 0.1+ mAP\n    roi_head=dict(\n        bbox_head=dict(\n            type='Shared4Conv1FCBBoxHead',\n            conv_out_channels=256,\n            norm_cfg=head_norm_cfg),\n        mask_head=dict(norm_cfg=head_norm_cfg)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/strong_baselines/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_100e_fp16_coco.py",
    "content": "_base_ = 'mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py'\n# use FP16\nfp16 = dict(loss_scale=512.)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/strong_baselines/mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_50e_coco.py",
    "content": "_base_ = 'mask_rcnn_r50_fpn_syncbn-all_rpn-2conv_lsj_100e_coco.py'\n\n# Use RepeatDataset to speed up training\n# change repeat time from 4 (for 100 epochs) to 2 (for 50 epochs)\ndata = dict(train=dict(times=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco.py",
    "content": "_base_ = './mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py'\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_small_patch4_window7_224.pth'  # noqa\nmodel = dict(\n    backbone=dict(\n        depths=[2, 2, 18, 2],\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth'  # noqa\nmodel = dict(\n    type='MaskRCNN',\n    backbone=dict(\n        _delete_=True,\n        type='SwinTransformer',\n        embed_dims=96,\n        depths=[2, 2, 6, 2],\n        num_heads=[3, 6, 12, 24],\n        window_size=7,\n        mlp_ratio=4,\n        qkv_bias=True,\n        qk_scale=None,\n        drop_rate=0.,\n        attn_drop_rate=0.,\n        drop_path_rate=0.2,\n        patch_norm=True,\n        out_indices=(0, 1, 2, 3),\n        with_cp=False,\n        convert_weights=True,\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    neck=dict(in_channels=[96, 192, 384, 768]))\n\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=0.0001,\n    betas=(0.9, 0.999),\n    weight_decay=0.05,\n    paramwise_cfg=dict(\n        custom_keys={\n            'absolute_pos_embed': dict(decay_mult=0.),\n            'relative_position_bias_table': dict(decay_mult=0.),\n            'norm': dict(decay_mult=0.)\n        }))\nlr_config = dict(warmup_iters=1000, step=[8, 11])\nrunner = dict(max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py",
    "content": "_base_ = './mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py'\n# you need to set mode='dynamic' if you are using pytorch<=1.5.0\nfp16 = dict(loss_scale=dict(init_scale=512))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/mask_rcnn_r50_fpn.py',\n    '../_base_/datasets/coco_instance.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth'  # noqa\n\nmodel = dict(\n    type='MaskRCNN',\n    backbone=dict(\n        _delete_=True,\n        type='SwinTransformer',\n        embed_dims=96,\n        depths=[2, 2, 6, 2],\n        num_heads=[3, 6, 12, 24],\n        window_size=7,\n        mlp_ratio=4,\n        qkv_bias=True,\n        qk_scale=None,\n        drop_rate=0.,\n        attn_drop_rate=0.,\n        drop_path_rate=0.2,\n        patch_norm=True,\n        out_indices=(0, 1, 2, 3),\n        with_cp=False,\n        convert_weights=True,\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    neck=dict(in_channels=[96, 192, 384, 768]))\n\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\n\n# augmentation strategy originates from DETR / Sparse RCNN\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='AutoAugment',\n        policies=[[\n            dict(\n                type='Resize',\n                img_scale=[(480, 1333), (512, 1333), (544, 1333), (576, 1333),\n                           (608, 1333), (640, 1333), (672, 1333), (704, 1333),\n                           (736, 1333), (768, 1333), (800, 1333)],\n                multiscale_mode='value',\n                keep_ratio=True)\n        ],\n                  [\n                      dict(\n                          type='Resize',\n                          img_scale=[(400, 1333), (500, 1333), (600, 1333)],\n                          multiscale_mode='value',\n                          keep_ratio=True),\n                      dict(\n                          type='RandomCrop',\n                          crop_type='absolute_range',\n                          crop_size=(384, 600),\n                          allow_negative_crop=True),\n                      dict(\n                          type='Resize',\n                          img_scale=[(480, 1333), (512, 1333), (544, 1333),\n                                     (576, 1333), (608, 1333), (640, 1333),\n                                     (672, 1333), (704, 1333), (736, 1333),\n                                     (768, 1333), (800, 1333)],\n                          multiscale_mode='value',\n                          override=True,\n                          keep_ratio=True)\n                  ]]),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n\noptimizer = dict(\n    _delete_=True,\n    type='AdamW',\n    lr=0.0001,\n    betas=(0.9, 0.999),\n    weight_decay=0.05,\n    paramwise_cfg=dict(\n        custom_keys={\n            'absolute_pos_embed': dict(decay_mult=0.),\n            'relative_position_bias_table': dict(decay_mult=0.),\n            'norm': dict(decay_mult=0.)\n        }))\nlr_config = dict(warmup_iters=1000, step=[27, 33])\nrunner = dict(max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/swin/metafile.yml",
    "content": "Models:\n  - Name: mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco.py\n    Metadata:\n      Training Memory (GB): 11.9\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Swin Transformer\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 48.2\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 43.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-s-p4-w7_fpn_fp16_ms-crop-3x_coco_20210903_104808-b92c91f1.pth\n    Paper:\n      URL: https://arxiv.org/abs/2107.08430\n      Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows'\n    README: configs/swin/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465\n      Version: v2.16.0\n\n  - Name: mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco.py\n    Metadata:\n      Training Memory (GB): 10.2\n      Epochs: 36\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Swin Transformer\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_ms-crop-3x_coco_20210906_131725-bacf6f7b.pth\n    Paper:\n      URL: https://arxiv.org/abs/2107.08430\n      Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows'\n    README: configs/swin/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465\n      Version: v2.16.0\n\n  - Name: mask_rcnn_swin-t-p4-w7_fpn_1x_coco\n    In Collection: Mask R-CNN\n    Config: configs/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      Epochs: 12\n      Training Data: COCO\n      Training Techniques:\n        - AdamW\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Swin Transformer\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.7\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 39.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_1x_coco/mask_rcnn_swin-t-p4-w7_fpn_1x_coco_20210902_120937-9d6b7cfa.pth\n    Paper:\n      URL: https://arxiv.org/abs/2107.08430\n      Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows'\n    README: configs/swin/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465\n      Version: v2.16.0\n\n  - Name: mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco\n    In Collection: Mask R-CNN\n    Config: configs/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco.py\n    Metadata:\n      Training Memory (GB): 7.8\n      Epochs: 36\n    
  Training Data: COCO\n      Training Techniques:\n        - AdamW\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Swin Transformer\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.0\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 41.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/swin/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco/mask_rcnn_swin-t-p4-w7_fpn_fp16_ms-crop-3x_coco_20210908_165006-90a4008c.pth\n    Paper:\n      URL: https://arxiv.org/abs/2107.08430\n      Title: 'Swin Transformer: Hierarchical Vision Transformer using Shifted Windows'\n    README: configs/swin/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.16.0/mmdet/models/backbones/swin.py#L465\n      Version: v2.16.0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/swin/retinanet_swin-t-p4-w7_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\npretrained = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth'  # noqa\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='SwinTransformer',\n        embed_dims=96,\n        depths=[2, 2, 6, 2],\n        num_heads=[3, 6, 12, 24],\n        window_size=7,\n        mlp_ratio=4,\n        qkv_bias=True,\n        qk_scale=None,\n        drop_rate=0.,\n        attn_drop_rate=0.,\n        drop_path_rate=0.2,\n        patch_norm=True,\n        out_indices=(1, 2, 3),\n        # Please only add indices that would be used\n        # in FPN, otherwise some parameter will not be used\n        with_cp=False,\n        convert_weights=True,\n        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),\n    neck=dict(in_channels=[192, 384, 768], start_level=0, num_outs=5))\n\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/timm_example/retinanet_timm_efficientnet_b1_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# please install mmcls>=0.20.0\n# import mmcls.models to trigger register_module in mmcls\ncustom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='mmcls.TIMMBackbone',\n        model_name='efficientnet_b1',\n        features_only=True,\n        pretrained=True,\n        out_indices=(1, 2, 3, 4)),\n    neck=dict(in_channels=[24, 40, 112, 320]))\n\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/timm_example/retinanet_timm_tv_resnet50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/retinanet_r50_fpn.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\n# please install mmcls>=0.20.0\n# import mmcls.models to trigger register_module in mmcls\ncustom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False)\nmodel = dict(\n    backbone=dict(\n        _delete_=True,\n        type='mmcls.TIMMBackbone',\n        model_name='tv_resnet50',  # ResNet-50 with torchvision weights\n        features_only=True,\n        pretrained=True,\n        out_indices=(1, 2, 3, 4)))\n\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tood/metafile.yml",
    "content": "Collections:\n  - Name: TOOD\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - TOOD\n    Paper:\n      URL: https://arxiv.org/abs/2108.07755\n      Title: 'TOOD: Task-aligned One-stage Object Detection'\n    README: configs/tood/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.20.0/mmdet/models/detectors/tood.py#L7\n      Version: v2.20.0\n\nModels:\n  - Name: tood_r101_fpn_mstrain_2x_coco\n    In Collection: TOOD\n    Config: configs/tood/tood_r101_fpn_mstrain_2x_coco.py\n    Metadata:\n      Training Memory (GB): 6.0\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.1\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_mstrain_2x_coco/tood_r101_fpn_mstrain_2x_coco_20211210_144232-a18f53c8.pth\n\n  - Name: tood_x101_64x4d_fpn_mstrain_2x_coco\n    In Collection: TOOD\n    Config: configs/tood/tood_x101_64x4d_fpn_mstrain_2x_coco.py\n    Metadata:\n      Training Memory (GB): 10.2\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 47.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tood/tood_x101_64x4d_fpn_mstrain_2x_coco/tood_x101_64x4d_fpn_mstrain_2x_coco_20211211_003519-a4f36113.pth\n\n  - Name: tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco\n    In Collection: TOOD\n    Config: configs/tood/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py\n    Metadata:\n      Training Memory (GB): 6.2\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 49.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco_20211210_213728-4a824142.pth\n\n  - Name: tood_r50_fpn_anchor_based_1x_coco\n    In Collection: TOOD\n    Config: configs/tood/tood_r50_fpn_anchor_based_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.1\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_anchor_based_1x_coco/tood_r50_fpn_anchor_based_1x_coco_20211214_100105-b776c134.pth\n\n  - Name: tood_r50_fpn_1x_coco\n    In Collection: TOOD\n    Config: configs/tood/tood_r50_fpn_1x_coco.py\n    Metadata:\n      Training Memory (GB): 4.1\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 42.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_1x_coco/tood_r50_fpn_1x_coco_20211210_103425-20e20746.pth\n\n  - Name: tood_r50_fpn_mstrain_2x_coco\n    In Collection: TOOD\n    Config: configs/tood/tood_r50_fpn_mstrain_2x_coco.py\n    Metadata:\n      Training Memory (GB): 4.1\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tood/tood_r50_fpn_mstrain_2x_coco/tood_r50_fpn_mstrain_2x_coco_20211210_144231-3b23174c.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tood/tood_r101_fpn_dconv_c3-c5_mstrain_2x_coco.py",
    "content": "_base_ = './tood_r101_fpn_mstrain_2x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    bbox_head=dict(num_dcn=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tood/tood_r101_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './tood_r50_fpn_mstrain_2x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tood/tood_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='TOOD',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',\n        num_outs=5),\n    bbox_head=dict(\n        type='TOODHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=6,\n        feat_channels=256,\n        anchor_type='anchor_free',\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        initial_loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            activated=True,  # use probability instead of logit as input\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_cls=dict(\n            type='QualityFocalLoss',\n            use_sigmoid=True,\n            activated=True,  # use probability instead of logit as input\n            beta=2.0,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0)),\n    train_cfg=dict(\n        initial_epoch=4,\n        initial_assigner=dict(type='ATSSAssigner', topk=9),\n        assigner=dict(type='TaskAlignedAssigner', topk=13),\n        alpha=1,\n        beta=6,\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n# optimizer\noptimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n\n# custom hooks\ncustom_hooks = [dict(type='SetEpochInfoHook')]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tood/tood_r50_fpn_anchor_based_1x_coco.py",
    "content": "_base_ = './tood_r50_fpn_1x_coco.py'\nmodel = dict(bbox_head=dict(anchor_type='anchor_based'))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tood/tood_r50_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './tood_r50_fpn_1x_coco.py'\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n# multi-scale training\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 480), (1333, 800)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tood/tood_x101_64x4d_fpn_dconv_c4-c5_mstrain_2x_coco.py",
    "content": "_base_ = './tood_x101_64x4d_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deformable_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, False, True, True),\n    ),\n    bbox_head=dict(num_dcn=2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tood/tood_x101_64x4d_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './tood_r50_fpn_mstrain_2x_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tridentnet/metafile.yml",
    "content": "Collections:\n  - Name: TridentNet\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - ResNet\n        - TridentNet Block\n    Paper:\n      URL: https://arxiv.org/abs/1901.01892\n      Title: 'Scale-Aware Trident Networks for Object Detection'\n    README: configs/tridentnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.8.0/mmdet/models/detectors/trident_faster_rcnn.py#L6\n      Version: v2.8.0\n\nModels:\n  - Name: tridentnet_r50_caffe_1x_coco\n    In Collection: TridentNet\n    Config: configs/tridentnet/tridentnet_r50_caffe_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_1x_coco/tridentnet_r50_caffe_1x_coco_20201230_141838-2ec0b530.pth\n\n  - Name: tridentnet_r50_caffe_mstrain_1x_coco\n    In Collection: TridentNet\n    Config: configs/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco/tridentnet_r50_caffe_mstrain_1x_coco_20201230_141839-6ce55ccb.pth\n\n  - Name: tridentnet_r50_caffe_mstrain_3x_coco\n    In Collection: TridentNet\n    Config: configs/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco.py\n    Metadata:\n      Epochs: 36\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.3\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco/tridentnet_r50_caffe_mstrain_3x_coco_20201130_100539-46d227ba.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tridentnet/tridentnet_r50_caffe_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/models/faster_rcnn_r50_caffe_c4.py',\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n\nmodel = dict(\n    type='TridentFasterRCNN',\n    backbone=dict(\n        type='TridentResNet',\n        trident_dilations=(1, 2, 3),\n        num_branch=3,\n        test_branch_idx=1,\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron2/resnet50_caffe')),\n    roi_head=dict(type='TridentRoIHead', num_branch=3, test_branch_idx=1),\n    train_cfg=dict(\n        rpn_proposal=dict(max_per_img=500),\n        rcnn=dict(\n            sampler=dict(num=128, pos_fraction=0.5,\n                         add_gt_as_proposals=False))))\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tridentnet/tridentnet_r50_caffe_mstrain_1x_coco.py",
    "content": "_base_ = 'tridentnet_r50_caffe_1x_coco.py'\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),\n                   (1333, 768), (1333, 800)],\n        multiscale_mode='value',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\n\ndata = dict(train=dict(pipeline=train_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/tridentnet/tridentnet_r50_caffe_mstrain_3x_coco.py",
    "content": "_base_ = 'tridentnet_r50_caffe_mstrain_1x_coco.py'\n\nlr_config = dict(step=[28, 34])\nrunner = dict(type='EpochBasedRunner', max_epochs=36)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/metafile.yml",
    "content": "Collections:\n  - Name: VFNet\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n        - Varifocal Loss\n    Paper:\n      URL: https://arxiv.org/abs/2008.13367\n      Title: 'VarifocalNet: An IoU-aware Dense Object Detector'\n    README: configs/vfnet/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.6.0/mmdet/models/detectors/vfnet.py#L6\n      Version: v2.6.0\n\nModels:\n  - Name: vfnet_r50_fpn_1x_coco\n    In Collection: VFNet\n    Config: configs/vfnet/vfnet_r50_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 41.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_1x_coco/vfnet_r50_fpn_1x_coco_20201027-38db6f58.pth\n\n  - Name: vfnet_r50_fpn_mstrain_2x_coco\n    In Collection: VFNet\n    Config: configs/vfnet/vfnet_r50_fpn_mstrain_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 44.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mstrain_2x_coco/vfnet_r50_fpn_mstrain_2x_coco_20201027-7cc75bd2.pth\n\n  - Name: vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco\n    In Collection: VFNet\n    Config: configs/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 48.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-6879c318.pth\n\n  - Name: vfnet_r101_fpn_1x_coco\n    In Collection: VFNet\n    Config: configs/vfnet/vfnet_r101_fpn_1x_coco.py\n    Metadata:\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 43.6\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_1x_coco/vfnet_r101_fpn_1x_coco_20201027pth-c831ece7.pth\n\n  - Name: vfnet_r101_fpn_mstrain_2x_coco\n    In Collection: VFNet\n    Config: configs/vfnet/vfnet_r101_fpn_mstrain_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 46.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mstrain_2x_coco/vfnet_r101_fpn_mstrain_2x_coco_20201027pth-4a5d53f1.pth\n\n  - Name: vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco\n    In Collection: VFNet\n    Config: configs/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 49.2\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-7729adb5.pth\n\n  - Name: vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco\n    In Collection: VFNet\n    Config: configs/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 50.0\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-d300a6fc.pth\n\n  - Name: vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco\n    In Collection: VFNet\n    Config: configs/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py\n    Metadata:\n      Epochs: 24\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 50.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco_20201027pth-b5f6da5e.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r101_fpn_1x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r101_fpn_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_1x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r101_fpn_mdconv_c3-c5_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNet',\n        depth=101,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r101_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r2_101_fpn_mdconv_c3-c5_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='Res2Net',\n        depth=101,\n        scales=4,\n        base_width=26,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://res2net101_v1d_26w_4s')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r2_101_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='Res2Net',\n        depth=101,\n        scales=4,\n        base_width=26,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://res2net101_v1d_26w_4s')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r50_fpn_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\n# model settings\nmodel = dict(\n    type='VFNet',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_output',  # use P5\n        num_outs=5,\n        relu_before_extra_convs=True),\n    bbox_head=dict(\n        type='VFNetHead',\n        num_classes=80,\n        in_channels=256,\n        stacked_convs=3,\n        feat_channels=256,\n        strides=[8, 16, 32, 64, 128],\n        center_sampling=False,\n        dcn_on_last_conv=False,\n        use_atss=True,\n        use_vfl=True,\n        loss_cls=dict(\n            type='VarifocalLoss',\n            use_sigmoid=True,\n            alpha=0.75,\n            gamma=2.0,\n            iou_weighted=True,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.5),\n        loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(type='ATSSAssigner', topk=9),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n\n# data setting\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='DefaultFormatBundle'),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=2,\n    workers_per_gpu=2,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\n# optimizer\noptimizer = dict(\n    lr=0.01, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))\noptimizer_config = dict(grad_clip=None)\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.1,\n    step=[8, 11])\nrunner = dict(type='EpochBasedRunner', max_epochs=12)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True)),\n    bbox_head=dict(dcn_on_last_conv=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_r50_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_1x_coco.py'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Resize',\n        img_scale=[(1333, 480), (1333, 960)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='DefaultFormatBundle'),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n# learning policy\nlr_config = dict(step=[16, 22])\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_x101_32x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_x101_32x4d_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=32,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_32x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mdconv_c3-c5_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),\n        stage_with_dcn=(False, True, True, True),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/vfnet/vfnet_x101_64x4d_fpn_mstrain_2x_coco.py",
    "content": "_base_ = './vfnet_r50_fpn_mstrain_2x_coco.py'\nmodel = dict(\n    backbone=dict(\n        type='ResNeXt',\n        depth=101,\n        groups=64,\n        base_width=4,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://resnext101_64x4d')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/wider_face/ssd300_wider_face.py",
    "content": "_base_ = [\n    '../_base_/models/ssd300.py', '../_base_/datasets/wider_face.py',\n    '../_base_/default_runtime.py'\n]\nmodel = dict(bbox_head=dict(num_classes=1))\n# optimizer\noptimizer = dict(type='SGD', lr=0.012, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict()\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.001,\n    step=[16, 20])\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=24)\nlog_config = dict(interval=1)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolact/metafile.yml",
    "content": "Collections:\n  - Name: YOLACT\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - FPN\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/1904.02689\n      Title: 'YOLACT: Real-time Instance Segmentation'\n    README: configs/yolact/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.5.0/mmdet/models/detectors/yolact.py#L9\n      Version: v2.5.0\n\nModels:\n  - Name: yolact_r50_1x8_coco\n    In Collection: YOLACT\n    Config: configs/yolact/yolact_r50_1x8_coco.py\n    Metadata:\n      Training Resources: 1x V100 GPU\n      Batch Size: 8\n      inference time (ms/im):\n        - value: 23.53\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (550, 550)\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 29.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r50_1x8_coco/yolact_r50_1x8_coco_20200908-f38d58df.pth\n\n  - Name: yolact_r50_8x8_coco\n    In Collection: YOLACT\n    Config: configs/yolact/yolact_r50_8x8_coco.py\n    Metadata:\n      Batch Size: 64\n      inference time (ms/im):\n        - value: 23.53\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (550, 550)\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 28.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r50_8x8_coco/yolact_r50_8x8_coco_20200908-ca34f5db.pth\n\n  - Name: yolact_r101_1x8_coco\n    In Collection: YOLACT\n    Config: configs/yolact/yolact_r101_1x8_coco.py\n    Metadata:\n      Training Resources: 1x V100 GPU\n      Batch Size: 8\n      inference time (ms/im):\n        - value: 29.85\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (550, 550)\n    Results:\n      - Task: Instance Segmentation\n        Dataset: COCO\n        Metrics:\n          mask AP: 30.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r101_1x8_coco/yolact_r101_1x8_coco_20200908-4cbe9101.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolact/yolact_r101_1x8_coco.py",
    "content": "_base_ = './yolact_r50_1x8_coco.py'\n\nmodel = dict(\n    backbone=dict(\n        depth=101,\n        init_cfg=dict(type='Pretrained',\n                      checkpoint='torchvision://resnet101')))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolact/yolact_r50_1x8_coco.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n\n# model settings\nimg_size = 550\nmodel = dict(\n    type='YOLACT',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=-1,  # do not freeze stem\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=False,  # update the statistics of bn\n        zero_init_residual=False,\n        style='pytorch',\n        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),\n    neck=dict(\n        type='FPN',\n        in_channels=[256, 512, 1024, 2048],\n        out_channels=256,\n        start_level=1,\n        add_extra_convs='on_input',\n        num_outs=5,\n        upsample_cfg=dict(mode='bilinear')),\n    bbox_head=dict(\n        type='YOLACTHead',\n        num_classes=80,\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=3,\n            scales_per_octave=1,\n            base_sizes=[8, 16, 32, 64, 128],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[550.0 / x for x in [69, 35, 18, 9, 5]],\n            centers=[(550 * 0.5 / x, 550 * 0.5 / x)\n                     for x in [69, 35, 18, 9, 5]]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            reduction='none',\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5),\n        num_head_convs=1,\n        num_protos=32,\n        use_ohem=True),\n    mask_head=dict(\n        type='YOLACTProtonet',\n        in_channels=256,\n        num_protos=32,\n        num_classes=80,\n        max_masks_to_train=100,\n        loss_mask_weight=6.125),\n    segm_head=dict(\n        type='YOLACTSegmHead',\n        num_classes=80,\n        in_channels=256,\n        loss_segm=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='MaxIoUAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.4,\n            min_pos_iou=0.,\n            ignore_iof_thr=-1,\n            gt_max_assign_all=False),\n        # smoothl1_beta=1.,\n        allowed_border=-1,\n        pos_weight=-1,\n        neg_pos_ratio=3,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        iou_thr=0.5,\n        top_k=200,\n        max_per_img=100))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.68, 116.78, 103.94], std=[58.40, 57.12, 57.38], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),\n    dict(type='FilterAnnotations', min_gt_bbox_wh=(4.0, 4.0)),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 4)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(img_size, img_size), keep_ratio=False),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(\n        type='PhotoMetricDistortion',\n        
brightness_delta=32,\n        contrast_range=(0.5, 1.5),\n        saturation_range=(0.5, 1.5),\n        hue_delta=18),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_masks']),\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(img_size, img_size),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=False),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=1e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict()\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=500,\n    warmup_ratio=0.1,\n    step=[20, 42, 49, 52])\nrunner = dict(type='EpochBasedRunner', max_epochs=55)\ncudnn_benchmark = True\nevaluation = dict(metric=['bbox', 'segm'])\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (1 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=8)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolact/yolact_r50_8x8_coco.py",
    "content": "_base_ = 'yolact_r50_1x8_coco.py'\n\noptimizer = dict(type='SGD', lr=8e-3, momentum=0.9, weight_decay=5e-4)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=1000,\n    warmup_ratio=0.1,\n    step=[20, 42, 49, 52])\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolo/metafile.yml",
    "content": "Collections:\n  - Name: YOLOv3\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - DarkNet\n    Paper:\n      URL: https://arxiv.org/abs/1804.02767\n      Title: 'YOLOv3: An Incremental Improvement'\n    README: configs/yolo/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.4.0/mmdet/models/detectors/yolo.py#L8\n      Version: v2.4.0\n\nModels:\n  - Name: yolov3_d53_320_273e_coco\n    In Collection: YOLOv3\n    Config: configs/yolo/yolov3_d53_320_273e_coco.py\n    Metadata:\n      Training Memory (GB): 2.7\n      inference time (ms/im):\n        - value: 15.65\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (320, 320)\n      Epochs: 273\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 27.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_320_273e_coco/yolov3_d53_320_273e_coco-421362b6.pth\n\n  - Name: yolov3_d53_mstrain-416_273e_coco\n    In Collection: YOLOv3\n    Config: configs/yolo/yolov3_d53_mstrain-416_273e_coco.py\n    Metadata:\n      Training Memory (GB): 3.8\n      inference time (ms/im):\n        - value: 16.34\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (416, 416)\n      Epochs: 273\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 30.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-416_273e_coco/yolov3_d53_mstrain-416_273e_coco-2b60fcd9.pth\n\n  - Name: yolov3_d53_mstrain-608_273e_coco\n    In Collection: YOLOv3\n    Config: configs/yolo/yolov3_d53_mstrain-608_273e_coco.py\n    Metadata:\n      Training Memory (GB): 7.4\n      inference time (ms/im):\n        - value: 20.79\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP32\n          resolution: (608, 608)\n      Epochs: 273\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 33.7\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_mstrain-608_273e_coco/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth\n\n  - Name: yolov3_d53_fp16_mstrain-608_273e_coco\n    In Collection: YOLOv3\n    Config: configs/yolo/yolov3_d53_fp16_mstrain-608_273e_coco.py\n    Metadata:\n      Training Memory (GB): 4.7\n      inference time (ms/im):\n        - value: 20.79\n          hardware: V100\n          backend: PyTorch\n          batch size: 1\n          mode: FP16\n          resolution: (608, 608)\n      Epochs: 273\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 33.8\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_d53_fp16_mstrain-608_273e_coco/yolov3_d53_fp16_mstrain-608_273e_coco_20210517_213542-4bc34944.pth\n\n  - Name: yolov3_mobilenetv2_320_300e_coco\n    In Collection: YOLOv3\n    Config: configs/yolo/yolov3_mobilenetv2_320_300e_coco.py\n    Metadata:\n      Training Memory (GB): 3.2\n      Epochs: 300\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 22.2\n    Weights: 
https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_320_300e_coco/yolov3_mobilenetv2_320_300e_coco_20210719_215349-d18dff72.pth\n\n  - Name: yolov3_mobilenetv2_mstrain-416_300e_coco\n    In Collection: YOLOv3\n    Config: configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py\n    Metadata:\n      Training Memory (GB): 5.3\n      Epochs: 300\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 23.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco/yolov3_mobilenetv2_mstrain-416_300e_coco_20210718_010823-f68a07b3.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolo/yolov3_d53_320_273e_coco.py",
    "content": "_base_ = './yolov3_d53_mstrain-608_273e_coco.py'\n# dataset settings\nimg_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 2)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(320, 320), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='PhotoMetricDistortion'),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(320, 320),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolo/yolov3_d53_fp16_mstrain-608_273e_coco.py",
    "content": "_base_ = './yolov3_d53_mstrain-608_273e_coco.py'\n# fp16 settings\nfp16 = dict(loss_scale='dynamic')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolo/yolov3_d53_mstrain-416_273e_coco.py",
    "content": "_base_ = './yolov3_d53_mstrain-608_273e_coco.py'\n# dataset settings\nimg_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 2)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=[(320, 320), (416, 416)], keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='PhotoMetricDistortion'),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(416, 416),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n# model settings\nmodel = dict(\n    type='YOLOV3',\n    backbone=dict(\n        type='Darknet',\n        depth=53,\n        out_indices=(3, 4, 5),\n        init_cfg=dict(type='Pretrained', checkpoint='open-mmlab://darknet53')),\n    neck=dict(\n        type='YOLOV3Neck',\n        num_scales=3,\n        in_channels=[1024, 512, 256],\n        out_channels=[512, 256, 128]),\n    bbox_head=dict(\n        type='YOLOV3Head',\n        num_classes=80,\n        in_channels=[512, 256, 128],\n        out_channels=[1024, 512, 256],\n        anchor_generator=dict(\n            type='YOLOAnchorGenerator',\n            base_sizes=[[(116, 90), (156, 198), (373, 326)],\n                        [(30, 61), (62, 45), (59, 119)],\n                        [(10, 13), (16, 30), (33, 23)]],\n            strides=[32, 16, 8]),\n        bbox_coder=dict(type='YOLOBBoxCoder'),\n        featmap_strides=[32, 16, 8],\n        loss_cls=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            loss_weight=1.0,\n            reduction='sum'),\n        loss_conf=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            loss_weight=1.0,\n            reduction='sum'),\n        loss_xy=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            loss_weight=2.0,\n            reduction='sum'),\n        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='GridAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0)),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        conf_thr=0.005,\n        nms=dict(type='nms', iou_threshold=0.45),\n        max_per_img=100))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(mean=[0, 0, 0], std=[255., 255., 255.], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile', to_float32=True),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 2)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=[(320, 320), (608, 608)], keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='PhotoMetricDistortion'),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(608, 608),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    train=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=train_pipeline),\n    val=dict(\n   
     type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=2000,  # same as burn-in in darknet\n    warmup_ratio=0.1,\n    step=[218, 246])\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=273)\nevaluation = dict(interval=1, metric=['bbox'])\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolo/yolov3_mobilenetv2_320_300e_coco.py",
    "content": "_base_ = ['./yolov3_mobilenetv2_mstrain-416_300e_coco.py']\n\n# yapf:disable\nmodel = dict(\n    bbox_head=dict(\n        anchor_generator=dict(\n            base_sizes=[[(220, 125), (128, 222), (264, 266)],\n                        [(35, 87), (102, 96), (60, 170)],\n                        [(10, 15), (24, 36), (72, 42)]])))\n# yapf:enable\n\n# dataset settings\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 2)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),\n        min_crop_size=0.3),\n    dict(type='Resize', img_scale=(320, 320), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='PhotoMetricDistortion'),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(320, 320),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='DefaultFormatBundle'),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    train=dict(dataset=dict(pipeline=train_pipeline)),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolo/yolov3_mobilenetv2_mstrain-416_300e_coco.py",
    "content": "_base_ = '../_base_/default_runtime.py'\n# model settings\nmodel = dict(\n    type='YOLOV3',\n    backbone=dict(\n        type='MobileNetV2',\n        out_indices=(2, 4, 6),\n        act_cfg=dict(type='LeakyReLU', negative_slope=0.1),\n        init_cfg=dict(\n            type='Pretrained', checkpoint='open-mmlab://mmdet/mobilenet_v2')),\n    neck=dict(\n        type='YOLOV3Neck',\n        num_scales=3,\n        in_channels=[320, 96, 32],\n        out_channels=[96, 96, 96]),\n    bbox_head=dict(\n        type='YOLOV3Head',\n        num_classes=80,\n        in_channels=[96, 96, 96],\n        out_channels=[96, 96, 96],\n        anchor_generator=dict(\n            type='YOLOAnchorGenerator',\n            base_sizes=[[(116, 90), (156, 198), (373, 326)],\n                        [(30, 61), (62, 45), (59, 119)],\n                        [(10, 13), (16, 30), (33, 23)]],\n            strides=[32, 16, 8]),\n        bbox_coder=dict(type='YOLOBBoxCoder'),\n        featmap_strides=[32, 16, 8],\n        loss_cls=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            loss_weight=1.0,\n            reduction='sum'),\n        loss_conf=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            loss_weight=1.0,\n            reduction='sum'),\n        loss_xy=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=True,\n            loss_weight=2.0,\n            reduction='sum'),\n        loss_wh=dict(type='MSELoss', loss_weight=2.0, reduction='sum')),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='GridAssigner',\n            pos_iou_thr=0.5,\n            neg_iou_thr=0.5,\n            min_pos_iou=0)),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        conf_thr=0.005,\n        nms=dict(type='nms', iou_threshold=0.45),\n        max_per_img=100))\n# dataset settings\ndataset_type = 'CocoDataset'\ndata_root = 'data/coco/'\nimg_norm_cfg = dict(\n    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(\n        type='Expand',\n        mean=img_norm_cfg['mean'],\n        to_rgb=img_norm_cfg['to_rgb'],\n        ratio_range=(1, 2)),\n    dict(\n        type='MinIoURandomCrop',\n        min_ious=(0.4, 0.5, 0.6, 0.7, 0.8, 0.9),\n        min_crop_size=0.3),\n    dict(\n        type='Resize',\n        img_scale=[(320, 320), (416, 416)],\n        multiscale_mode='range',\n        keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='PhotoMetricDistortion'),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(416, 416),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='DefaultFormatBundle'),\n            dict(type='Collect', keys=['img'])\n        ])\n]\ndata = dict(\n    samples_per_gpu=24,\n    workers_per_gpu=4,\n    train=dict(\n        type='RepeatDataset',  # use RepeatDataset to speed up training\n   
     times=10,\n        dataset=dict(\n            type=dataset_type,\n            ann_file=data_root + 'annotations/instances_train2017.json',\n            img_prefix=data_root + 'train2017/',\n            pipeline=train_pipeline)),\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\n# optimizer\noptimizer = dict(type='SGD', lr=0.003, momentum=0.9, weight_decay=0.0005)\noptimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))\n# learning policy\nlr_config = dict(\n    policy='step',\n    warmup='linear',\n    warmup_iters=4000,\n    warmup_ratio=0.0001,\n    step=[24, 28])\n# runtime settings\nrunner = dict(type='EpochBasedRunner', max_epochs=30)\nevaluation = dict(interval=1, metric=['bbox'])\nfind_unused_parameters = True\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (24 samples per GPU)\nauto_scale_lr = dict(base_batch_size=192)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolof/metafile.yml",
    "content": "Collections:\n  - Name: YOLOF\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Momentum\n        - Weight Decay\n      Training Resources: 8x V100 GPUs\n      Architecture:\n        - Dilated Encoder\n        - ResNet\n    Paper:\n      URL: https://arxiv.org/abs/2103.09460\n      Title: 'You Only Look One-level Feature'\n    README: configs/yolof/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.12.0/mmdet/models/detectors/yolof.py#L6\n      Version: v2.12.0\n\nModels:\n  - Name: yolof_r50_c5_8x8_1x_coco\n    In Collection: YOLOF\n    Config: configs/yolof/yolof_r50_c5_8x8_1x_coco.py\n    Metadata:\n      Training Memory (GB): 8.3\n      Epochs: 12\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 37.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolof/yolof_r50_c5_8x8_1x_coco/yolof_r50_c5_8x8_1x_coco_20210425_024427-8e864411.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolof/yolof_r50_c5_8x8_1x_coco.py",
    "content": "_base_ = [\n    '../_base_/datasets/coco_detection.py',\n    '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'\n]\nmodel = dict(\n    type='YOLOF',\n    backbone=dict(\n        type='ResNet',\n        depth=50,\n        num_stages=4,\n        out_indices=(3, ),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=False),\n        norm_eval=True,\n        style='caffe',\n        init_cfg=dict(\n            type='Pretrained',\n            checkpoint='open-mmlab://detectron/resnet50_caffe')),\n    neck=dict(\n        type='DilatedEncoder',\n        in_channels=2048,\n        out_channels=512,\n        block_mid_channels=128,\n        num_residual_blocks=4,\n        block_dilations=[2, 4, 6, 8]),\n    bbox_head=dict(\n        type='YOLOFHead',\n        num_classes=80,\n        in_channels=512,\n        reg_decoded_bbox=True,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[1, 2, 4, 8, 16],\n            strides=[32]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1., 1., 1., 1.],\n            add_ctr_clamp=True,\n            ctr_clamp=32),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.0)),\n    # training and testing settings\n    train_cfg=dict(\n        assigner=dict(\n            type='UniformAssigner', pos_ignore_thr=0.15, neg_ignore_thr=0.7),\n        allowed_border=-1,\n        pos_weight=-1,\n        debug=False),\n    test_cfg=dict(\n        nms_pre=1000,\n        min_bbox_size=0,\n        score_thr=0.05,\n        nms=dict(type='nms', iou_threshold=0.6),\n        max_per_img=100))\n# optimizer\noptimizer = dict(\n    type='SGD',\n    lr=0.12,\n    momentum=0.9,\n    weight_decay=0.0001,\n    paramwise_cfg=dict(\n        norm_decay_mult=0., custom_keys={'backbone': dict(lr_mult=1. 
/ 3)}))\nlr_config = dict(warmup_iters=1500, warmup_ratio=0.00066667)\n\n# use caffe img_norm\nimg_norm_cfg = dict(\n    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)\ntrain_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(type='LoadAnnotations', with_bbox=True),\n    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='RandomShift', shift_ratio=0.5, max_shift_px=32),\n    dict(type='Normalize', **img_norm_cfg),\n    dict(type='Pad', size_divisor=32),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(1333, 800),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(type='Normalize', **img_norm_cfg),\n            dict(type='Pad', size_divisor=32),\n            dict(type='ImageToTensor', keys=['img']),\n            dict(type='Collect', keys=['img']),\n        ])\n]\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=8,\n    train=dict(pipeline=train_pipeline),\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolof/yolof_r50_c5_8x8_iter-1x_coco.py",
    "content": "_base_ = './yolof_r50_c5_8x8_1x_coco.py'\n\n# We implemented the iter-based config according to the source code.\n# COCO dataset has 117266 images after filtering. We use 8 gpu and\n# 8 batch size training, so 22500 is equivalent to\n# 22500/(117266/(8x8))=12.3 epoch, 15000 is equivalent to 8.2 epoch,\n# 20000 is equivalent to 10.9 epoch. Due to lr(0.12) is large,\n# the iter-based and epoch-based setting have about 0.2 difference on\n# the mAP evaluation value.\nlr_config = dict(step=[15000, 20000])\nrunner = dict(_delete_=True, type='IterBasedRunner', max_iters=22500)\ncheckpoint_config = dict(interval=2500)\nevaluation = dict(interval=4500)\nlog_config = dict(interval=20)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolox/metafile.yml",
    "content": "Collections:\n  - Name: YOLOX\n    Metadata:\n      Training Data: COCO\n      Training Techniques:\n        - SGD with Nesterov\n        - Weight Decay\n        - Cosine Annealing Lr Updater\n      Training Resources: 8x TITANXp GPUs\n      Architecture:\n        - CSPDarkNet\n        - PAFPN\n    Paper:\n      URL: https://arxiv.org/abs/2107.08430\n      Title: 'YOLOX: Exceeding YOLO Series in 2021'\n    README: configs/yolox/README.md\n    Code:\n      URL: https://github.com/open-mmlab/mmdetection/blob/v2.15.1/mmdet/models/detectors/yolox.py#L6\n      Version: v2.15.1\n\n\nModels:\n  - Name: yolox_s_8x8_300e_coco\n    In Collection: YOLOX\n    Config: configs/yolox/yolox_s_8x8_300e_coco.py\n    Metadata:\n      Training Memory (GB): 7.6\n      Epochs: 300\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 40.5\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_s_8x8_300e_coco/yolox_s_8x8_300e_coco_20211121_095711-4592a793.pth\n  - Name: yolox_l_8x8_300e_coco\n    In Collection: YOLOX\n    Config: configs/yolox/yolox_l_8x8_300e_coco.py\n    Metadata:\n      Training Memory (GB): 19.9\n      Epochs: 300\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 49.4\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_l_8x8_300e_coco/yolox_l_8x8_300e_coco_20211126_140236-d3bd2b23.pth\n  - Name: yolox_x_8x8_300e_coco\n    In Collection: YOLOX\n    Config: configs/yolox/yolox_x_8x8_300e_coco.py\n    Metadata:\n      Training Memory (GB): 28.1\n      Epochs: 300\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 50.9\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_x_8x8_300e_coco/yolox_x_8x8_300e_coco_20211126_140254-1ef88d67.pth\n  - Name: yolox_tiny_8x8_300e_coco\n    In Collection: YOLOX\n    Config: configs/yolox/yolox_tiny_8x8_300e_coco.py\n    Metadata:\n      Training Memory (GB): 3.5\n      Epochs: 300\n    Results:\n      - Task: Object Detection\n        Dataset: COCO\n        Metrics:\n          box AP: 32.0\n    Weights: https://download.openmmlab.com/mmdetection/v2.0/yolox/yolox_tiny_8x8_300e_coco/yolox_tiny_8x8_300e_coco_20211124_171234-b4047906.pth\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolox/yolox_l_8x8_300e_coco.py",
    "content": "_base_ = './yolox_s_8x8_300e_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(deepen_factor=1.0, widen_factor=1.0),\n    neck=dict(\n        in_channels=[256, 512, 1024], out_channels=256, num_csp_blocks=3),\n    bbox_head=dict(in_channels=256, feat_channels=256))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolox/yolox_m_8x8_300e_coco.py",
    "content": "_base_ = './yolox_s_8x8_300e_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(deepen_factor=0.67, widen_factor=0.75),\n    neck=dict(in_channels=[192, 384, 768], out_channels=192, num_csp_blocks=2),\n    bbox_head=dict(in_channels=192, feat_channels=192),\n)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolox/yolox_nano_8x8_300e_coco.py",
    "content": "_base_ = './yolox_tiny_8x8_300e_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(deepen_factor=0.33, widen_factor=0.25, use_depthwise=True),\n    neck=dict(\n        in_channels=[64, 128, 256],\n        out_channels=64,\n        num_csp_blocks=1,\n        use_depthwise=True),\n    bbox_head=dict(in_channels=64, feat_channels=64, use_depthwise=True))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolox/yolox_s_8x8_300e_coco.py",
    "content": "_base_ = ['../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py']\n\nimg_scale = (640, 640)  # height, width\n\n# model settings\nmodel = dict(\n    type='YOLOX',\n    input_size=img_scale,\n    random_size_range=(15, 25),\n    random_size_interval=10,\n    backbone=dict(type='CSPDarknet', deepen_factor=0.33, widen_factor=0.5),\n    neck=dict(\n        type='YOLOXPAFPN',\n        in_channels=[128, 256, 512],\n        out_channels=128,\n        num_csp_blocks=1),\n    bbox_head=dict(\n        type='YOLOXHead', num_classes=80, in_channels=128, feat_channels=128),\n    train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),\n    # In order to align the source code, the threshold of the val phase is\n    # 0.01, and the threshold of the test phase is 0.001.\n    test_cfg=dict(score_thr=0.01, nms=dict(type='nms', iou_threshold=0.65)))\n\n# dataset settings\ndata_root = 'data/coco/'\ndataset_type = 'CocoDataset'\n\ntrain_pipeline = [\n    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),\n    dict(\n        type='RandomAffine',\n        scaling_ratio_range=(0.1, 2),\n        border=(-img_scale[0] // 2, -img_scale[1] // 2)),\n    dict(\n        type='MixUp',\n        img_scale=img_scale,\n        ratio_range=(0.8, 1.6),\n        pad_val=114.0),\n    dict(type='YOLOXHSVRandomAug'),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    # According to the official implementation, multi-scale\n    # training is not considered here but in the\n    # 'mmdet/models/detectors/yolox.py'.\n    dict(type='Resize', img_scale=img_scale, keep_ratio=True),\n    dict(\n        type='Pad',\n        pad_to_square=True,\n        # If the image is three-channel, the pad value needs\n        # to be set separately for each channel.\n        pad_val=dict(img=(114.0, 114.0, 114.0))),\n    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\n\ntrain_dataset = dict(\n    type='MultiImageMixDataset',\n    dataset=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_train2017.json',\n        img_prefix=data_root + 'train2017/',\n        pipeline=[\n            dict(type='LoadImageFromFile'),\n            dict(type='LoadAnnotations', with_bbox=True)\n        ],\n        filter_empty_gt=False,\n    ),\n    pipeline=train_pipeline)\n\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=img_scale,\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(\n                type='Pad',\n                pad_to_square=True,\n                pad_val=dict(img=(114.0, 114.0, 114.0))),\n            dict(type='DefaultFormatBundle'),\n            dict(type='Collect', keys=['img'])\n        ])\n]\n\ndata = dict(\n    samples_per_gpu=8,\n    workers_per_gpu=4,\n    persistent_workers=True,\n    train=train_dataset,\n    val=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline),\n    test=dict(\n        type=dataset_type,\n        ann_file=data_root + 'annotations/instances_val2017.json',\n        img_prefix=data_root + 'val2017/',\n        pipeline=test_pipeline))\n\n# optimizer\n# default 8 gpu\noptimizer = dict(\n    type='SGD',\n    lr=0.01,\n    
momentum=0.9,\n    weight_decay=5e-4,\n    nesterov=True,\n    paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.))\noptimizer_config = dict(grad_clip=None)\n\nmax_epochs = 300\nnum_last_epochs = 15\nresume_from = None\ninterval = 10\n\n# learning policy\nlr_config = dict(\n    _delete_=True,\n    policy='YOLOX',\n    warmup='exp',\n    by_epoch=False,\n    warmup_by_epoch=True,\n    warmup_ratio=1,\n    warmup_iters=5,  # 5 epoch\n    num_last_epochs=num_last_epochs,\n    min_lr_ratio=0.05)\n\nrunner = dict(type='EpochBasedRunner', max_epochs=max_epochs)\n\ncustom_hooks = [\n    dict(\n        type='YOLOXModeSwitchHook',\n        num_last_epochs=num_last_epochs,\n        priority=48),\n    dict(\n        type='SyncNormHook',\n        num_last_epochs=num_last_epochs,\n        interval=interval,\n        priority=48),\n    dict(\n        type='ExpMomentumEMAHook',\n        resume_from=resume_from,\n        momentum=0.0001,\n        priority=49)\n]\ncheckpoint_config = dict(interval=interval)\nevaluation = dict(\n    save_best='auto',\n    # The evaluation interval is 'interval' when running epoch is\n    # less than ‘max_epochs - num_last_epochs’.\n    # The evaluation interval is 1 when running epoch is greater than\n    # or equal to ‘max_epochs - num_last_epochs’.\n    interval=interval,\n    dynamic_intervals=[(max_epochs - num_last_epochs, 1)],\n    metric='bbox')\nlog_config = dict(interval=50)\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolox/yolox_tiny_8x8_300e_coco.py",
    "content": "_base_ = './yolox_s_8x8_300e_coco.py'\n\n# model settings\nmodel = dict(\n    random_size_range=(10, 20),\n    backbone=dict(deepen_factor=0.33, widen_factor=0.375),\n    neck=dict(in_channels=[96, 192, 384], out_channels=96),\n    bbox_head=dict(in_channels=96, feat_channels=96))\n\nimg_scale = (640, 640)  # height, width\n\ntrain_pipeline = [\n    dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),\n    dict(\n        type='RandomAffine',\n        scaling_ratio_range=(0.5, 1.5),\n        border=(-img_scale[0] // 2, -img_scale[1] // 2)),\n    dict(type='YOLOXHSVRandomAug'),\n    dict(type='RandomFlip', flip_ratio=0.5),\n    dict(type='Resize', img_scale=img_scale, keep_ratio=True),\n    dict(\n        type='Pad',\n        pad_to_square=True,\n        pad_val=dict(img=(114.0, 114.0, 114.0))),\n    dict(type='FilterAnnotations', min_gt_bbox_wh=(1, 1), keep_empty=False),\n    dict(type='DefaultFormatBundle'),\n    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n]\n\ntest_pipeline = [\n    dict(type='LoadImageFromFile'),\n    dict(\n        type='MultiScaleFlipAug',\n        img_scale=(416, 416),\n        flip=False,\n        transforms=[\n            dict(type='Resize', keep_ratio=True),\n            dict(type='RandomFlip'),\n            dict(\n                type='Pad',\n                pad_to_square=True,\n                pad_val=dict(img=(114.0, 114.0, 114.0))),\n            dict(type='DefaultFormatBundle'),\n            dict(type='Collect', keys=['img'])\n        ])\n]\n\ntrain_dataset = dict(pipeline=train_pipeline)\n\ndata = dict(\n    train=train_dataset,\n    val=dict(pipeline=test_pipeline),\n    test=dict(pipeline=test_pipeline))\n\n# NOTE: `auto_scale_lr` is for automatically scaling LR,\n# USER SHOULD NOT CHANGE ITS VALUES.\n# base_batch_size = (8 GPUs) x (8 samples per GPU)\nauto_scale_lr = dict(base_batch_size=64)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/configs/yolox/yolox_x_8x8_300e_coco.py",
    "content": "_base_ = './yolox_s_8x8_300e_coco.py'\n\n# model settings\nmodel = dict(\n    backbone=dict(deepen_factor=1.33, widen_factor=1.25),\n    neck=dict(\n        in_channels=[320, 640, 1280], out_channels=320, num_csp_blocks=4),\n    bbox_head=dict(in_channels=320, feat_channels=320))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/en/Makefile",
    "content": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the environment for the first two.\nSPHINXOPTS    ?=\nSPHINXBUILD   ?= sphinx-build\nSOURCEDIR     = .\nBUILDDIR      = _build\n\n# Put it first so that \"make\" without argument is like \"make help\".\nhelp:\n\t@$(SPHINXBUILD) -M help \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n.PHONY: help Makefile\n\n# Catch-all target: route all unknown targets to Sphinx using the new\n# \"make mode\" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).\n%: Makefile\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/en/_static/css/readthedocs.css",
    "content": ".header-logo {\n    background-image: url(\"../image/mmdet-logo.png\");\n    background-size: 156px 40px;\n    height: 40px;\n    width: 156px;\n}\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/en/api.rst",
    "content": "mmdet.apis\n--------------\n.. automodule:: mmdet.apis\n    :members:\n\nmmdet.core\n--------------\n\nanchor\n^^^^^^^^^^\n.. automodule:: mmdet.core.anchor\n    :members:\n\nbbox\n^^^^^^^^^^\n.. automodule:: mmdet.core.bbox\n    :members:\n\nexport\n^^^^^^^^^^\n.. automodule:: mmdet.core.export\n    :members:\n\nmask\n^^^^^^^^^^\n.. automodule:: mmdet.core.mask\n    :members:\n\nevaluation\n^^^^^^^^^^\n.. automodule:: mmdet.core.evaluation\n    :members:\n\npost_processing\n^^^^^^^^^^^^^^^\n.. automodule:: mmdet.core.post_processing\n    :members:\n\nutils\n^^^^^^^^^^\n.. automodule:: mmdet.core.utils\n    :members:\n\nmmdet.datasets\n--------------\n\ndatasets\n^^^^^^^^^^\n.. automodule:: mmdet.datasets\n    :members:\n\npipelines\n^^^^^^^^^^\n.. automodule:: mmdet.datasets.pipelines\n    :members:\n\nsamplers\n^^^^^^^^^^\n.. automodule:: mmdet.datasets.samplers\n    :members:\n\napi_wrappers\n^^^^^^^^^^^^\n.. automodule:: mmdet.datasets.api_wrappers\n    :members:\n\nmmdet.models\n--------------\n\ndetectors\n^^^^^^^^^^\n.. automodule:: mmdet.models.detectors\n    :members:\n\nbackbones\n^^^^^^^^^^\n.. automodule:: mmdet.models.backbones\n    :members:\n\nnecks\n^^^^^^^^^^^^\n.. automodule:: mmdet.models.necks\n    :members:\n\ndense_heads\n^^^^^^^^^^^^\n.. automodule:: mmdet.models.dense_heads\n    :members:\n\nroi_heads\n^^^^^^^^^^\n.. automodule:: mmdet.models.roi_heads\n    :members:\n\nlosses\n^^^^^^^^^^\n.. automodule:: mmdet.models.losses\n    :members:\n\nutils\n^^^^^^^^^^\n.. automodule:: mmdet.models.utils\n    :members:\n\nmmdet.utils\n--------------\n.. automodule::mmdet.utils\n    :members:\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/en/conf.py",
    "content": "# Configuration file for the Sphinx documentation builder.\n#\n# This file only contains a selection of the most common options. For a full\n# list see the documentation:\n# https://www.sphinx-doc.org/en/master/usage/configuration.html\n\n# -- Path setup --------------------------------------------------------------\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\n#\nimport os\nimport subprocess\nimport sys\n\nimport pytorch_sphinx_theme\n\nsys.path.insert(0, os.path.abspath('../..'))\n\n# -- Project information -----------------------------------------------------\n\nproject = 'MMDetection'\ncopyright = '2018-2021, OpenMMLab'\nauthor = 'MMDetection Authors'\nversion_file = '../../mmdet/version.py'\n\n\ndef get_version():\n    with open(version_file, 'r') as f:\n        exec(compile(f.read(), version_file, 'exec'))\n    return locals()['__version__']\n\n\n# The full version, including alpha/beta/rc tags\nrelease = get_version()\n\n# -- General configuration ---------------------------------------------------\n\n# Add any Sphinx extension module names here, as strings. They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    'sphinx.ext.autodoc',\n    'sphinx.ext.napoleon',\n    'sphinx.ext.viewcode',\n    'myst_parser',\n    'sphinx_markdown_tables',\n    'sphinx_copybutton',\n]\n\nmyst_enable_extensions = ['colon_fence']\nmyst_heading_anchors = 3\n\nautodoc_mock_imports = [\n    'matplotlib', 'pycocotools', 'terminaltables', 'mmdet.version', 'mmcv.ops'\n]\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = ['_templates']\n\n# The suffix(es) of source filenames.\n# You can specify multiple suffix as a list of string:\n#\nsource_suffix = {\n    '.rst': 'restructuredtext',\n    '.md': 'markdown',\n}\n\n# The master toctree document.\nmaster_doc = 'index'\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\n# This pattern also affects html_static_path and html_extra_path.\nexclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']\n\n# -- Options for HTML output -------------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  See the documentation for\n# a list of builtin themes.\n#\n# html_theme = 'sphinx_rtd_theme'\nhtml_theme = 'pytorch_sphinx_theme'\nhtml_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]\n\nhtml_theme_options = {\n    'menu': [\n        {\n            'name': 'GitHub',\n            'url': 'https://github.com/open-mmlab/mmdetection'\n        },\n    ],\n    # Specify the language of shared menu\n    'menu_lang':\n    'en'\n}\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = ['_static']\nhtml_css_files = ['css/readthedocs.css']\n\n# -- Extension configuration -------------------------------------------------\n# Ignore >>> when copying code\ncopybutton_prompt_text = r'>>> |\\.\\.\\. 
'\ncopybutton_prompt_is_regexp = True\n\n\ndef builder_inited_handler(app):\n    subprocess.run(['./stat.py'])\n\n\ndef setup(app):\n    app.connect('builder-inited', builder_inited_handler)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/en/index.rst",
    "content": "Welcome to MMDetection's documentation!\n=======================================\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Get Started\n\n   get_started.md\n   modelzoo_statistics.md\n   model_zoo.md\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Quick Run\n\n   1_exist_data_model.md\n   2_new_data_model.md\n   3_exist_data_new_model.md\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Tutorials\n\n   tutorials/index.rst\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Useful Tools and Scripts\n\n   useful_tools.md\n\n.. toctree::\n   :maxdepth: 2\n   :caption: Notes\n\n   conventions.md\n   compatibility.md\n   projects.md\n   changelog.md\n   faq.md\n\n.. toctree::\n   :caption: Switch Language\n\n   switch_language.md\n\n.. toctree::\n   :maxdepth: 1\n   :caption: API Reference\n\n   api.rst\n\n.. toctree::\n   :maxdepth: 1\n   :caption: Device Support\n\n   device/npu.md\n\nIndices and tables\n==================\n\n* :ref:`genindex`\n* :ref:`search`\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/en/make.bat",
    "content": "@ECHO OFF\n\npushd %~dp0\n\nREM Command file for Sphinx documentation\n\nif \"%SPHINXBUILD%\" == \"\" (\n\tset SPHINXBUILD=sphinx-build\n)\nset SOURCEDIR=.\nset BUILDDIR=_build\n\nif \"%1\" == \"\" goto help\n\n%SPHINXBUILD% >NUL 2>NUL\nif errorlevel 9009 (\n\techo.\n\techo.The 'sphinx-build' command was not found. Make sure you have Sphinx\n\techo.installed, then set the SPHINXBUILD environment variable to point\n\techo.to the full path of the 'sphinx-build' executable. Alternatively you\n\techo.may add the Sphinx directory to PATH.\n\techo.\n\techo.If you don't have Sphinx installed, grab it from\n\techo.http://sphinx-doc.org/\n\texit /b 1\n)\n\n%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%\ngoto end\n\n:help\n%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%\n\n:end\npopd\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/en/stat.py",
    "content": "#!/usr/bin/env python\nimport functools as func\nimport glob\nimport os.path as osp\nimport re\n\nimport numpy as np\n\nurl_prefix = 'https://github.com/open-mmlab/mmdetection/blob/master/configs'\n\nfiles = sorted(glob.glob('../../configs/*/README.md'))\n\nstats = []\ntitles = []\nnum_ckpts = 0\n\nfor f in files:\n    url = osp.dirname(f.replace('../../configs', url_prefix))\n\n    with open(f, 'r') as content_file:\n        content = content_file.read()\n\n    title = content.split('\\n')[0].replace('# ', '').strip()\n    ckpts = set(x.lower().strip()\n                for x in re.findall(r'\\[model\\]\\((https?.*)\\)', content))\n\n    if len(ckpts) == 0:\n        continue\n\n    _papertype = [x for x in re.findall(r'\\[([A-Z]+)\\]', content)]\n    assert len(_papertype) > 0\n    papertype = _papertype[0]\n\n    paper = set([(papertype, title)])\n\n    titles.append(title)\n    num_ckpts += len(ckpts)\n\n    statsmsg = f\"\"\"\n\\t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts)\n\"\"\"\n    stats.append((paper, ckpts, statsmsg))\n\nallpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats])\nmsglist = '\\n'.join(x for _, _, x in stats)\n\npapertypes, papercounts = np.unique([t for t, _ in allpapers],\n                                    return_counts=True)\ncountstr = '\\n'.join(\n    [f'   - {t}: {c}' for t, c in zip(papertypes, papercounts)])\n\nmodelzoo = f\"\"\"\n# Model Zoo Statistics\n\n* Number of papers: {len(set(titles))}\n{countstr}\n\n* Number of checkpoints: {num_ckpts}\n\n{msglist}\n\"\"\"\n\nwith open('modelzoo_statistics.md', 'w') as f:\n    f.write(modelzoo)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/en/tutorials/index.rst",
    "content": ".. toctree::\n   :maxdepth: 2\n\n   config.md\n   customize_dataset.md\n   data_pipeline.md\n   customize_models.md\n   customize_runtime.md\n   customize_losses.md\n   finetune.md\n   robustness_benchmarking.md\n   pytorch2onnx.md\n   onnx2tensorrt.md\n   init_cfg.md\n   how_to.md\n   test_results_submission.md\n   useful_hooks.md\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/zh_cn/Makefile",
    "content": "# Minimal makefile for Sphinx documentation\n#\n\n# You can set these variables from the command line, and also\n# from the environment for the first two.\nSPHINXOPTS    ?=\nSPHINXBUILD   ?= sphinx-build\nSOURCEDIR     = .\nBUILDDIR      = _build\n\n# Put it first so that \"make\" without argument is like \"make help\".\nhelp:\n\t@$(SPHINXBUILD) -M help \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n\n.PHONY: help Makefile\n\n# Catch-all target: route all unknown targets to Sphinx using the new\n# \"make mode\" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).\n%: Makefile\n\t@$(SPHINXBUILD) -M $@ \"$(SOURCEDIR)\" \"$(BUILDDIR)\" $(SPHINXOPTS) $(O)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/zh_cn/_static/css/readthedocs.css",
    "content": ".header-logo {\n    background-image: url(\"../image/mmdet-logo.png\");\n    background-size: 156px 40px;\n    height: 40px;\n    width: 156px;\n}\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/zh_cn/api.rst",
    "content": "mmdet.apis\n--------------\n.. automodule:: mmdet.apis\n    :members:\n\nmmdet.core\n--------------\n\nanchor\n^^^^^^^^^^\n.. automodule:: mmdet.core.anchor\n    :members:\n\nbbox\n^^^^^^^^^^\n.. automodule:: mmdet.core.bbox\n    :members:\n\nexport\n^^^^^^^^^^\n.. automodule:: mmdet.core.export\n    :members:\n\nmask\n^^^^^^^^^^\n.. automodule:: mmdet.core.mask\n    :members:\n\nevaluation\n^^^^^^^^^^\n.. automodule:: mmdet.core.evaluation\n    :members:\n\npost_processing\n^^^^^^^^^^^^^^^\n.. automodule:: mmdet.core.post_processing\n    :members:\n\nutils\n^^^^^^^^^^\n.. automodule:: mmdet.core.utils\n    :members:\n\nmmdet.datasets\n--------------\n\ndatasets\n^^^^^^^^^^\n.. automodule:: mmdet.datasets\n    :members:\n\npipelines\n^^^^^^^^^^\n.. automodule:: mmdet.datasets.pipelines\n    :members:\n\nsamplers\n^^^^^^^^^^\n.. automodule:: mmdet.datasets.samplers\n    :members:\n\napi_wrappers\n^^^^^^^^^^\n.. automodule:: mmdet.datasets.api_wrappers\n    :members:\n\nmmdet.models\n--------------\n\ndetectors\n^^^^^^^^^^\n.. automodule:: mmdet.models.detectors\n    :members:\n\nbackbones\n^^^^^^^^^^\n.. automodule:: mmdet.models.backbones\n    :members:\n\nnecks\n^^^^^^^^^^^^\n.. automodule:: mmdet.models.necks\n    :members:\n\ndense_heads\n^^^^^^^^^^^^\n.. automodule:: mmdet.models.dense_heads\n    :members:\n\nroi_heads\n^^^^^^^^^^\n.. automodule:: mmdet.models.roi_heads\n    :members:\n\nlosses\n^^^^^^^^^^\n.. automodule:: mmdet.models.losses\n    :members:\n\nutils\n^^^^^^^^^^\n.. automodule:: mmdet.models.utils\n    :members:\n\nmmdet.utils\n--------------\n.. automodule::mmdet.utils\n    :members:\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/zh_cn/conf.py",
    "content": "# Configuration file for the Sphinx documentation builder.\n#\n# This file only contains a selection of the most common options. For a full\n# list see the documentation:\n# https://www.sphinx-doc.org/en/master/usage/configuration.html\n\n# -- Path setup --------------------------------------------------------------\n\n# If extensions (or modules to document with autodoc) are in another directory,\n# add these directories to sys.path here. If the directory is relative to the\n# documentation root, use os.path.abspath to make it absolute, like shown here.\n#\nimport os\nimport subprocess\nimport sys\n\nimport pytorch_sphinx_theme\n\nsys.path.insert(0, os.path.abspath('../../'))\n\n# -- Project information -----------------------------------------------------\n\nproject = 'MMDetection'\ncopyright = '2018-2021, OpenMMLab'\nauthor = 'MMDetection Authors'\nversion_file = '../../mmdet/version.py'\n\n\ndef get_version():\n    with open(version_file, 'r') as f:\n        exec(compile(f.read(), version_file, 'exec'))\n    return locals()['__version__']\n\n\n# The full version, including alpha/beta/rc tags\nrelease = get_version()\n\n# -- General configuration ---------------------------------------------------\n\n# Add any Sphinx extension module names here, as strings. They can be\n# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom\n# ones.\nextensions = [\n    'sphinx.ext.autodoc',\n    'sphinx.ext.napoleon',\n    'sphinx.ext.viewcode',\n    'myst_parser',\n    'sphinx_markdown_tables',\n    'sphinx_copybutton',\n]\n\nmyst_enable_extensions = ['colon_fence']\nmyst_heading_anchors = 3\n\nautodoc_mock_imports = [\n    'matplotlib', 'pycocotools', 'terminaltables', 'mmdet.version', 'mmcv.ops'\n]\n\n# Add any paths that contain templates here, relative to this directory.\ntemplates_path = ['_templates']\n\n# The suffix(es) of source filenames.\n# You can specify multiple suffix as a list of string:\n#\nsource_suffix = {\n    '.rst': 'restructuredtext',\n    '.md': 'markdown',\n}\n\n# The master toctree document.\nmaster_doc = 'index'\n\n# List of patterns, relative to source directory, that match files and\n# directories to ignore when looking for source files.\n# This pattern also affects html_static_path and html_extra_path.\nexclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']\n\n# -- Options for HTML output -------------------------------------------------\n\n# The theme to use for HTML and HTML Help pages.  See the documentation for\n# a list of builtin themes.\n#\n# html_theme = 'sphinx_rtd_theme'\nhtml_theme = 'pytorch_sphinx_theme'\nhtml_theme_path = [pytorch_sphinx_theme.get_html_theme_path()]\n\nhtml_theme_options = {\n    'menu': [\n        {\n            'name': 'GitHub',\n            'url': 'https://github.com/open-mmlab/mmdetection'\n        },\n    ],\n    # Specify the language of shared menu\n    'menu_lang':\n    'cn',\n}\n\n# Add any paths that contain custom static files (such as style sheets) here,\n# relative to this directory. They are copied after the builtin static files,\n# so a file named \"default.css\" will overwrite the builtin \"default.css\".\nhtml_static_path = ['_static']\nhtml_css_files = ['css/readthedocs.css']\n\nlanguage = 'zh_CN'\n\n# -- Extension configuration -------------------------------------------------\n# Ignore >>> when copying code\ncopybutton_prompt_text = r'>>> |\\.\\.\\. 
'\ncopybutton_prompt_is_regexp = True\n\n\ndef builder_inited_handler(app):\n    subprocess.run(['./stat.py'])\n\n\ndef setup(app):\n    app.connect('builder-inited', builder_inited_handler)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/zh_cn/index.rst",
    "content": "Welcome to MMDetection's documentation!\n=======================================\n\n.. toctree::\n   :maxdepth: 2\n   :caption: 开始你的第一步\n\n   get_started.md\n   model_zoo.md\n   article.md\n\n.. toctree::\n   :maxdepth: 2\n   :caption: 快速启动\n\n   1_exist_data_model.md\n   2_new_data_model.md\n   3_exist_data_new_model.md\n\n.. toctree::\n   :maxdepth: 2\n   :caption: 教程\n\n   tutorials/index.rst\n\n.. toctree::\n   :maxdepth: 2\n   :caption: 实用工具与脚本\n\n   useful_tools.md\n\n.. toctree::\n   :maxdepth: 2\n   :caption: 说明\n\n   conventions.md\n   compatibility.md\n   faq.md\n\n.. toctree::\n   :caption: 语言切换\n\n   switch_language.md\n\n.. toctree::\n   :maxdepth: 1\n   :caption: 接口文档（英文）\n\n   api.rst\n\n\nIndices and tables\n==================\n\n* :ref:`genindex`\n* :ref:`search`\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/zh_cn/make.bat",
    "content": "@ECHO OFF\n\npushd %~dp0\n\nREM Command file for Sphinx documentation\n\nif \"%SPHINXBUILD%\" == \"\" (\n\tset SPHINXBUILD=sphinx-build\n)\nset SOURCEDIR=.\nset BUILDDIR=_build\n\nif \"%1\" == \"\" goto help\n\n%SPHINXBUILD% >NUL 2>NUL\nif errorlevel 9009 (\n\techo.\n\techo.The 'sphinx-build' command was not found. Make sure you have Sphinx\n\techo.installed, then set the SPHINXBUILD environment variable to point\n\techo.to the full path of the 'sphinx-build' executable. Alternatively you\n\techo.may add the Sphinx directory to PATH.\n\techo.\n\techo.If you don't have Sphinx installed, grab it from\n\techo.http://sphinx-doc.org/\n\texit /b 1\n)\n\n%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%\ngoto end\n\n:help\n%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%\n\n:end\npopd\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/zh_cn/stat.py",
    "content": "#!/usr/bin/env python\nimport functools as func\nimport glob\nimport os.path as osp\nimport re\n\nimport numpy as np\n\nurl_prefix = 'https://github.com/open-mmlab/mmdetection/blob/master/'\n\nfiles = sorted(glob.glob('../configs/*/README.md'))\n\nstats = []\ntitles = []\nnum_ckpts = 0\n\nfor f in files:\n    url = osp.dirname(f.replace('../', url_prefix))\n\n    with open(f, 'r') as content_file:\n        content = content_file.read()\n\n    title = content.split('\\n')[0].replace('# ', '').strip()\n    ckpts = set(x.lower().strip()\n                for x in re.findall(r'\\[model\\]\\((https?.*)\\)', content))\n\n    if len(ckpts) == 0:\n        continue\n\n    _papertype = [x for x in re.findall(r'\\[([A-Z]+)\\]', content)]\n    assert len(_papertype) > 0\n    papertype = _papertype[0]\n\n    paper = set([(papertype, title)])\n\n    titles.append(title)\n    num_ckpts += len(ckpts)\n\n    statsmsg = f\"\"\"\n\\t* [{papertype}] [{title}]({url}) ({len(ckpts)} ckpts)\n\"\"\"\n    stats.append((paper, ckpts, statsmsg))\n\nallpapers = func.reduce(lambda a, b: a.union(b), [p for p, _, _ in stats])\nmsglist = '\\n'.join(x for _, _, x in stats)\n\npapertypes, papercounts = np.unique([t for t, _ in allpapers],\n                                    return_counts=True)\ncountstr = '\\n'.join(\n    [f'   - {t}: {c}' for t, c in zip(papertypes, papercounts)])\n\nmodelzoo = f\"\"\"\n# Model Zoo Statistics\n\n* Number of papers: {len(set(titles))}\n{countstr}\n\n* Number of checkpoints: {num_ckpts}\n\n{msglist}\n\"\"\"\n\nwith open('modelzoo_statistics.md', 'w') as f:\n    f.write(modelzoo)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/docs/zh_cn/tutorials/index.rst",
    "content": ".. toctree::\n   :maxdepth: 2\n\n   config.md\n   customize_dataset.md\n   data_pipeline.md\n   customize_models.md\n   customize_runtime.md\n   customize_losses.md\n   finetune.md\n   pytorch2onnx.md\n   onnx2tensorrt.md\n   init_cfg.md\n   how_to.md\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\n\nfrom .version import __version__, short_version\n\n\ndef digit_version(version_str):\n    digit_version = []\n    for x in version_str.split('.'):\n        if x.isdigit():\n            digit_version.append(int(x))\n        elif x.find('rc') != -1:\n            patch_version = x.split('rc')\n            digit_version.append(int(patch_version[0]) - 1)\n            digit_version.append(int(patch_version[1]))\n    return digit_version\n\n\nmmcv_minimum_version = '1.3.17'\nmmcv_maximum_version = '1.8.0'\nmmcv_version = digit_version(mmcv.__version__)\n\n\nassert (mmcv_version >= digit_version(mmcv_minimum_version)\n        and mmcv_version <= digit_version(mmcv_maximum_version)), \\\n    f'MMCV=={mmcv.__version__} is used but incompatible. ' \\\n    f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'\n\n__all__ = ['__version__', 'short_version']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/apis/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .inference import (async_inference_detector, inference_detector,\n                        init_detector, show_result_pyplot)\nfrom .test import multi_gpu_test, single_gpu_test\nfrom .train import (get_root_logger, init_random_seed, set_random_seed,\n                    train_detector)\n\n__all__ = [\n    'get_root_logger', 'set_random_seed', 'train_detector', 'init_detector',\n    'async_inference_detector', 'inference_detector', 'show_result_pyplot',\n    'multi_gpu_test', 'single_gpu_test', 'init_random_seed'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/apis/inference.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom pathlib import Path\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.ops import RoIPool\nfrom mmcv.parallel import collate, scatter\nfrom mmcv.runner import load_checkpoint\n\nfrom mmdet.core import get_classes\nfrom mmdet.datasets import replace_ImageToTensor\nfrom mmdet.datasets.pipelines import Compose\nfrom mmdet.models import build_detector\n\n\ndef init_detector(config, checkpoint=None, device='cuda:0', cfg_options=None):\n    \"\"\"Initialize a detector from config file.\n\n    Args:\n        config (str, :obj:`Path`, or :obj:`mmcv.Config`): Config file path,\n            :obj:`Path`, or the config object.\n        checkpoint (str, optional): Checkpoint path. If left as None, the model\n            will not load any weights.\n        cfg_options (dict): Options to override some settings in the used\n            config.\n\n    Returns:\n        nn.Module: The constructed detector.\n    \"\"\"\n    if isinstance(config, (str, Path)):\n        config = mmcv.Config.fromfile(config)\n    elif not isinstance(config, mmcv.Config):\n        raise TypeError('config must be a filename or Config object, '\n                        f'but got {type(config)}')\n    if cfg_options is not None:\n        config.merge_from_dict(cfg_options)\n    if 'pretrained' in config.model:\n        config.model.pretrained = None\n    elif 'init_cfg' in config.model.backbone:\n        config.model.backbone.init_cfg = None\n    config.model.train_cfg = None\n    model = build_detector(config.model, test_cfg=config.get('test_cfg'))\n    if checkpoint is not None:\n        checkpoint = load_checkpoint(model, checkpoint, map_location='cpu')\n        if 'CLASSES' in checkpoint.get('meta', {}):\n            model.CLASSES = checkpoint['meta']['CLASSES']\n        else:\n            warnings.simplefilter('once')\n            warnings.warn('Class names are not saved in the checkpoint\\'s '\n                          'meta data, use COCO classes by default.')\n            model.CLASSES = get_classes('coco')\n    model.cfg = config  # save the config in the model for convenience\n    model.to(device)\n    model.eval()\n\n    if device == 'npu':\n        from mmcv.device.npu import NPUDataParallel\n        model = NPUDataParallel(model)\n        model.cfg = config\n\n    return model\n\n\nclass LoadImage:\n    \"\"\"Deprecated.\n\n    A simple pipeline to load image.\n    \"\"\"\n\n    def __call__(self, results):\n        \"\"\"Call function to load images into results.\n\n        Args:\n            results (dict): A result dict contains the file name\n                of the image to be read.\n        Returns:\n            dict: ``results`` will be returned containing loaded image.\n        \"\"\"\n        warnings.simplefilter('once')\n        warnings.warn('`LoadImage` is deprecated and will be removed in '\n                      'future releases. 
You may use `LoadImageFromWebcam` '\n                      'from `mmdet.datasets.pipelines.` instead.')\n        if isinstance(results['img'], str):\n            results['filename'] = results['img']\n            results['ori_filename'] = results['img']\n        else:\n            results['filename'] = None\n            results['ori_filename'] = None\n        img = mmcv.imread(results['img'])\n        results['img'] = img\n        results['img_fields'] = ['img']\n        results['img_shape'] = img.shape\n        results['ori_shape'] = img.shape\n        return results\n\n\ndef inference_detector(model, imgs):\n    \"\"\"Inference image(s) with the detector.\n\n    Args:\n        model (nn.Module): The loaded detector.\n        imgs (str/ndarray or list[str/ndarray] or tuple[str/ndarray]):\n           Either image files or loaded images.\n\n    Returns:\n        If imgs is a list or tuple, the same length list type results\n        will be returned, otherwise return the detection results directly.\n    \"\"\"\n\n    if isinstance(imgs, (list, tuple)):\n        is_batch = True\n    else:\n        imgs = [imgs]\n        is_batch = False\n\n    cfg = model.cfg\n    device = next(model.parameters()).device  # model device\n\n    if isinstance(imgs[0], np.ndarray):\n        cfg = cfg.copy()\n        # set loading pipeline type\n        cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'\n\n    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)\n    test_pipeline = Compose(cfg.data.test.pipeline)\n\n    datas = []\n    for img in imgs:\n        # prepare data\n        if isinstance(img, np.ndarray):\n            # directly add img\n            data = dict(img=img)\n        else:\n            # add information into dict\n            data = dict(img_info=dict(filename=img), img_prefix=None)\n        # build the data pipeline\n        data = test_pipeline(data)\n        datas.append(data)\n\n    data = collate(datas, samples_per_gpu=len(imgs))\n    # just get the actual data from DataContainer\n    data['img_metas'] = [img_metas.data[0] for img_metas in data['img_metas']]\n    data['img'] = [img.data[0] for img in data['img']]\n    if next(model.parameters()).is_cuda:\n        # scatter to specified GPU\n        data = scatter(data, [device])[0]\n    else:\n        for m in model.modules():\n            assert not isinstance(\n                m, RoIPool\n            ), 'CPU inference with RoIPool is not supported currently.'\n\n    # forward the model\n    with torch.no_grad():\n        results = model(return_loss=False, rescale=True, **data)\n\n    if not is_batch:\n        return results[0]\n    else:\n        return results\n\n\nasync def async_inference_detector(model, imgs):\n    \"\"\"Async inference image(s) with the detector.\n\n    Args:\n        model (nn.Module): The loaded detector.\n        img (str | ndarray): Either image files or loaded images.\n\n    Returns:\n        Awaitable detection results.\n    \"\"\"\n    if not isinstance(imgs, (list, tuple)):\n        imgs = [imgs]\n\n    cfg = model.cfg\n    device = next(model.parameters()).device  # model device\n\n    if isinstance(imgs[0], np.ndarray):\n        cfg = cfg.copy()\n        # set loading pipeline type\n        cfg.data.test.pipeline[0].type = 'LoadImageFromWebcam'\n\n    cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)\n    test_pipeline = Compose(cfg.data.test.pipeline)\n\n    datas = []\n    for img in imgs:\n        # prepare data\n        if isinstance(img, np.ndarray):\n    
        # directly add img\n            data = dict(img=img)\n        else:\n            # add information into dict\n            data = dict(img_info=dict(filename=img), img_prefix=None)\n        # build the data pipeline\n        data = test_pipeline(data)\n        datas.append(data)\n\n    data = collate(datas, samples_per_gpu=len(imgs))\n    # just get the actual data from DataContainer\n    data['img_metas'] = [img_metas.data[0] for img_metas in data['img_metas']]\n    data['img'] = [img.data[0] for img in data['img']]\n    if next(model.parameters()).is_cuda:\n        # scatter to specified GPU\n        data = scatter(data, [device])[0]\n    else:\n        for m in model.modules():\n            assert not isinstance(\n                m, RoIPool\n            ), 'CPU inference with RoIPool is not supported currently.'\n\n    # We don't restore `torch.is_grad_enabled()` value during concurrent\n    # inference since execution can overlap\n    torch.set_grad_enabled(False)\n    results = await model.aforward_test(rescale=True, **data)\n    return results\n\n\ndef show_result_pyplot(model,\n                       img,\n                       result,\n                       score_thr=0.3,\n                       title='result',\n                       wait_time=0,\n                       palette=None,\n                       out_file=None):\n    \"\"\"Visualize the detection results on the image.\n\n    Args:\n        model (nn.Module): The loaded detector.\n        img (str or np.ndarray): Image filename or loaded image.\n        result (tuple[list] or list): The detection result, can be either\n            (bbox, segm) or just bbox.\n        score_thr (float): The threshold to visualize the bboxes and masks.\n        title (str): Title of the pyplot figure.\n        wait_time (float): Value of waitKey param. Default: 0.\n        palette (str or tuple(int) or :obj:`Color`): Color.\n            The tuple of color should be in BGR order.\n        out_file (str or None): The path to write the image.\n            Default: None.\n    \"\"\"\n    if hasattr(model, 'module'):\n        model = model.module\n    model.show_result(\n        img,\n        result,\n        score_thr=score_thr,\n        show=True,\n        wait_time=wait_time,\n        win_name=title,\n        bbox_color=palette,\n        text_color=(200, 200, 200),\n        mask_color=palette,\n        out_file=out_file)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/apis/test.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nimport pickle\nimport shutil\nimport tempfile\nimport time\n\nimport mmcv\nimport torch\nimport torch.distributed as dist\nfrom mmcv.image import tensor2imgs\nfrom mmcv.runner import get_dist_info\n\nfrom mmdet.core import encode_mask_results\n\n\ndef single_gpu_test(model,\n                    data_loader,\n                    show=False,\n                    out_dir=None,\n                    show_score_thr=0.3):\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    PALETTE = getattr(dataset, 'PALETTE', None)\n    prog_bar = mmcv.ProgressBar(len(dataset))\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n\n        batch_size = len(result)\n        if show or out_dir:\n            if batch_size == 1 and isinstance(data['img'][0], torch.Tensor):\n                img_tensor = data['img'][0]\n            else:\n                img_tensor = data['img'][0].data[0]\n            img_metas = data['img_metas'][0].data[0]\n            imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])\n            assert len(imgs) == len(img_metas)\n\n            for i, (img, img_meta) in enumerate(zip(imgs, img_metas)):\n                h, w, _ = img_meta['img_shape']\n                img_show = img[:h, :w, :]\n\n                ori_h, ori_w = img_meta['ori_shape'][:-1]\n                img_show = mmcv.imresize(img_show, (ori_w, ori_h))\n\n                if out_dir:\n                    out_file = osp.join(out_dir, img_meta['ori_filename'])\n                else:\n                    out_file = None\n\n                model.module.show_result(\n                    img_show,\n                    result[i],\n                    bbox_color=PALETTE,\n                    text_color=PALETTE,\n                    mask_color=PALETTE,\n                    show=show,\n                    out_file=out_file,\n                    score_thr=show_score_thr)\n\n        # encode mask results\n        if isinstance(result[0], tuple):\n            result = [(bbox_results, encode_mask_results(mask_results))\n                      for bbox_results, mask_results in result]\n        # This logic is only used in panoptic segmentation test.\n        elif isinstance(result[0], dict) and 'ins_results' in result[0]:\n            for j in range(len(result)):\n                bbox_results, mask_results = result[j]['ins_results']\n                result[j]['ins_results'] = (bbox_results,\n                                            encode_mask_results(mask_results))\n\n        results.extend(result)\n\n        for _ in range(batch_size):\n            prog_bar.update()\n    return results\n\n\ndef multi_gpu_test(model, data_loader, tmpdir=None, gpu_collect=False):\n    \"\"\"Test model with multiple gpus.\n\n    This method tests model with multiple gpus and collects the results\n    under two different modes: gpu and cpu modes. By setting 'gpu_collect=True'\n    it encodes results to gpu tensors and use gpu communication for results\n    collection. 
On cpu mode it saves the results on different gpus to 'tmpdir'\n    and collects them by the rank 0 worker.\n\n    Args:\n        model (nn.Module): Model to be tested.\n        data_loader (nn.Dataloader): Pytorch data loader.\n        tmpdir (str): Path of directory to save the temporary results from\n            different gpus under cpu mode.\n        gpu_collect (bool): Option to use either gpu or cpu to collect results.\n\n    Returns:\n        list: The prediction results.\n    \"\"\"\n    model.eval()\n    results = []\n    dataset = data_loader.dataset\n    rank, world_size = get_dist_info()\n    if rank == 0:\n        prog_bar = mmcv.ProgressBar(len(dataset))\n    time.sleep(2)  # This line can prevent deadlock problem in some cases.\n    for i, data in enumerate(data_loader):\n        with torch.no_grad():\n            result = model(return_loss=False, rescale=True, **data)\n            # encode mask results\n            if isinstance(result[0], tuple):\n                result = [(bbox_results, encode_mask_results(mask_results))\n                          for bbox_results, mask_results in result]\n            # This logic is only used in panoptic segmentation test.\n            elif isinstance(result[0], dict) and 'ins_results' in result[0]:\n                for j in range(len(result)):\n                    bbox_results, mask_results = result[j]['ins_results']\n                    result[j]['ins_results'] = (\n                        bbox_results, encode_mask_results(mask_results))\n\n        results.extend(result)\n\n        if rank == 0:\n            batch_size = len(result)\n            for _ in range(batch_size * world_size):\n                prog_bar.update()\n\n    # collect results from all ranks\n    if gpu_collect:\n        results = collect_results_gpu(results, len(dataset))\n    else:\n        results = collect_results_cpu(results, len(dataset), tmpdir)\n    return results\n\n\ndef collect_results_cpu(result_part, size, tmpdir=None):\n    rank, world_size = get_dist_info()\n    # create a tmp dir if it is not specified\n    if tmpdir is None:\n        MAX_LEN = 512\n        # 32 is whitespace\n        dir_tensor = torch.full((MAX_LEN, ),\n                                32,\n                                dtype=torch.uint8,\n                                device='cuda')\n        if rank == 0:\n            mmcv.mkdir_or_exist('.dist_test')\n            tmpdir = tempfile.mkdtemp(dir='.dist_test')\n            tmpdir = torch.tensor(\n                bytearray(tmpdir.encode()), dtype=torch.uint8, device='cuda')\n            dir_tensor[:len(tmpdir)] = tmpdir\n        dist.broadcast(dir_tensor, 0)\n        tmpdir = dir_tensor.cpu().numpy().tobytes().decode().rstrip()\n    else:\n        mmcv.mkdir_or_exist(tmpdir)\n    # dump the part result to the dir\n    mmcv.dump(result_part, osp.join(tmpdir, f'part_{rank}.pkl'))\n    dist.barrier()\n    # collect all parts\n    if rank != 0:\n        return None\n    else:\n        # load results of all parts from tmp dir\n        part_list = []\n        for i in range(world_size):\n            part_file = osp.join(tmpdir, f'part_{i}.pkl')\n            part_list.append(mmcv.load(part_file))\n        # sort the results\n        ordered_results = []\n        for res in zip(*part_list):\n            ordered_results.extend(list(res))\n        # the dataloader may pad some samples\n        ordered_results = ordered_results[:size]\n        # remove tmp dir\n        shutil.rmtree(tmpdir)\n        return ordered_results\n\n\ndef 
collect_results_gpu(result_part, size):\n    rank, world_size = get_dist_info()\n    # dump result part to tensor with pickle\n    part_tensor = torch.tensor(\n        bytearray(pickle.dumps(result_part)), dtype=torch.uint8, device='cuda')\n    # gather all result part tensor shape\n    shape_tensor = torch.tensor(part_tensor.shape, device='cuda')\n    shape_list = [shape_tensor.clone() for _ in range(world_size)]\n    dist.all_gather(shape_list, shape_tensor)\n    # padding result part tensor to max length\n    shape_max = torch.tensor(shape_list).max()\n    part_send = torch.zeros(shape_max, dtype=torch.uint8, device='cuda')\n    part_send[:shape_tensor[0]] = part_tensor\n    part_recv_list = [\n        part_tensor.new_zeros(shape_max) for _ in range(world_size)\n    ]\n    # gather all result part\n    dist.all_gather(part_recv_list, part_send)\n\n    if rank == 0:\n        part_list = []\n        for recv, shape in zip(part_recv_list, shape_list):\n            part_list.append(\n                pickle.loads(recv[:shape[0]].cpu().numpy().tobytes()))\n        # sort the results\n        ordered_results = []\n        for res in zip(*part_list):\n            ordered_results.extend(list(res))\n        # the dataloader may pad some samples\n        ordered_results = ordered_results[:size]\n        return ordered_results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/apis/train.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nimport random\n\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nfrom mmcv.runner import (DistSamplerSeedHook, EpochBasedRunner,\n                         Fp16OptimizerHook, OptimizerHook, build_runner,\n                         get_dist_info)\n\nfrom mmdet.core import DistEvalHook, EvalHook, build_optimizer\nfrom mmdet.datasets import (build_dataloader, build_dataset,\n                            replace_ImageToTensor)\nfrom mmdet.utils import (build_ddp, build_dp, compat_cfg,\n                         find_latest_checkpoint, get_root_logger)\n\n\ndef init_random_seed(seed=None, device='cuda'):\n    \"\"\"Initialize random seed.\n\n    If the seed is not set, the seed will be automatically randomized,\n    and then broadcast to all processes to prevent some potential bugs.\n\n    Args:\n        seed (int, Optional): The seed. Default to None.\n        device (str): The device where the seed will be put on.\n            Default to 'cuda'.\n\n    Returns:\n        int: Seed to be used.\n    \"\"\"\n    if seed is not None:\n        return seed\n\n    # Make sure all ranks share the same random seed to prevent\n    # some potential bugs. Please refer to\n    # https://github.com/open-mmlab/mmdetection/issues/6339\n    rank, world_size = get_dist_info()\n    seed = np.random.randint(2**31)\n    if world_size == 1:\n        return seed\n\n    if rank == 0:\n        random_num = torch.tensor(seed, dtype=torch.int32, device=device)\n    else:\n        random_num = torch.tensor(0, dtype=torch.int32, device=device)\n    dist.broadcast(random_num, src=0)\n    return random_num.item()\n\n\ndef set_random_seed(seed, deterministic=False):\n    \"\"\"Set random seed.\n\n    Args:\n        seed (int): Seed to be used.\n        deterministic (bool): Whether to set the deterministic option for\n            CUDNN backend, i.e., set `torch.backends.cudnn.deterministic`\n            to True and `torch.backends.cudnn.benchmark` to False.\n            Default: False.\n    \"\"\"\n    random.seed(seed)\n    np.random.seed(seed)\n    torch.manual_seed(seed)\n    torch.cuda.manual_seed_all(seed)\n    if deterministic:\n        torch.backends.cudnn.deterministic = True\n        torch.backends.cudnn.benchmark = False\n\n\ndef auto_scale_lr(cfg, distributed, logger):\n    \"\"\"Automatically scaling LR according to GPU number and sample per GPU.\n\n    Args:\n        cfg (config): Training config.\n        distributed (bool): Using distributed or not.\n        logger (logging.Logger): Logger.\n    \"\"\"\n    # Get flag from config\n    if ('auto_scale_lr' not in cfg) or \\\n            (not cfg.auto_scale_lr.get('enable', False)):\n        logger.info('Automatic scaling of learning rate (LR)'\n                    ' has been disabled.')\n        return\n\n    # Get base batch size from config\n    base_batch_size = cfg.auto_scale_lr.get('base_batch_size', None)\n    if base_batch_size is None:\n        return\n\n    # Get gpu number\n    if distributed:\n        _, world_size = get_dist_info()\n        num_gpus = len(range(world_size))\n    else:\n        num_gpus = len(cfg.gpu_ids)\n\n    # calculate the batch size\n    samples_per_gpu = cfg.data.train_dataloader.samples_per_gpu\n    batch_size = num_gpus * samples_per_gpu\n    logger.info(f'Training with {num_gpus} GPU(s) with {samples_per_gpu} '\n                f'samples per GPU. 
The total batch size is {batch_size}.')\n\n    if batch_size != base_batch_size:\n        # scale LR with\n        # [linear scaling rule](https://arxiv.org/abs/1706.02677)\n        scaled_lr = (batch_size / base_batch_size) * cfg.optimizer.lr\n        logger.info('LR has been automatically scaled '\n                    f'from {cfg.optimizer.lr} to {scaled_lr}')\n        cfg.optimizer.lr = scaled_lr\n    else:\n        logger.info('The batch size match the '\n                    f'base batch size: {base_batch_size}, '\n                    f'will not scaling the LR ({cfg.optimizer.lr}).')\n\n\ndef train_detector(model,\n                   dataset,\n                   cfg,\n                   distributed=False,\n                   validate=False,\n                   timestamp=None,\n                   meta=None):\n\n    cfg = compat_cfg(cfg)\n    logger = get_root_logger(log_level=cfg.log_level)\n\n    # prepare data loaders\n    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]\n\n    runner_type = 'EpochBasedRunner' if 'runner' not in cfg else cfg.runner[\n        'type']\n\n    train_dataloader_default_args = dict(\n        samples_per_gpu=2,\n        workers_per_gpu=2,\n        # `num_gpus` will be ignored if distributed\n        num_gpus=len(cfg.gpu_ids),\n        dist=distributed,\n        seed=cfg.seed,\n        runner_type=runner_type,\n        persistent_workers=False)\n\n    train_loader_cfg = {\n        **train_dataloader_default_args,\n        **cfg.data.get('train_dataloader', {})\n    }\n\n    data_loaders = [build_dataloader(ds, **train_loader_cfg) for ds in dataset]\n\n    # put model on gpus\n    if distributed:\n        find_unused_parameters = cfg.get('find_unused_parameters', False)\n        # Sets the `find_unused_parameters` parameter in\n        # torch.nn.parallel.DistributedDataParallel\n        model = build_ddp(\n            model,\n            cfg.device,\n            device_ids=[int(os.environ['LOCAL_RANK'])],\n            broadcast_buffers=False,\n            find_unused_parameters=find_unused_parameters)\n    else:\n        model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)\n\n    # build optimizer\n    auto_scale_lr(cfg, distributed, logger)\n    optimizer = build_optimizer(model, cfg.optimizer)\n\n    runner = build_runner(\n        cfg.runner,\n        default_args=dict(\n            model=model,\n            optimizer=optimizer,\n            work_dir=cfg.work_dir,\n            logger=logger,\n            meta=meta))\n\n    # an ugly workaround to make .log and .log.json filenames the same\n    runner.timestamp = timestamp\n\n    # fp16 setting\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is None and cfg.get('device', None) == 'npu':\n        fp16_cfg = dict(loss_scale='dynamic')\n    if fp16_cfg is not None:\n        optimizer_config = Fp16OptimizerHook(\n            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)\n    elif distributed and 'type' not in cfg.optimizer_config:\n        optimizer_config = OptimizerHook(**cfg.optimizer_config)\n    else:\n        optimizer_config = cfg.optimizer_config\n\n    # register hooks\n    runner.register_training_hooks(\n        cfg.lr_config,\n        optimizer_config,\n        cfg.checkpoint_config,\n        cfg.log_config,\n        cfg.get('momentum_config', None),\n        custom_hooks_config=cfg.get('custom_hooks', None))\n\n    if distributed:\n        if isinstance(runner, EpochBasedRunner):\n            runner.register_hook(DistSamplerSeedHook())\n\n    # 
register eval hooks\n    if validate:\n        val_dataloader_default_args = dict(\n            samples_per_gpu=1,\n            workers_per_gpu=2,\n            dist=distributed,\n            shuffle=False,\n            persistent_workers=False)\n\n        val_dataloader_args = {\n            **val_dataloader_default_args,\n            **cfg.data.get('val_dataloader', {})\n        }\n        # Support batch_size > 1 in validation\n\n        if val_dataloader_args['samples_per_gpu'] > 1:\n            # Replace 'ImageToTensor' to 'DefaultFormatBundle'\n            cfg.data.val.pipeline = replace_ImageToTensor(\n                cfg.data.val.pipeline)\n        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))\n\n        val_dataloader = build_dataloader(val_dataset, **val_dataloader_args)\n        eval_cfg = cfg.get('evaluation', {})\n        eval_cfg['by_epoch'] = cfg.runner['type'] != 'IterBasedRunner'\n        eval_hook = DistEvalHook if distributed else EvalHook\n        # In this PR (https://github.com/open-mmlab/mmcv/pull/1193), the\n        # priority of IterTimerHook has been modified from 'NORMAL' to 'LOW'.\n        runner.register_hook(\n            eval_hook(val_dataloader, **eval_cfg), priority='LOW')\n\n    resume_from = None\n    if cfg.resume_from is None and cfg.get('auto_resume'):\n        resume_from = find_latest_checkpoint(cfg.work_dir)\n    if resume_from is not None:\n        cfg.resume_from = resume_from\n\n    if cfg.resume_from:\n        runner.resume(cfg.resume_from)\n    elif cfg.load_from:\n        runner.load_checkpoint(cfg.load_from)\n    runner.run(data_loaders, cfg.workflow)\n"
  },
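The `auto_scale_lr` helper in the file above applies the linear scaling rule: when the effective batch size (GPUs × samples per GPU) differs from `base_batch_size`, the learning rate is multiplied by the ratio of the two. A minimal, framework-free sketch of that arithmetic (the numeric values are illustrative, not taken from this repository's configs):

```python
# Standalone sketch of the linear scaling rule used by auto_scale_lr().
# The numbers below are made-up examples; real values come from the config.

def scale_lr(base_lr, base_batch_size, num_gpus, samples_per_gpu):
    """Return the LR scaled by (actual batch size / base batch size)."""
    batch_size = num_gpus * samples_per_gpu
    return base_lr * batch_size / base_batch_size

if __name__ == "__main__":
    # e.g. a config tuned for 8 GPUs x 2 samples (batch 16), run on 2 GPUs
    lr = scale_lr(base_lr=0.02, base_batch_size=16, num_gpus=2, samples_per_gpu=2)
    print(lr)  # 0.005 -> the LR shrinks in proportion to the smaller batch
```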
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .anchor import *  # noqa: F401, F403\nfrom .bbox import *  # noqa: F401, F403\nfrom .data_structures import *  # noqa: F401, F403\nfrom .evaluation import *  # noqa: F401, F403\nfrom .hook import *  # noqa: F401, F403\nfrom .mask import *  # noqa: F401, F403\nfrom .optimizers import *  # noqa: F401, F403\nfrom .post_processing import *  # noqa: F401, F403\nfrom .utils import *  # noqa: F401, F403\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/anchor/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .anchor_generator import (AnchorGenerator, LegacyAnchorGenerator,\n                               YOLOAnchorGenerator)\nfrom .builder import (ANCHOR_GENERATORS, PRIOR_GENERATORS,\n                      build_anchor_generator, build_prior_generator)\nfrom .point_generator import MlvlPointGenerator, PointGenerator\nfrom .utils import anchor_inside_flags, calc_region, images_to_levels\n\n__all__ = [\n    'AnchorGenerator', 'LegacyAnchorGenerator', 'anchor_inside_flags',\n    'PointGenerator', 'images_to_levels', 'calc_region',\n    'build_anchor_generator', 'ANCHOR_GENERATORS', 'YOLOAnchorGenerator',\n    'build_prior_generator', 'PRIOR_GENERATORS', 'MlvlPointGenerator'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/anchor/anchor_generator.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom torch.nn.modules.utils import _pair\n\nfrom .builder import PRIOR_GENERATORS\n\n\n@PRIOR_GENERATORS.register_module()\nclass AnchorGenerator:\n    \"\"\"Standard anchor generator for 2D anchor-based detectors.\n\n    Args:\n        strides (list[int] | list[tuple[int, int]]): Strides of anchors\n            in multiple feature levels in order (w, h).\n        ratios (list[float]): The list of ratios between the height and width\n            of anchors in a single level.\n        scales (list[int] | None): Anchor scales for anchors in a single level.\n            It cannot be set at the same time if `octave_base_scale` and\n            `scales_per_octave` are set.\n        base_sizes (list[int] | None): The basic sizes\n            of anchors in multiple levels.\n            If None is given, strides will be used as base_sizes.\n            (If strides are non square, the shortest stride is taken.)\n        scale_major (bool): Whether to multiply scales first when generating\n            base anchors. If true, the anchors in the same row will have the\n            same scales. By default it is True in V2.0\n        octave_base_scale (int): The base scale of octave.\n        scales_per_octave (int): Number of scales for each octave.\n            `octave_base_scale` and `scales_per_octave` are usually used in\n            retinanet and the `scales` should be None when they are set.\n        centers (list[tuple[float, float]] | None): The centers of the anchor\n            relative to the feature grid center in multiple feature levels.\n            By default it is set to be None and not used. If a list of tuple of\n            float is given, they will be used to shift the centers of anchors.\n        center_offset (float): The offset of center in proportion to anchors'\n            width and height. 
By default it is 0 in V2.0.\n\n    Examples:\n        >>> from mmdet.core import AnchorGenerator\n        >>> self = AnchorGenerator([16], [1.], [1.], [9])\n        >>> all_anchors = self.grid_priors([(2, 2)], device='cpu')\n        >>> print(all_anchors)\n        [tensor([[-4.5000, -4.5000,  4.5000,  4.5000],\n                [11.5000, -4.5000, 20.5000,  4.5000],\n                [-4.5000, 11.5000,  4.5000, 20.5000],\n                [11.5000, 11.5000, 20.5000, 20.5000]])]\n        >>> self = AnchorGenerator([16, 32], [1.], [1.], [9, 18])\n        >>> all_anchors = self.grid_priors([(2, 2), (1, 1)], device='cpu')\n        >>> print(all_anchors)\n        [tensor([[-4.5000, -4.5000,  4.5000,  4.5000],\n                [11.5000, -4.5000, 20.5000,  4.5000],\n                [-4.5000, 11.5000,  4.5000, 20.5000],\n                [11.5000, 11.5000, 20.5000, 20.5000]]), \\\n        tensor([[-9., -9., 9., 9.]])]\n    \"\"\"\n\n    def __init__(self,\n                 strides,\n                 ratios,\n                 scales=None,\n                 base_sizes=None,\n                 scale_major=True,\n                 octave_base_scale=None,\n                 scales_per_octave=None,\n                 centers=None,\n                 center_offset=0.):\n        # check center and center_offset\n        if center_offset != 0:\n            assert centers is None, 'center cannot be set when center_offset' \\\n                                    f'!=0, {centers} is given.'\n        if not (0 <= center_offset <= 1):\n            raise ValueError('center_offset should be in range [0, 1], '\n                             f'{center_offset} is given.')\n        if centers is not None:\n            assert len(centers) == len(strides), \\\n                'The number of strides should be the same as centers, got ' \\\n                f'{strides} and {centers}'\n\n        # calculate base sizes of anchors\n        self.strides = [_pair(stride) for stride in strides]\n        self.base_sizes = [min(stride) for stride in self.strides\n                           ] if base_sizes is None else base_sizes\n        assert len(self.base_sizes) == len(self.strides), \\\n            'The number of strides should be the same as base sizes, got ' \\\n            f'{self.strides} and {self.base_sizes}'\n\n        # calculate scales of anchors\n        assert ((octave_base_scale is not None\n                 and scales_per_octave is not None) ^ (scales is not None)), \\\n            'scales and octave_base_scale with scales_per_octave cannot' \\\n            ' be set at the same time'\n        if scales is not None:\n            self.scales = torch.Tensor(scales)\n        elif octave_base_scale is not None and scales_per_octave is not None:\n            octave_scales = np.array(\n                [2**(i / scales_per_octave) for i in range(scales_per_octave)])\n            scales = octave_scales * octave_base_scale\n            self.scales = torch.Tensor(scales)\n        else:\n            raise ValueError('Either scales or octave_base_scale with '\n                             'scales_per_octave should be set')\n\n        self.octave_base_scale = octave_base_scale\n        self.scales_per_octave = scales_per_octave\n        self.ratios = torch.Tensor(ratios)\n        self.scale_major = scale_major\n        self.centers = centers\n        self.center_offset = center_offset\n        self.base_anchors = self.gen_base_anchors()\n\n    @property\n    def num_base_anchors(self):\n        \"\"\"list[int]: total number of base 
anchors in a feature grid\"\"\"\n        return self.num_base_priors\n\n    @property\n    def num_base_priors(self):\n        \"\"\"list[int]: The number of priors (anchors) at a point\n        on the feature grid\"\"\"\n        return [base_anchors.size(0) for base_anchors in self.base_anchors]\n\n    @property\n    def num_levels(self):\n        \"\"\"int: number of feature levels that the generator will be applied\"\"\"\n        return len(self.strides)\n\n    def gen_base_anchors(self):\n        \"\"\"Generate base anchors.\n\n        Returns:\n            list(torch.Tensor): Base anchors of a feature grid in multiple \\\n                feature levels.\n        \"\"\"\n        multi_level_base_anchors = []\n        for i, base_size in enumerate(self.base_sizes):\n            center = None\n            if self.centers is not None:\n                center = self.centers[i]\n            multi_level_base_anchors.append(\n                self.gen_single_level_base_anchors(\n                    base_size,\n                    scales=self.scales,\n                    ratios=self.ratios,\n                    center=center))\n        return multi_level_base_anchors\n\n    def gen_single_level_base_anchors(self,\n                                      base_size,\n                                      scales,\n                                      ratios,\n                                      center=None):\n        \"\"\"Generate base anchors of a single level.\n\n        Args:\n            base_size (int | float): Basic size of an anchor.\n            scales (torch.Tensor): Scales of the anchor.\n            ratios (torch.Tensor): The ratio between between the height\n                and width of anchors in a single level.\n            center (tuple[float], optional): The center of the base anchor\n                related to a single feature grid. 
Defaults to None.\n\n        Returns:\n            torch.Tensor: Anchors in a single-level feature maps.\n        \"\"\"\n        w = base_size\n        h = base_size\n        if center is None:\n            x_center = self.center_offset * w\n            y_center = self.center_offset * h\n        else:\n            x_center, y_center = center\n\n        h_ratios = torch.sqrt(ratios)\n        w_ratios = 1 / h_ratios\n        if self.scale_major:\n            ws = (w * w_ratios[:, None] * scales[None, :]).view(-1)\n            hs = (h * h_ratios[:, None] * scales[None, :]).view(-1)\n        else:\n            ws = (w * scales[:, None] * w_ratios[None, :]).view(-1)\n            hs = (h * scales[:, None] * h_ratios[None, :]).view(-1)\n\n        # use float anchor and the anchor's center is aligned with the\n        # pixel center\n        base_anchors = [\n            x_center - 0.5 * ws, y_center - 0.5 * hs, x_center + 0.5 * ws,\n            y_center + 0.5 * hs\n        ]\n        base_anchors = torch.stack(base_anchors, dim=-1)\n\n        return base_anchors\n\n    def _meshgrid(self, x, y, row_major=True):\n        \"\"\"Generate mesh grid of x and y.\n\n        Args:\n            x (torch.Tensor): Grids of x dimension.\n            y (torch.Tensor): Grids of y dimension.\n            row_major (bool, optional): Whether to return y grids first.\n                Defaults to True.\n\n        Returns:\n            tuple[torch.Tensor]: The mesh grids of x and y.\n        \"\"\"\n        # use shape instead of len to keep tracing while exporting to onnx\n        xx = x.repeat(y.shape[0])\n        yy = y.view(-1, 1).repeat(1, x.shape[0]).view(-1)\n        if row_major:\n            return xx, yy\n        else:\n            return yy, xx\n\n    def grid_priors(self, featmap_sizes, dtype=torch.float32, device='cuda'):\n        \"\"\"Generate grid anchors in multiple feature levels.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes in\n                multiple feature levels.\n            dtype (:obj:`torch.dtype`): Dtype of priors.\n                Default: torch.float32.\n            device (str): The device where the anchors will be put on.\n\n        Return:\n            list[torch.Tensor]: Anchors in multiple feature levels. 
\\\n                The sizes of each tensor should be [N, 4], where \\\n                N = width * height * num_base_anchors, width and height \\\n                are the sizes of the corresponding feature level, \\\n                num_base_anchors is the number of anchors for that level.\n        \"\"\"\n        assert self.num_levels == len(featmap_sizes)\n        multi_level_anchors = []\n        for i in range(self.num_levels):\n            anchors = self.single_level_grid_priors(\n                featmap_sizes[i], level_idx=i, dtype=dtype, device=device)\n            multi_level_anchors.append(anchors)\n        return multi_level_anchors\n\n    def single_level_grid_priors(self,\n                                 featmap_size,\n                                 level_idx,\n                                 dtype=torch.float32,\n                                 device='cuda'):\n        \"\"\"Generate grid anchors of a single level.\n\n        Note:\n            This function is usually called by method ``self.grid_priors``.\n\n        Args:\n            featmap_size (tuple[int]): Size of the feature maps.\n            level_idx (int): The index of corresponding feature map level.\n            dtype (obj:`torch.dtype`): Date type of points.Defaults to\n                ``torch.float32``.\n            device (str, optional): The device the tensor will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: Anchors in the overall feature maps.\n        \"\"\"\n\n        base_anchors = self.base_anchors[level_idx].to(device).to(dtype)\n        feat_h, feat_w = featmap_size\n        stride_w, stride_h = self.strides[level_idx]\n        # First create Range with the default dtype, than convert to\n        # target `dtype` for onnx exporting.\n        shift_x = torch.arange(0, feat_w, device=device).to(dtype) * stride_w\n        shift_y = torch.arange(0, feat_h, device=device).to(dtype) * stride_h\n\n        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)\n        shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)\n        # first feat_w elements correspond to the first row of shifts\n        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get\n        # shifted anchors (K, A, 4), reshape to (K*A, 4)\n\n        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]\n        all_anchors = all_anchors.view(-1, 4)\n        # first A rows correspond to A anchors of (0, 0) in feature map,\n        # then (0, 1), (0, 2), ...\n        return all_anchors\n\n    def sparse_priors(self,\n                      prior_idxs,\n                      featmap_size,\n                      level_idx,\n                      dtype=torch.float32,\n                      device='cuda'):\n        \"\"\"Generate sparse anchors according to the ``prior_idxs``.\n\n        Args:\n            prior_idxs (Tensor): The index of corresponding anchors\n                in the feature map.\n            featmap_size (tuple[int]): feature map size arrange as (h, w).\n            level_idx (int): The level index of corresponding feature\n                map.\n            dtype (obj:`torch.dtype`): Date type of points.Defaults to\n                ``torch.float32``.\n            device (obj:`torch.device`): The device where the points is\n                located.\n        Returns:\n            Tensor: Anchor with shape (N, 4), N should be equal to\n                the length of ``prior_idxs``.\n        \"\"\"\n\n        height, width = featmap_size\n        
num_base_anchors = self.num_base_anchors[level_idx]\n        base_anchor_id = prior_idxs % num_base_anchors\n        x = (prior_idxs //\n             num_base_anchors) % width * self.strides[level_idx][0]\n        y = (prior_idxs // width //\n             num_base_anchors) % height * self.strides[level_idx][1]\n        priors = torch.stack([x, y, x, y], 1).to(dtype).to(device) + \\\n            self.base_anchors[level_idx][base_anchor_id, :].to(device)\n\n        return priors\n\n    def grid_anchors(self, featmap_sizes, device='cuda'):\n        \"\"\"Generate grid anchors in multiple feature levels.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes in\n                multiple feature levels.\n            device (str): Device where the anchors will be put on.\n\n        Return:\n            list[torch.Tensor]: Anchors in multiple feature levels. \\\n                The sizes of each tensor should be [N, 4], where \\\n                N = width * height * num_base_anchors, width and height \\\n                are the sizes of the corresponding feature level, \\\n                num_base_anchors is the number of anchors for that level.\n        \"\"\"\n        warnings.warn('``grid_anchors`` would be deprecated soon. '\n                      'Please use ``grid_priors`` ')\n\n        assert self.num_levels == len(featmap_sizes)\n        multi_level_anchors = []\n        for i in range(self.num_levels):\n            anchors = self.single_level_grid_anchors(\n                self.base_anchors[i].to(device),\n                featmap_sizes[i],\n                self.strides[i],\n                device=device)\n            multi_level_anchors.append(anchors)\n        return multi_level_anchors\n\n    def single_level_grid_anchors(self,\n                                  base_anchors,\n                                  featmap_size,\n                                  stride=(16, 16),\n                                  device='cuda'):\n        \"\"\"Generate grid anchors of a single level.\n\n        Note:\n            This function is usually called by method ``self.grid_anchors``.\n\n        Args:\n            base_anchors (torch.Tensor): The base anchors of a feature grid.\n            featmap_size (tuple[int]): Size of the feature maps.\n            stride (tuple[int], optional): Stride of the feature map in order\n                (w, h). Defaults to (16, 16).\n            device (str, optional): Device the tensor will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: Anchors in the overall feature maps.\n        \"\"\"\n\n        warnings.warn(\n            '``single_level_grid_anchors`` would be deprecated soon. 
'\n            'Please use ``single_level_grid_priors`` ')\n\n        # keep featmap_size as Tensor instead of int, so that we\n        # can convert to ONNX correctly\n        feat_h, feat_w = featmap_size\n        shift_x = torch.arange(0, feat_w, device=device) * stride[0]\n        shift_y = torch.arange(0, feat_h, device=device) * stride[1]\n\n        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)\n        shifts = torch.stack([shift_xx, shift_yy, shift_xx, shift_yy], dim=-1)\n        shifts = shifts.type_as(base_anchors)\n        # first feat_w elements correspond to the first row of shifts\n        # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get\n        # shifted anchors (K, A, 4), reshape to (K*A, 4)\n\n        all_anchors = base_anchors[None, :, :] + shifts[:, None, :]\n        all_anchors = all_anchors.view(-1, 4)\n        # first A rows correspond to A anchors of (0, 0) in feature map,\n        # then (0, 1), (0, 2), ...\n        return all_anchors\n\n    def valid_flags(self, featmap_sizes, pad_shape, device='cuda'):\n        \"\"\"Generate valid flags of anchors in multiple feature levels.\n\n        Args:\n            featmap_sizes (list(tuple)): List of feature map sizes in\n                multiple feature levels.\n            pad_shape (tuple): The padded shape of the image.\n            device (str): Device where the anchors will be put on.\n\n        Return:\n            list(torch.Tensor): Valid flags of anchors in multiple levels.\n        \"\"\"\n        assert self.num_levels == len(featmap_sizes)\n        multi_level_flags = []\n        for i in range(self.num_levels):\n            anchor_stride = self.strides[i]\n            feat_h, feat_w = featmap_sizes[i]\n            h, w = pad_shape[:2]\n            valid_feat_h = min(int(np.ceil(h / anchor_stride[1])), feat_h)\n            valid_feat_w = min(int(np.ceil(w / anchor_stride[0])), feat_w)\n            flags = self.single_level_valid_flags((feat_h, feat_w),\n                                                  (valid_feat_h, valid_feat_w),\n                                                  self.num_base_anchors[i],\n                                                  device=device)\n            multi_level_flags.append(flags)\n        return multi_level_flags\n\n    def single_level_valid_flags(self,\n                                 featmap_size,\n                                 valid_size,\n                                 num_base_anchors,\n                                 device='cuda'):\n        \"\"\"Generate the valid flags of anchor in a single feature map.\n\n        Args:\n            featmap_size (tuple[int]): The size of feature maps, arrange\n                as (h, w).\n            valid_size (tuple[int]): The valid size of the feature maps.\n            num_base_anchors (int): The number of base anchors.\n            device (str, optional): Device where the flags will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: The valid flags of each anchor in a single level \\\n                feature map.\n        \"\"\"\n        feat_h, feat_w = featmap_size\n        valid_h, valid_w = valid_size\n        assert valid_h <= feat_h and valid_w <= feat_w\n        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)\n        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)\n        valid_x[:valid_w] = 1\n        valid_y[:valid_h] = 1\n        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)\n        valid = valid_xx & valid_yy\n 
       valid = valid[:, None].expand(valid.size(0),\n                                      num_base_anchors).contiguous().view(-1)\n        return valid\n\n    def __repr__(self):\n        \"\"\"str: a string that describes the module\"\"\"\n        indent_str = '    '\n        repr_str = self.__class__.__name__ + '(\\n'\n        repr_str += f'{indent_str}strides={self.strides},\\n'\n        repr_str += f'{indent_str}ratios={self.ratios},\\n'\n        repr_str += f'{indent_str}scales={self.scales},\\n'\n        repr_str += f'{indent_str}base_sizes={self.base_sizes},\\n'\n        repr_str += f'{indent_str}scale_major={self.scale_major},\\n'\n        repr_str += f'{indent_str}octave_base_scale='\n        repr_str += f'{self.octave_base_scale},\\n'\n        repr_str += f'{indent_str}scales_per_octave='\n        repr_str += f'{self.scales_per_octave},\\n'\n        repr_str += f'{indent_str}num_levels={self.num_levels}\\n'\n        repr_str += f'{indent_str}centers={self.centers},\\n'\n        repr_str += f'{indent_str}center_offset={self.center_offset})'\n        return repr_str\n\n\n@PRIOR_GENERATORS.register_module()\nclass SSDAnchorGenerator(AnchorGenerator):\n    \"\"\"Anchor generator for SSD.\n\n    Args:\n        strides (list[int]  | list[tuple[int, int]]): Strides of anchors\n            in multiple feature levels.\n        ratios (list[float]): The list of ratios between the height and width\n            of anchors in a single level.\n        min_sizes (list[float]): The list of minimum anchor sizes on each\n            level.\n        max_sizes (list[float]): The list of maximum anchor sizes on each\n            level.\n        basesize_ratio_range (tuple(float)): Ratio range of anchors. Being\n            used when not setting min_sizes and max_sizes.\n        input_size (int): Size of feature map, 300 for SSD300, 512 for\n            SSD512. Being used when not setting min_sizes and max_sizes.\n        scale_major (bool): Whether to multiply scales first when generating\n            base anchors. If true, the anchors in the same row will have the\n            same scales. 
It is always set to be False in SSD.\n    \"\"\"\n\n    def __init__(self,\n                 strides,\n                 ratios,\n                 min_sizes=None,\n                 max_sizes=None,\n                 basesize_ratio_range=(0.15, 0.9),\n                 input_size=300,\n                 scale_major=True):\n        assert len(strides) == len(ratios)\n        assert not (min_sizes is None) ^ (max_sizes is None)\n        self.strides = [_pair(stride) for stride in strides]\n        self.centers = [(stride[0] / 2., stride[1] / 2.)\n                        for stride in self.strides]\n\n        if min_sizes is None and max_sizes is None:\n            # use hard code to generate SSD anchors\n            self.input_size = input_size\n            assert mmcv.is_tuple_of(basesize_ratio_range, float)\n            self.basesize_ratio_range = basesize_ratio_range\n            # calculate anchor ratios and sizes\n            min_ratio, max_ratio = basesize_ratio_range\n            min_ratio = int(min_ratio * 100)\n            max_ratio = int(max_ratio * 100)\n            step = int(np.floor(max_ratio - min_ratio) / (self.num_levels - 2))\n            min_sizes = []\n            max_sizes = []\n            for ratio in range(int(min_ratio), int(max_ratio) + 1, step):\n                min_sizes.append(int(self.input_size * ratio / 100))\n                max_sizes.append(int(self.input_size * (ratio + step) / 100))\n            if self.input_size == 300:\n                if basesize_ratio_range[0] == 0.15:  # SSD300 COCO\n                    min_sizes.insert(0, int(self.input_size * 7 / 100))\n                    max_sizes.insert(0, int(self.input_size * 15 / 100))\n                elif basesize_ratio_range[0] == 0.2:  # SSD300 VOC\n                    min_sizes.insert(0, int(self.input_size * 10 / 100))\n                    max_sizes.insert(0, int(self.input_size * 20 / 100))\n                else:\n                    raise ValueError(\n                        'basesize_ratio_range[0] should be either 0.15'\n                        'or 0.2 when input_size is 300, got '\n                        f'{basesize_ratio_range[0]}.')\n            elif self.input_size == 512:\n                if basesize_ratio_range[0] == 0.1:  # SSD512 COCO\n                    min_sizes.insert(0, int(self.input_size * 4 / 100))\n                    max_sizes.insert(0, int(self.input_size * 10 / 100))\n                elif basesize_ratio_range[0] == 0.15:  # SSD512 VOC\n                    min_sizes.insert(0, int(self.input_size * 7 / 100))\n                    max_sizes.insert(0, int(self.input_size * 15 / 100))\n                else:\n                    raise ValueError(\n                        'When not setting min_sizes and max_sizes,'\n                        'basesize_ratio_range[0] should be either 0.1'\n                        'or 0.15 when input_size is 512, got'\n                        f' {basesize_ratio_range[0]}.')\n            else:\n                raise ValueError(\n                    'Only support 300 or 512 in SSDAnchorGenerator when '\n                    'not setting min_sizes and max_sizes, '\n                    f'got {self.input_size}.')\n\n        assert len(min_sizes) == len(max_sizes) == len(strides)\n\n        anchor_ratios = []\n        anchor_scales = []\n        for k in range(len(self.strides)):\n            scales = [1., np.sqrt(max_sizes[k] / min_sizes[k])]\n            anchor_ratio = [1.]\n            for r in ratios[k]:\n                anchor_ratio += [1 / r, r]  # 4 or 6 ratio\n 
           anchor_ratios.append(torch.Tensor(anchor_ratio))\n            anchor_scales.append(torch.Tensor(scales))\n\n        self.base_sizes = min_sizes\n        self.scales = anchor_scales\n        self.ratios = anchor_ratios\n        self.scale_major = scale_major\n        self.center_offset = 0\n        self.base_anchors = self.gen_base_anchors()\n\n    def gen_base_anchors(self):\n        \"\"\"Generate base anchors.\n\n        Returns:\n            list(torch.Tensor): Base anchors of a feature grid in multiple \\\n                feature levels.\n        \"\"\"\n        multi_level_base_anchors = []\n        for i, base_size in enumerate(self.base_sizes):\n            base_anchors = self.gen_single_level_base_anchors(\n                base_size,\n                scales=self.scales[i],\n                ratios=self.ratios[i],\n                center=self.centers[i])\n            indices = list(range(len(self.ratios[i])))\n            indices.insert(1, len(indices))\n            base_anchors = torch.index_select(base_anchors, 0,\n                                              torch.LongTensor(indices))\n            multi_level_base_anchors.append(base_anchors)\n        return multi_level_base_anchors\n\n    def __repr__(self):\n        \"\"\"str: a string that describes the module\"\"\"\n        indent_str = '    '\n        repr_str = self.__class__.__name__ + '(\\n'\n        repr_str += f'{indent_str}strides={self.strides},\\n'\n        repr_str += f'{indent_str}scales={self.scales},\\n'\n        repr_str += f'{indent_str}scale_major={self.scale_major},\\n'\n        repr_str += f'{indent_str}input_size={self.input_size},\\n'\n        repr_str += f'{indent_str}scales={self.scales},\\n'\n        repr_str += f'{indent_str}ratios={self.ratios},\\n'\n        repr_str += f'{indent_str}num_levels={self.num_levels},\\n'\n        repr_str += f'{indent_str}base_sizes={self.base_sizes},\\n'\n        repr_str += f'{indent_str}basesize_ratio_range='\n        repr_str += f'{self.basesize_ratio_range})'\n        return repr_str\n\n\n@PRIOR_GENERATORS.register_module()\nclass LegacyAnchorGenerator(AnchorGenerator):\n    \"\"\"Legacy anchor generator used in MMDetection V1.x.\n\n    Note:\n        Difference to the V2.0 anchor generator:\n\n        1. The center offset of V1.x anchors are set to be 0.5 rather than 0.\n        2. The width/height are minused by 1 when calculating the anchors' \\\n            centers and corners to meet the V1.x coordinate system.\n        3. The anchors' corners are quantized.\n\n    Args:\n        strides (list[int] | list[tuple[int]]): Strides of anchors\n            in multiple feature levels.\n        ratios (list[float]): The list of ratios between the height and width\n            of anchors in a single level.\n        scales (list[int] | None): Anchor scales for anchors in a single level.\n            It cannot be set at the same time if `octave_base_scale` and\n            `scales_per_octave` are set.\n        base_sizes (list[int]): The basic sizes of anchors in multiple levels.\n            If None is given, strides will be used to generate base_sizes.\n        scale_major (bool): Whether to multiply scales first when generating\n            base anchors. If true, the anchors in the same row will have the\n            same scales. 
By default it is True in V2.0\n        octave_base_scale (int): The base scale of octave.\n        scales_per_octave (int): Number of scales for each octave.\n            `octave_base_scale` and `scales_per_octave` are usually used in\n            retinanet and the `scales` should be None when they are set.\n        centers (list[tuple[float, float]] | None): The centers of the anchor\n            relative to the feature grid center in multiple feature levels.\n            By default it is set to be None and not used. It a list of float\n            is given, this list will be used to shift the centers of anchors.\n        center_offset (float): The offset of center in proportion to anchors'\n            width and height. By default it is 0.5 in V2.0 but it should be 0.5\n            in v1.x models.\n\n    Examples:\n        >>> from mmdet.core import LegacyAnchorGenerator\n        >>> self = LegacyAnchorGenerator(\n        >>>     [16], [1.], [1.], [9], center_offset=0.5)\n        >>> all_anchors = self.grid_anchors(((2, 2),), device='cpu')\n        >>> print(all_anchors)\n        [tensor([[ 0.,  0.,  8.,  8.],\n                [16.,  0., 24.,  8.],\n                [ 0., 16.,  8., 24.],\n                [16., 16., 24., 24.]])]\n    \"\"\"\n\n    def gen_single_level_base_anchors(self,\n                                      base_size,\n                                      scales,\n                                      ratios,\n                                      center=None):\n        \"\"\"Generate base anchors of a single level.\n\n        Note:\n            The width/height of anchors are minused by 1 when calculating \\\n                the centers and corners to meet the V1.x coordinate system.\n\n        Args:\n            base_size (int | float): Basic size of an anchor.\n            scales (torch.Tensor): Scales of the anchor.\n            ratios (torch.Tensor): The ratio between between the height.\n                and width of anchors in a single level.\n            center (tuple[float], optional): The center of the base anchor\n                related to a single feature grid. 
Defaults to None.\n\n        Returns:\n            torch.Tensor: Anchors in a single-level feature map.\n        \"\"\"\n        w = base_size\n        h = base_size\n        if center is None:\n            x_center = self.center_offset * (w - 1)\n            y_center = self.center_offset * (h - 1)\n        else:\n            x_center, y_center = center\n\n        h_ratios = torch.sqrt(ratios)\n        w_ratios = 1 / h_ratios\n        if self.scale_major:\n            ws = (w * w_ratios[:, None] * scales[None, :]).view(-1)\n            hs = (h * h_ratios[:, None] * scales[None, :]).view(-1)\n        else:\n            ws = (w * scales[:, None] * w_ratios[None, :]).view(-1)\n            hs = (h * scales[:, None] * h_ratios[None, :]).view(-1)\n\n        # use float anchor and the anchor's center is aligned with the\n        # pixel center\n        base_anchors = [\n            x_center - 0.5 * (ws - 1), y_center - 0.5 * (hs - 1),\n            x_center + 0.5 * (ws - 1), y_center + 0.5 * (hs - 1)\n        ]\n        base_anchors = torch.stack(base_anchors, dim=-1).round()\n\n        return base_anchors\n\n\n@PRIOR_GENERATORS.register_module()\nclass LegacySSDAnchorGenerator(SSDAnchorGenerator, LegacyAnchorGenerator):\n    \"\"\"Legacy anchor generator used in MMDetection V1.x.\n\n    The difference between `LegacySSDAnchorGenerator` and `SSDAnchorGenerator`\n    can be found in `LegacyAnchorGenerator`.\n    \"\"\"\n\n    def __init__(self,\n                 strides,\n                 ratios,\n                 basesize_ratio_range,\n                 input_size=300,\n                 scale_major=True):\n        super(LegacySSDAnchorGenerator, self).__init__(\n            strides=strides,\n            ratios=ratios,\n            basesize_ratio_range=basesize_ratio_range,\n            input_size=input_size,\n            scale_major=scale_major)\n        self.centers = [((stride - 1) / 2., (stride - 1) / 2.)\n                        for stride in strides]\n        self.base_anchors = self.gen_base_anchors()\n\n\n@PRIOR_GENERATORS.register_module()\nclass YOLOAnchorGenerator(AnchorGenerator):\n    \"\"\"Anchor generator for YOLO.\n\n    Args:\n        strides (list[int] | list[tuple[int, int]]): Strides of anchors\n            in multiple feature levels.\n        base_sizes (list[list[tuple[int, int]]]): The basic sizes\n            of anchors in multiple levels.\n    \"\"\"\n\n    def __init__(self, strides, base_sizes):\n        self.strides = [_pair(stride) for stride in strides]\n        self.centers = [(stride[0] / 2., stride[1] / 2.)\n                        for stride in self.strides]\n        self.base_sizes = []\n        num_anchor_per_level = len(base_sizes[0])\n        for base_sizes_per_level in base_sizes:\n            assert num_anchor_per_level == len(base_sizes_per_level)\n            self.base_sizes.append(\n                [_pair(base_size) for base_size in base_sizes_per_level])\n        self.base_anchors = self.gen_base_anchors()\n\n    @property\n    def num_levels(self):\n        \"\"\"int: number of feature levels that the generator will be applied\"\"\"\n        return len(self.base_sizes)\n\n    def gen_base_anchors(self):\n        \"\"\"Generate base anchors.\n\n        Returns:\n            list(torch.Tensor): Base anchors of a feature grid in multiple \\\n                feature levels.\n        \"\"\"\n        multi_level_base_anchors = []\n        for i, base_sizes_per_level in enumerate(self.base_sizes):\n            center = None\n            if self.centers is not 
None:\n                center = self.centers[i]\n            multi_level_base_anchors.append(\n                self.gen_single_level_base_anchors(base_sizes_per_level,\n                                                   center))\n        return multi_level_base_anchors\n\n    def gen_single_level_base_anchors(self, base_sizes_per_level, center=None):\n        \"\"\"Generate base anchors of a single level.\n\n        Args:\n            base_sizes_per_level (list[tuple[int, int]]): Basic sizes of\n                anchors.\n            center (tuple[float], optional): The center of the base anchor\n                related to a single feature grid. Defaults to None.\n\n        Returns:\n            torch.Tensor: Anchors in a single-level feature maps.\n        \"\"\"\n        x_center, y_center = center\n        base_anchors = []\n        for base_size in base_sizes_per_level:\n            w, h = base_size\n\n            # use float anchor and the anchor's center is aligned with the\n            # pixel center\n            base_anchor = torch.Tensor([\n                x_center - 0.5 * w, y_center - 0.5 * h, x_center + 0.5 * w,\n                y_center + 0.5 * h\n            ])\n            base_anchors.append(base_anchor)\n        base_anchors = torch.stack(base_anchors, dim=0)\n\n        return base_anchors\n\n    def responsible_flags(self, featmap_sizes, gt_bboxes, device='cuda'):\n        \"\"\"Generate responsible anchor flags of grid cells in multiple scales.\n\n        Args:\n            featmap_sizes (list(tuple)): List of feature map sizes in multiple\n                feature levels.\n            gt_bboxes (Tensor): Ground truth boxes, shape (n, 4).\n            device (str): Device where the anchors will be put on.\n\n        Return:\n            list(torch.Tensor): responsible flags of anchors in multiple level\n        \"\"\"\n        assert self.num_levels == len(featmap_sizes)\n        multi_level_responsible_flags = []\n        for i in range(self.num_levels):\n            anchor_stride = self.strides[i]\n            flags = self.single_level_responsible_flags(\n                featmap_sizes[i],\n                gt_bboxes,\n                anchor_stride,\n                self.num_base_anchors[i],\n                device=device)\n            multi_level_responsible_flags.append(flags)\n        return multi_level_responsible_flags\n\n    def single_level_responsible_flags(self,\n                                       featmap_size,\n                                       gt_bboxes,\n                                       stride,\n                                       num_base_anchors,\n                                       device='cuda'):\n        \"\"\"Generate the responsible flags of anchor in a single feature map.\n\n        Args:\n            featmap_size (tuple[int]): The size of feature maps.\n            gt_bboxes (Tensor): Ground truth boxes, shape (n, 4).\n            stride (tuple(int)): stride of current level\n            num_base_anchors (int): The number of base anchors.\n            device (str, optional): Device where the flags will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: The valid flags of each anchor in a single level \\\n                feature map.\n        \"\"\"\n        feat_h, feat_w = featmap_size\n        gt_bboxes_cx = ((gt_bboxes[:, 0] + gt_bboxes[:, 2]) * 0.5).to(device)\n        gt_bboxes_cy = ((gt_bboxes[:, 1] + gt_bboxes[:, 3]) * 0.5).to(device)\n        gt_bboxes_grid_x = 
torch.floor(gt_bboxes_cx / stride[0]).long()\n        gt_bboxes_grid_y = torch.floor(gt_bboxes_cy / stride[1]).long()\n\n        # row major indexing\n        gt_bboxes_grid_idx = gt_bboxes_grid_y * feat_w + gt_bboxes_grid_x\n\n        responsible_grid = torch.zeros(\n            feat_h * feat_w, dtype=torch.uint8, device=device)\n        responsible_grid[gt_bboxes_grid_idx] = 1\n\n        responsible_grid = responsible_grid[:, None].expand(\n            responsible_grid.size(0), num_base_anchors).contiguous().view(-1)\n        return responsible_grid\n"
  },
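The Examples block in the `AnchorGenerator` docstring above can be run directly once `mmdet` is importable; the sketch below repeats it and also inspects `num_base_priors`, showing that each grid cell receives one anchor per (scale, ratio) pair shifted by the stride grid (a usage sketch, assuming an environment with mmdet installed):

```python
# Usage sketch for AnchorGenerator (assumes mmdet is installed).
from mmdet.core import AnchorGenerator

# One level, stride 16, a single scale/ratio pair, base size 9.
gen = AnchorGenerator(strides=[16], ratios=[1.0], scales=[1.0], base_sizes=[9])
print(gen.num_base_priors)            # [1] -> one anchor per grid cell

# Anchors for a 2x2 feature map: the base anchor shifted by the stride grid.
(anchors,) = gen.grid_priors([(2, 2)], device='cpu')
print(anchors.shape)                  # torch.Size([4, 4]), i.e. (h*w*A, 4)
print(anchors)                        # matches the docstring example above
```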
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/anchor/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nfrom mmcv.utils import Registry, build_from_cfg\n\nPRIOR_GENERATORS = Registry('Generator for anchors and points')\n\nANCHOR_GENERATORS = PRIOR_GENERATORS\n\n\ndef build_prior_generator(cfg, default_args=None):\n    return build_from_cfg(cfg, PRIOR_GENERATORS, default_args)\n\n\ndef build_anchor_generator(cfg, default_args=None):\n    warnings.warn(\n        '``build_anchor_generator`` would be deprecated soon, please use '\n        '``build_prior_generator`` ')\n    return build_prior_generator(cfg, default_args=default_args)\n"
  },
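Generators are normally not instantiated directly but built from a config dict through the `PRIOR_GENERATORS` registry defined above. A sketch of that pattern (again assuming mmdet is installed; the config values are illustrative, RetinaNet-style settings, not taken from this repository):

```python
# Building a prior generator from a config dict (assumes mmdet is installed).
from mmdet.core.anchor import build_prior_generator

anchor_cfg = dict(
    type='AnchorGenerator',          # registry key of the class to build
    strides=[8, 16, 32],
    ratios=[0.5, 1.0, 2.0],
    octave_base_scale=4,
    scales_per_octave=3)             # illustrative multi-scale settings

gen = build_prior_generator(anchor_cfg)
print(type(gen).__name__)            # AnchorGenerator
print(gen.num_levels)                # 3 feature levels, one per stride
```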
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/anchor/point_generator.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom torch.nn.modules.utils import _pair\n\nfrom .builder import PRIOR_GENERATORS\n\n\n@PRIOR_GENERATORS.register_module()\nclass PointGenerator:\n\n    def _meshgrid(self, x, y, row_major=True):\n        xx = x.repeat(len(y))\n        yy = y.view(-1, 1).repeat(1, len(x)).view(-1)\n        if row_major:\n            return xx, yy\n        else:\n            return yy, xx\n\n    def grid_points(self, featmap_size, stride=16, device='cuda'):\n        feat_h, feat_w = featmap_size\n        shift_x = torch.arange(0., feat_w, device=device) * stride\n        shift_y = torch.arange(0., feat_h, device=device) * stride\n        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)\n        stride = shift_x.new_full((shift_xx.shape[0], ), stride)\n        shifts = torch.stack([shift_xx, shift_yy, stride], dim=-1)\n        all_points = shifts.to(device)\n        return all_points\n\n    def valid_flags(self, featmap_size, valid_size, device='cuda'):\n        feat_h, feat_w = featmap_size\n        valid_h, valid_w = valid_size\n        assert valid_h <= feat_h and valid_w <= feat_w\n        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)\n        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)\n        valid_x[:valid_w] = 1\n        valid_y[:valid_h] = 1\n        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)\n        valid = valid_xx & valid_yy\n        return valid\n\n\n@PRIOR_GENERATORS.register_module()\nclass MlvlPointGenerator:\n    \"\"\"Standard points generator for multi-level (Mlvl) feature maps in 2D\n    points-based detectors.\n\n    Args:\n        strides (list[int] | list[tuple[int, int]]): Strides of anchors\n            in multiple feature levels in order (w, h).\n        offset (float): The offset of points, the value is normalized with\n            corresponding stride. Defaults to 0.5.\n    \"\"\"\n\n    def __init__(self, strides, offset=0.5):\n        self.strides = [_pair(stride) for stride in strides]\n        self.offset = offset\n\n    @property\n    def num_levels(self):\n        \"\"\"int: number of feature levels that the generator will be applied\"\"\"\n        return len(self.strides)\n\n    @property\n    def num_base_priors(self):\n        \"\"\"list[int]: The number of priors (points) at a point\n        on the feature grid\"\"\"\n        return [1 for _ in range(len(self.strides))]\n\n    def _meshgrid(self, x, y, row_major=True):\n        yy, xx = torch.meshgrid(y, x)\n        if row_major:\n            # warning .flatten() would cause error in ONNX exporting\n            # have to use reshape here\n            return xx.reshape(-1), yy.reshape(-1)\n\n        else:\n            return yy.reshape(-1), xx.reshape(-1)\n\n    def grid_priors(self,\n                    featmap_sizes,\n                    dtype=torch.float32,\n                    device='cuda',\n                    with_stride=False):\n        \"\"\"Generate grid points of multiple feature levels.\n\n        Args:\n            featmap_sizes (list[tuple]): List of feature map sizes in\n                multiple feature levels, each size arrange as\n                as (h, w).\n            dtype (:obj:`dtype`): Dtype of priors. 
Default: torch.float32.\n            device (str): The device where the anchors will be put on.\n            with_stride (bool): Whether to concatenate the stride to\n                the last dimension of points.\n\n        Return:\n            list[torch.Tensor]: Points of  multiple feature levels.\n            The sizes of each tensor should be (N, 2) when with stride is\n            ``False``, where N = width * height, width and height\n            are the sizes of the corresponding feature level,\n            and the last dimension 2 represent (coord_x, coord_y),\n            otherwise the shape should be (N, 4),\n            and the last dimension 4 represent\n            (coord_x, coord_y, stride_w, stride_h).\n        \"\"\"\n\n        assert self.num_levels == len(featmap_sizes)\n        multi_level_priors = []\n        for i in range(self.num_levels):\n            priors = self.single_level_grid_priors(\n                featmap_sizes[i],\n                level_idx=i,\n                dtype=dtype,\n                device=device,\n                with_stride=with_stride)\n            multi_level_priors.append(priors)\n        return multi_level_priors\n\n    def single_level_grid_priors(self,\n                                 featmap_size,\n                                 level_idx,\n                                 dtype=torch.float32,\n                                 device='cuda',\n                                 with_stride=False):\n        \"\"\"Generate grid Points of a single level.\n\n        Note:\n            This function is usually called by method ``self.grid_priors``.\n\n        Args:\n            featmap_size (tuple[int]): Size of the feature maps, arrange as\n                (h, w).\n            level_idx (int): The index of corresponding feature map level.\n            dtype (:obj:`dtype`): Dtype of priors. 
Default: torch.float32.\n            device (str, optional): The device the tensor will be put on.\n                Defaults to 'cuda'.\n            with_stride (bool): Concatenate the stride to the last dimension\n                of points.\n\n        Return:\n            Tensor: Points of single feature levels.\n            The shape of tensor should be (N, 2) when with stride is\n            ``False``, where N = width * height, width and height\n            are the sizes of the corresponding feature level,\n            and the last dimension 2 represent (coord_x, coord_y),\n            otherwise the shape should be (N, 4),\n            and the last dimension 4 represent\n            (coord_x, coord_y, stride_w, stride_h).\n        \"\"\"\n        feat_h, feat_w = featmap_size\n        stride_w, stride_h = self.strides[level_idx]\n        shift_x = (torch.arange(0, feat_w, device=device) +\n                   self.offset) * stride_w\n        # keep featmap_size as Tensor instead of int, so that we\n        # can convert to ONNX correctly\n        shift_x = shift_x.to(dtype)\n\n        shift_y = (torch.arange(0, feat_h, device=device) +\n                   self.offset) * stride_h\n        # keep featmap_size as Tensor instead of int, so that we\n        # can convert to ONNX correctly\n        shift_y = shift_y.to(dtype)\n        shift_xx, shift_yy = self._meshgrid(shift_x, shift_y)\n        if not with_stride:\n            shifts = torch.stack([shift_xx, shift_yy], dim=-1)\n        else:\n            # use `shape[0]` instead of `len(shift_xx)` for ONNX export\n            stride_w = shift_xx.new_full((shift_xx.shape[0], ),\n                                         stride_w).to(dtype)\n            stride_h = shift_xx.new_full((shift_yy.shape[0], ),\n                                         stride_h).to(dtype)\n            shifts = torch.stack([shift_xx, shift_yy, stride_w, stride_h],\n                                 dim=-1)\n        all_points = shifts.to(device)\n        return all_points\n\n    def valid_flags(self, featmap_sizes, pad_shape, device='cuda'):\n        \"\"\"Generate valid flags of points of multiple feature levels.\n\n        Args:\n            featmap_sizes (list(tuple)): List of feature map sizes in\n                multiple feature levels, each size arrange as\n                as (h, w).\n            pad_shape (tuple(int)): The padded shape of the image,\n                 arrange as (h, w).\n            device (str): The device where the anchors will be put on.\n\n        Return:\n            list(torch.Tensor): Valid flags of points of multiple levels.\n        \"\"\"\n        assert self.num_levels == len(featmap_sizes)\n        multi_level_flags = []\n        for i in range(self.num_levels):\n            point_stride = self.strides[i]\n            feat_h, feat_w = featmap_sizes[i]\n            h, w = pad_shape[:2]\n            valid_feat_h = min(int(np.ceil(h / point_stride[1])), feat_h)\n            valid_feat_w = min(int(np.ceil(w / point_stride[0])), feat_w)\n            flags = self.single_level_valid_flags((feat_h, feat_w),\n                                                  (valid_feat_h, valid_feat_w),\n                                                  device=device)\n            multi_level_flags.append(flags)\n        return multi_level_flags\n\n    def single_level_valid_flags(self,\n                                 featmap_size,\n                                 valid_size,\n                                 device='cuda'):\n        \"\"\"Generate the 
valid flags of points of a single feature map.\n\n        Args:\n            featmap_size (tuple[int]): The size of feature maps, arrange as\n                as (h, w).\n            valid_size (tuple[int]): The valid size of the feature maps.\n                The size arrange as as (h, w).\n            device (str, optional): The device where the flags will be put on.\n                Defaults to 'cuda'.\n\n        Returns:\n            torch.Tensor: The valid flags of each points in a single level \\\n                feature map.\n        \"\"\"\n        feat_h, feat_w = featmap_size\n        valid_h, valid_w = valid_size\n        assert valid_h <= feat_h and valid_w <= feat_w\n        valid_x = torch.zeros(feat_w, dtype=torch.bool, device=device)\n        valid_y = torch.zeros(feat_h, dtype=torch.bool, device=device)\n        valid_x[:valid_w] = 1\n        valid_y[:valid_h] = 1\n        valid_xx, valid_yy = self._meshgrid(valid_x, valid_y)\n        valid = valid_xx & valid_yy\n        return valid\n\n    def sparse_priors(self,\n                      prior_idxs,\n                      featmap_size,\n                      level_idx,\n                      dtype=torch.float32,\n                      device='cuda'):\n        \"\"\"Generate sparse points according to the ``prior_idxs``.\n\n        Args:\n            prior_idxs (Tensor): The index of corresponding anchors\n                in the feature map.\n            featmap_size (tuple[int]): feature map size arrange as (w, h).\n            level_idx (int): The level index of corresponding feature\n                map.\n            dtype (obj:`torch.dtype`): Date type of points. Defaults to\n                ``torch.float32``.\n            device (obj:`torch.device`): The device where the points is\n                located.\n        Returns:\n            Tensor: Anchor with shape (N, 2), N should be equal to\n            the length of ``prior_idxs``. And last dimension\n            2 represent (coord_x, coord_y).\n        \"\"\"\n        height, width = featmap_size\n        x = (prior_idxs % width + self.offset) * self.strides[level_idx][0]\n        y = ((prior_idxs // width) % height +\n             self.offset) * self.strides[level_idx][1]\n        prioris = torch.stack([x, y], 1).to(dtype)\n        prioris = prioris.to(device)\n        return prioris\n"
  },
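`MlvlPointGenerator` follows the same pattern but emits one (x, y) prior per grid cell, offset by half a stride by default, optionally with the per-level stride appended. A usage sketch (assuming mmdet is installed):

```python
# Usage sketch for MlvlPointGenerator (assumes mmdet is installed).
from mmdet.core import MlvlPointGenerator

gen = MlvlPointGenerator(strides=[8, 16], offset=0.5)
print(gen.num_base_priors)                      # [1, 1] -> one point per cell

# Two feature levels of sizes (h, w) = (2, 3) and (1, 2).
pts = gen.grid_priors([(2, 3), (1, 2)], device='cpu', with_stride=True)
print(pts[0].shape)   # torch.Size([6, 4]) -> (x, y, stride_w, stride_h)
print(pts[0][0])      # tensor([4., 4., 8., 8.]) = (0.5*8, 0.5*8, 8, 8)
```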
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/anchor/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\n\ndef images_to_levels(target, num_levels):\n    \"\"\"Convert targets by image to targets by feature level.\n\n    [target_img0, target_img1] -> [target_level0, target_level1, ...]\n    \"\"\"\n    target = torch.stack(target, 0)\n    level_targets = []\n    start = 0\n    for n in num_levels:\n        end = start + n\n        # level_targets.append(target[:, start:end].squeeze(0))\n        level_targets.append(target[:, start:end])\n        start = end\n    return level_targets\n\n\ndef anchor_inside_flags(flat_anchors,\n                        valid_flags,\n                        img_shape,\n                        allowed_border=0):\n    \"\"\"Check whether the anchors are inside the border.\n\n    Args:\n        flat_anchors (torch.Tensor): Flatten anchors, shape (n, 4).\n        valid_flags (torch.Tensor): An existing valid flags of anchors.\n        img_shape (tuple(int)): Shape of current image.\n        allowed_border (int, optional): The border to allow the valid anchor.\n            Defaults to 0.\n\n    Returns:\n        torch.Tensor: Flags indicating whether the anchors are inside a \\\n            valid range.\n    \"\"\"\n    img_h, img_w = img_shape[:2]\n    if allowed_border >= 0:\n        inside_flags = valid_flags & \\\n            (flat_anchors[:, 0] >= -allowed_border) & \\\n            (flat_anchors[:, 1] >= -allowed_border) & \\\n            (flat_anchors[:, 2] < img_w + allowed_border) & \\\n            (flat_anchors[:, 3] < img_h + allowed_border)\n    else:\n        inside_flags = valid_flags\n    return inside_flags\n\n\ndef calc_region(bbox, ratio, featmap_size=None):\n    \"\"\"Calculate a proportional bbox region.\n\n    The bbox center are fixed and the new h' and w' is h * ratio and w * ratio.\n\n    Args:\n        bbox (Tensor): Bboxes to calculate regions, shape (n, 4).\n        ratio (float): Ratio of the output region.\n        featmap_size (tuple): Feature map size used for clipping the boundary.\n\n    Returns:\n        tuple: x1, y1, x2, y2\n    \"\"\"\n    x1 = torch.round((1 - ratio) * bbox[0] + ratio * bbox[2]).long()\n    y1 = torch.round((1 - ratio) * bbox[1] + ratio * bbox[3]).long()\n    x2 = torch.round(ratio * bbox[0] + (1 - ratio) * bbox[2]).long()\n    y2 = torch.round(ratio * bbox[1] + (1 - ratio) * bbox[3]).long()\n    if featmap_size is not None:\n        x1 = x1.clamp(min=0, max=featmap_size[1])\n        y1 = y1.clamp(min=0, max=featmap_size[0])\n        x2 = x2.clamp(min=0, max=featmap_size[1])\n        y2 = y2.clamp(min=0, max=featmap_size[0])\n    return (x1, y1, x2, y2)\n"
  },
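`anchor_inside_flags` above simply ANDs the existing validity mask with a border check on all four anchor coordinates. The sketch below exercises it on two hand-made anchors (assumes mmdet is installed; the boxes are illustrative):

```python
# Usage sketch for anchor_inside_flags (assumes mmdet is installed).
import torch
from mmdet.core import anchor_inside_flags

# One anchor fully inside a 100x100 image, one crossing the right border.
anchors = torch.tensor([[10., 10., 50., 50.],
                        [80., 10., 120., 50.]])
valid = torch.tensor([True, True])

flags = anchor_inside_flags(anchors, valid, img_shape=(100, 100),
                            allowed_border=0)
print(flags)   # tensor([ True, False]) -> the second anchor is filtered out
```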
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .assigners import (AssignResult, BaseAssigner, CenterRegionAssigner,\n                        MaxIoUAssigner, RegionAssigner)\nfrom .builder import build_assigner, build_bbox_coder, build_sampler\nfrom .coder import (BaseBBoxCoder, DeltaXYWHBBoxCoder, DistancePointBBoxCoder,\n                    PseudoBBoxCoder, TBLRBBoxCoder)\nfrom .iou_calculators import BboxOverlaps2D, bbox_overlaps\nfrom .samplers import (BaseSampler, CombinedSampler,\n                       InstanceBalancedPosSampler, IoUBalancedNegSampler,\n                       OHEMSampler, PseudoSampler, RandomSampler,\n                       SamplingResult, ScoreHLRSampler)\nfrom .transforms import (bbox2distance, bbox2result, bbox2roi,\n                         bbox_cxcywh_to_xyxy, bbox_flip, bbox_mapping,\n                         bbox_mapping_back, bbox_rescale, bbox_xyxy_to_cxcywh,\n                         distance2bbox, find_inside_bboxes, roi2bbox)\n\n__all__ = [\n    'bbox_overlaps', 'BboxOverlaps2D', 'BaseAssigner', 'MaxIoUAssigner',\n    'AssignResult', 'BaseSampler', 'PseudoSampler', 'RandomSampler',\n    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',\n    'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'build_assigner',\n    'build_sampler', 'bbox_flip', 'bbox_mapping', 'bbox_mapping_back',\n    'bbox2roi', 'roi2bbox', 'bbox2result', 'distance2bbox', 'bbox2distance',\n    'build_bbox_coder', 'BaseBBoxCoder', 'PseudoBBoxCoder',\n    'DeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'DistancePointBBoxCoder',\n    'CenterRegionAssigner', 'bbox_rescale', 'bbox_cxcywh_to_xyxy',\n    'bbox_xyxy_to_cxcywh', 'RegionAssigner', 'find_inside_bboxes'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .approx_max_iou_assigner import ApproxMaxIoUAssigner\nfrom .assign_result import AssignResult\nfrom .atss_assigner import ATSSAssigner\nfrom .base_assigner import BaseAssigner\nfrom .center_region_assigner import CenterRegionAssigner\nfrom .grid_assigner import GridAssigner\nfrom .hungarian_assigner import HungarianAssigner\nfrom .mask_hungarian_assigner import MaskHungarianAssigner\nfrom .max_iou_assigner import MaxIoUAssigner\nfrom .point_assigner import PointAssigner\nfrom .region_assigner import RegionAssigner\nfrom .sim_ota_assigner import SimOTAAssigner\nfrom .task_aligned_assigner import TaskAlignedAssigner\nfrom .uniform_assigner import UniformAssigner\n\n__all__ = [\n    'BaseAssigner', 'MaxIoUAssigner', 'ApproxMaxIoUAssigner', 'AssignResult',\n    'PointAssigner', 'ATSSAssigner', 'CenterRegionAssigner', 'GridAssigner',\n    'HungarianAssigner', 'RegionAssigner', 'UniformAssigner', 'SimOTAAssigner',\n    'TaskAlignedAssigner', 'MaskHungarianAssigner'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/approx_max_iou_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..iou_calculators import build_iou_calculator\nfrom .max_iou_assigner import MaxIoUAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass ApproxMaxIoUAssigner(MaxIoUAssigner):\n    \"\"\"Assign a corresponding gt bbox or background to each bbox.\n\n    Each proposals will be assigned with an integer indicating the ground truth\n     index. (semi-positive index: gt label (0-based), -1: background)\n\n    - -1: negative sample, no assigned gt\n    - semi-positive integer: positive sample, index (0-based) of assigned gt\n\n    Args:\n        pos_iou_thr (float): IoU threshold for positive bboxes.\n        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.\n        min_pos_iou (float): Minimum iou for a bbox to be considered as a\n            positive bbox. Positive samples can have smaller IoU than\n            pos_iou_thr due to the 4th step (assign max IoU sample to each gt).\n        gt_max_assign_all (bool): Whether to assign all bboxes with the same\n            highest overlap with some gt to that gt.\n        ignore_iof_thr (float): IoF threshold for ignoring bboxes (if\n            `gt_bboxes_ignore` is specified). Negative values mean not\n            ignoring any bboxes.\n        ignore_wrt_candidates (bool): Whether to compute the iof between\n            `bboxes` and `gt_bboxes_ignore`, or the contrary.\n        match_low_quality (bool): Whether to allow quality matches. This is\n            usually allowed for RPN and single stage detectors, but not allowed\n            in the second stage.\n        gpu_assign_thr (int): The upper bound of the number of GT for GPU\n            assign. When the number of gt is above this threshold, will assign\n            on CPU device. Negative values mean not assign on CPU.\n    \"\"\"\n\n    def __init__(self,\n                 pos_iou_thr,\n                 neg_iou_thr,\n                 min_pos_iou=.0,\n                 gt_max_assign_all=True,\n                 ignore_iof_thr=-1,\n                 ignore_wrt_candidates=True,\n                 match_low_quality=True,\n                 gpu_assign_thr=-1,\n                 iou_calculator=dict(type='BboxOverlaps2D')):\n        self.pos_iou_thr = pos_iou_thr\n        self.neg_iou_thr = neg_iou_thr\n        self.min_pos_iou = min_pos_iou\n        self.gt_max_assign_all = gt_max_assign_all\n        self.ignore_iof_thr = ignore_iof_thr\n        self.ignore_wrt_candidates = ignore_wrt_candidates\n        self.gpu_assign_thr = gpu_assign_thr\n        self.match_low_quality = match_low_quality\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n\n    def assign(self,\n               approxs,\n               squares,\n               approxs_per_octave,\n               gt_bboxes,\n               gt_bboxes_ignore=None,\n               gt_labels=None):\n        \"\"\"Assign gt to approxs.\n\n        This method assign a gt bbox to each group of approxs (bboxes),\n        each group of approxs is represent by a base approx (bbox) and\n        will be assigned with -1, or a semi-positive number.\n        background_label (-1) means negative sample,\n        semi-positive number is the index (0-based) of assigned gt.\n        The assignment is done in following steps, the order matters.\n\n        1. assign every bbox to background_label (-1)\n        2. use the max IoU of each group of approxs to assign\n        2. 
assign proposals whose iou with all gts < neg_iou_thr to background\n        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,\n           assign it to that bbox\n        4. for each gt bbox, assign its nearest proposals (may be more than\n           one) to itself\n\n        Args:\n            approxs (Tensor): Bounding boxes to be assigned,\n                shape(approxs_per_octave*n, 4).\n            squares (Tensor): Base Bounding boxes to be assigned,\n                shape(n, 4).\n            approxs_per_octave (int): number of approxs per octave\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO.\n            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n        \"\"\"\n        num_squares = squares.size(0)\n        num_gts = gt_bboxes.size(0)\n\n        if num_squares == 0 or num_gts == 0:\n            # No predictions and/or truth, return empty assignment\n            overlaps = approxs.new(num_gts, num_squares)\n            assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)\n            return assign_result\n\n        # re-organize anchors by approxs_per_octave x num_squares\n        approxs = torch.transpose(\n            approxs.view(num_squares, approxs_per_octave, 4), 0,\n            1).contiguous().view(-1, 4)\n        assign_on_cpu = True if (self.gpu_assign_thr > 0) and (\n            num_gts > self.gpu_assign_thr) else False\n        # compute overlap and assign gt on CPU when number of GT is large\n        if assign_on_cpu:\n            device = approxs.device\n            approxs = approxs.cpu()\n            gt_bboxes = gt_bboxes.cpu()\n            if gt_bboxes_ignore is not None:\n                gt_bboxes_ignore = gt_bboxes_ignore.cpu()\n            if gt_labels is not None:\n                gt_labels = gt_labels.cpu()\n        all_overlaps = self.iou_calculator(approxs, gt_bboxes)\n\n        overlaps, _ = all_overlaps.view(approxs_per_octave, num_squares,\n                                        num_gts).max(dim=0)\n        overlaps = torch.transpose(overlaps, 0, 1)\n\n        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None\n                and gt_bboxes_ignore.numel() > 0 and squares.numel() > 0):\n            if self.ignore_wrt_candidates:\n                ignore_overlaps = self.iou_calculator(\n                    squares, gt_bboxes_ignore, mode='iof')\n                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)\n            else:\n                ignore_overlaps = self.iou_calculator(\n                    gt_bboxes_ignore, squares, mode='iof')\n                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)\n            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1\n\n        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)\n        if assign_on_cpu:\n            assign_result.gt_inds = assign_result.gt_inds.to(device)\n            assign_result.max_overlaps = assign_result.max_overlaps.to(device)\n            if assign_result.labels is not None:\n                assign_result.labels = assign_result.labels.to(device)\n        return assign_result\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/assign_result.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.utils import util_mixins\n\n\nclass AssignResult(util_mixins.NiceRepr):\n    \"\"\"Stores assignments between predicted and truth boxes.\n\n    Attributes:\n        num_gts (int): the number of truth boxes considered when computing this\n            assignment\n\n        gt_inds (LongTensor): for each predicted box indicates the 1-based\n            index of the assigned truth box. 0 means unassigned and -1 means\n            ignore.\n\n        max_overlaps (FloatTensor): the iou between the predicted box and its\n            assigned truth box.\n\n        labels (None | LongTensor): If specified, for each predicted box\n            indicates the category label of the assigned truth box.\n\n    Example:\n        >>> # An assign result between 4 predicted boxes and 9 true boxes\n        >>> # where only two boxes were assigned.\n        >>> num_gts = 9\n        >>> max_overlaps = torch.LongTensor([0, .5, .9, 0])\n        >>> gt_inds = torch.LongTensor([-1, 1, 2, 0])\n        >>> labels = torch.LongTensor([0, 3, 4, 0])\n        >>> self = AssignResult(num_gts, gt_inds, max_overlaps, labels)\n        >>> print(str(self))  # xdoctest: +IGNORE_WANT\n        <AssignResult(num_gts=9, gt_inds.shape=(4,), max_overlaps.shape=(4,),\n                      labels.shape=(4,))>\n        >>> # Force addition of gt labels (when adding gt as proposals)\n        >>> new_labels = torch.LongTensor([3, 4, 5])\n        >>> self.add_gt_(new_labels)\n        >>> print(str(self))  # xdoctest: +IGNORE_WANT\n        <AssignResult(num_gts=9, gt_inds.shape=(7,), max_overlaps.shape=(7,),\n                      labels.shape=(7,))>\n    \"\"\"\n\n    def __init__(self, num_gts, gt_inds, max_overlaps, labels=None):\n        self.num_gts = num_gts\n        self.gt_inds = gt_inds\n        self.max_overlaps = max_overlaps\n        self.labels = labels\n        # Interface for possible user-defined properties\n        self._extra_properties = {}\n\n    @property\n    def num_preds(self):\n        \"\"\"int: the number of predictions in this assignment\"\"\"\n        return len(self.gt_inds)\n\n    def set_extra_property(self, key, value):\n        \"\"\"Set user-defined new property.\"\"\"\n        assert key not in self.info\n        self._extra_properties[key] = value\n\n    def get_extra_property(self, key):\n        \"\"\"Get user-defined property.\"\"\"\n        return self._extra_properties.get(key, None)\n\n    @property\n    def info(self):\n        \"\"\"dict: a dictionary of info about the object\"\"\"\n        basic_info = {\n            'num_gts': self.num_gts,\n            'num_preds': self.num_preds,\n            'gt_inds': self.gt_inds,\n            'max_overlaps': self.max_overlaps,\n            'labels': self.labels,\n        }\n        basic_info.update(self._extra_properties)\n        return basic_info\n\n    def __nice__(self):\n        \"\"\"str: a \"nice\" summary string describing this assign result\"\"\"\n        parts = []\n        parts.append(f'num_gts={self.num_gts!r}')\n        if self.gt_inds is None:\n            parts.append(f'gt_inds={self.gt_inds!r}')\n        else:\n            parts.append(f'gt_inds.shape={tuple(self.gt_inds.shape)!r}')\n        if self.max_overlaps is None:\n            parts.append(f'max_overlaps={self.max_overlaps!r}')\n        else:\n            parts.append('max_overlaps.shape='\n                         f'{tuple(self.max_overlaps.shape)!r}')\n        if self.labels is None:\n    
        parts.append(f'labels={self.labels!r}')\n        else:\n            parts.append(f'labels.shape={tuple(self.labels.shape)!r}')\n        return ', '.join(parts)\n\n    @classmethod\n    def random(cls, **kwargs):\n        \"\"\"Create random AssignResult for tests or debugging.\n\n        Args:\n            num_preds: number of predicted boxes\n            num_gts: number of true boxes\n            p_ignore (float): probability of a predicted box assigned to an\n                ignored truth\n            p_assigned (float): probability of a predicted box not being\n                assigned\n            p_use_label (float | bool): with labels or not\n            rng (None | int | numpy.random.RandomState): seed or state\n\n        Returns:\n            :obj:`AssignResult`: Randomly generated assign results.\n\n        Example:\n            >>> from mmdet.core.bbox.assigners.assign_result import *  # NOQA\n            >>> self = AssignResult.random()\n            >>> print(self.info)\n        \"\"\"\n        from mmdet.core.bbox import demodata\n        rng = demodata.ensure_rng(kwargs.get('rng', None))\n\n        num_gts = kwargs.get('num_gts', None)\n        num_preds = kwargs.get('num_preds', None)\n        p_ignore = kwargs.get('p_ignore', 0.3)\n        p_assigned = kwargs.get('p_assigned', 0.7)\n        p_use_label = kwargs.get('p_use_label', 0.5)\n        num_classes = kwargs.get('p_use_label', 3)\n\n        if num_gts is None:\n            num_gts = rng.randint(0, 8)\n        if num_preds is None:\n            num_preds = rng.randint(0, 16)\n\n        if num_gts == 0:\n            max_overlaps = torch.zeros(num_preds, dtype=torch.float32)\n            gt_inds = torch.zeros(num_preds, dtype=torch.int64)\n            if p_use_label is True or p_use_label < rng.rand():\n                labels = torch.zeros(num_preds, dtype=torch.int64)\n            else:\n                labels = None\n        else:\n            import numpy as np\n\n            # Create an overlap for each predicted box\n            max_overlaps = torch.from_numpy(rng.rand(num_preds))\n\n            # Construct gt_inds for each predicted box\n            is_assigned = torch.from_numpy(rng.rand(num_preds) < p_assigned)\n            # maximum number of assignments constraints\n            n_assigned = min(num_preds, min(num_gts, is_assigned.sum()))\n\n            assigned_idxs = np.where(is_assigned)[0]\n            rng.shuffle(assigned_idxs)\n            assigned_idxs = assigned_idxs[0:n_assigned]\n            assigned_idxs.sort()\n\n            is_assigned[:] = 0\n            is_assigned[assigned_idxs] = True\n\n            is_ignore = torch.from_numpy(\n                rng.rand(num_preds) < p_ignore) & is_assigned\n\n            gt_inds = torch.zeros(num_preds, dtype=torch.int64)\n\n            true_idxs = np.arange(num_gts)\n            rng.shuffle(true_idxs)\n            true_idxs = torch.from_numpy(true_idxs)\n            gt_inds[is_assigned] = true_idxs[:n_assigned].long()\n\n            gt_inds = torch.from_numpy(\n                rng.randint(1, num_gts + 1, size=num_preds))\n            gt_inds[is_ignore] = -1\n            gt_inds[~is_assigned] = 0\n            max_overlaps[~is_assigned] = 0\n\n            if p_use_label is True or p_use_label < rng.rand():\n                if num_classes == 0:\n                    labels = torch.zeros(num_preds, dtype=torch.int64)\n                else:\n                    labels = torch.from_numpy(\n                        # remind that we set FG labels to [0, 
num_class-1]\n                        # since mmdet v2.0\n                        # BG cat_id: num_class\n                        rng.randint(0, num_classes, size=num_preds))\n                    labels[~is_assigned] = 0\n            else:\n                labels = None\n\n        self = cls(num_gts, gt_inds, max_overlaps, labels)\n        return self\n\n    def add_gt_(self, gt_labels):\n        \"\"\"Add ground truth as assigned results.\n\n        Args:\n            gt_labels (torch.Tensor): Labels of gt boxes\n        \"\"\"\n        self_inds = torch.arange(\n            1, len(gt_labels) + 1, dtype=torch.long, device=gt_labels.device)\n        self.gt_inds = torch.cat([self_inds, self.gt_inds])\n\n        self.max_overlaps = torch.cat(\n            [self.max_overlaps.new_ones(len(gt_labels)), self.max_overlaps])\n\n        if self.labels is not None:\n            self.labels = torch.cat([gt_labels, self.labels])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/atss_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..iou_calculators import build_iou_calculator\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass ATSSAssigner(BaseAssigner):\n    \"\"\"Assign a corresponding gt bbox or background to each bbox.\n\n    Each proposals will be assigned with `0` or a positive integer\n    indicating the ground truth index.\n\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n\n    If ``alpha`` is not None, it means that the dynamic cost\n    ATSSAssigner is adopted, which is currently only used in the DDOD.\n\n    Args:\n        topk (float): number of bbox selected in each level\n    \"\"\"\n\n    def __init__(self,\n                 topk,\n                 alpha=None,\n                 iou_calculator=dict(type='BboxOverlaps2D'),\n                 ignore_iof_thr=-1):\n        self.topk = topk\n        self.alpha = alpha\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n        self.ignore_iof_thr = ignore_iof_thr\n\n    \"\"\"Assign a corresponding gt bbox or background to each bbox.\n\n    Args:\n        topk (int): number of bbox selected in each level.\n        alpha (float): param of cost rate for each proposal only in DDOD.\n            Default None.\n        iou_calculator (dict): builder of IoU calculator.\n            Default dict(type='BboxOverlaps2D').\n        ignore_iof_thr (int): whether ignore max overlaps or not.\n            Default -1 (1 or -1).\n    \"\"\"\n\n    # https://github.com/sfzhang15/ATSS/blob/master/atss_core/modeling/rpn/atss/loss.py\n    def assign(self,\n               bboxes,\n               num_level_bboxes,\n               gt_bboxes,\n               gt_bboxes_ignore=None,\n               gt_labels=None,\n               cls_scores=None,\n               bbox_preds=None):\n        \"\"\"Assign gt to bboxes.\n\n        The assignment is done in following steps\n\n        1. compute iou between all bbox (bbox of all pyramid levels) and gt\n        2. compute center distance between all bbox and gt\n        3. on each pyramid level, for each gt, select k bbox whose center\n           are closest to the gt center, so we total select k*l bbox as\n           candidates for each gt\n        4. get corresponding iou for the these candidates, and compute the\n           mean and std, set mean + std as the iou threshold\n        5. select these candidates whose iou are greater than or equal to\n           the threshold as positive\n        6. limit the positive sample's center in gt\n\n        If ``alpha`` is not None, and ``cls_scores`` and `bbox_preds`\n        are not None, the overlaps calculation in the first step\n        will also include dynamic cost, which is currently only used in\n        the DDOD.\n\n        Args:\n            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).\n            num_level_bboxes (List): num of bboxes in each level\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO. 
Default None.\n            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).\n            cls_scores (list[Tensor]): Classification scores for all scale\n                levels, each is a 4D-tensor, the channels number is\n                num_base_priors * num_classes. Default None.\n            bbox_preds (list[Tensor]): Box energies / deltas for all scale\n                levels, each is a 4D-tensor, the channels number is\n                num_base_priors * 4. Default None.\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n        \"\"\"\n        INF = 100000000\n        bboxes = bboxes[:, :4]\n        num_gt, num_bboxes = gt_bboxes.size(0), bboxes.size(0)\n\n        message = 'Invalid alpha parameter because cls_scores or ' \\\n                  'bbox_preds are None. If you want to use the ' \\\n                  'cost-based ATSSAssigner,  please set cls_scores, ' \\\n                  'bbox_preds and self.alpha at the same time. '\n\n        if self.alpha is None:\n            # ATSSAssigner\n            overlaps = self.iou_calculator(bboxes, gt_bboxes)\n            if cls_scores is not None or bbox_preds is not None:\n                warnings.warn(message)\n        else:\n            # Dynamic cost ATSSAssigner in DDOD\n            assert cls_scores is not None and bbox_preds is not None, message\n\n            # compute cls cost for bbox and GT\n            cls_cost = torch.sigmoid(cls_scores[:, gt_labels])\n\n            # compute iou between all bbox and gt\n            overlaps = self.iou_calculator(bbox_preds, gt_bboxes)\n\n            # make sure that we are in element-wise multiplication\n            assert cls_cost.shape == overlaps.shape\n\n            # overlaps is actually a cost matrix\n            overlaps = cls_cost**(1 - self.alpha) * overlaps**self.alpha\n\n        # assign 0 by default\n        assigned_gt_inds = overlaps.new_full((num_bboxes, ),\n                                             0,\n                                             dtype=torch.long)\n\n        if num_gt == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            max_overlaps = overlaps.new_zeros((num_bboxes, ))\n            if num_gt == 0:\n                # No truth, assign everything to background\n                assigned_gt_inds[:] = 0\n            if gt_labels is None:\n                assigned_labels = None\n            else:\n                assigned_labels = overlaps.new_full((num_bboxes, ),\n                                                    -1,\n                                                    dtype=torch.long)\n            return AssignResult(\n                num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n\n        # compute center distance between all bbox and gt\n        gt_cx = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0\n        gt_cy = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0\n        gt_points = torch.stack((gt_cx, gt_cy), dim=1)\n\n        bboxes_cx = (bboxes[:, 0] + bboxes[:, 2]) / 2.0\n        bboxes_cy = (bboxes[:, 1] + bboxes[:, 3]) / 2.0\n        bboxes_points = torch.stack((bboxes_cx, bboxes_cy), dim=1)\n\n        distances = (bboxes_points[:, None, :] -\n                     gt_points[None, :, :]).pow(2).sum(-1).sqrt()\n\n        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None\n                and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0):\n            ignore_overlaps = self.iou_calculator(\n                bboxes, gt_bboxes_ignore, 
mode='iof')\n            ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)\n            ignore_idxs = ignore_max_overlaps > self.ignore_iof_thr\n            distances[ignore_idxs, :] = INF\n            assigned_gt_inds[ignore_idxs] = -1\n\n        # Selecting candidates based on the center distance\n        candidate_idxs = []\n        start_idx = 0\n        for level, bboxes_per_level in enumerate(num_level_bboxes):\n            # on each pyramid level, for each gt,\n            # select k bbox whose center are closest to the gt center\n            end_idx = start_idx + bboxes_per_level\n            distances_per_level = distances[start_idx:end_idx, :]\n            selectable_k = min(self.topk, bboxes_per_level)\n\n            _, topk_idxs_per_level = distances_per_level.topk(\n                selectable_k, dim=0, largest=False)\n            candidate_idxs.append(topk_idxs_per_level + start_idx)\n            start_idx = end_idx\n        candidate_idxs = torch.cat(candidate_idxs, dim=0)\n\n        # get corresponding iou for the these candidates, and compute the\n        # mean and std, set mean + std as the iou threshold\n        candidate_overlaps = overlaps[candidate_idxs, torch.arange(num_gt)]\n        overlaps_mean_per_gt = candidate_overlaps.mean(0)\n        overlaps_std_per_gt = candidate_overlaps.std(0)\n        overlaps_thr_per_gt = overlaps_mean_per_gt + overlaps_std_per_gt\n\n        is_pos = candidate_overlaps >= overlaps_thr_per_gt[None, :]\n\n        # limit the positive sample's center in gt\n        for gt_idx in range(num_gt):\n            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes\n        ep_bboxes_cx = bboxes_cx.view(1, -1).expand(\n            num_gt, num_bboxes).contiguous().view(-1)\n        ep_bboxes_cy = bboxes_cy.view(1, -1).expand(\n            num_gt, num_bboxes).contiguous().view(-1)\n        candidate_idxs = candidate_idxs.view(-1)\n\n        # calculate the left, top, right, bottom distance between positive\n        # bbox center and gt side\n        l_ = ep_bboxes_cx[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 0]\n        t_ = ep_bboxes_cy[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 1]\n        r_ = gt_bboxes[:, 2] - ep_bboxes_cx[candidate_idxs].view(-1, num_gt)\n        b_ = gt_bboxes[:, 3] - ep_bboxes_cy[candidate_idxs].view(-1, num_gt)\n        is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01\n\n        is_pos = is_pos & is_in_gts\n\n        # if an anchor box is assigned to multiple gts,\n        # the one with the highest IoU will be selected.\n        overlaps_inf = torch.full_like(overlaps,\n                                       -INF).t().contiguous().view(-1)\n        index = candidate_idxs.view(-1)[is_pos.view(-1)]\n        overlaps_inf[index] = overlaps.t().contiguous().view(-1)[index]\n        overlaps_inf = overlaps_inf.view(num_gt, -1).t()\n\n        max_overlaps, argmax_overlaps = overlaps_inf.max(dim=1)\n        assigned_gt_inds[\n            max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1\n\n        if gt_labels is not None:\n            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)\n            pos_inds = torch.nonzero(\n                assigned_gt_inds > 0, as_tuple=False).squeeze()\n            if pos_inds.numel() > 0:\n                assigned_labels[pos_inds] = gt_labels[\n                    assigned_gt_inds[pos_inds] - 1]\n        else:\n            assigned_labels = None\n        return AssignResult(\n            num_gt, assigned_gt_inds, max_overlaps, 
labels=assigned_labels)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/base_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\n\nclass BaseAssigner(metaclass=ABCMeta):\n    \"\"\"Base assigner that assigns boxes to ground truth boxes.\"\"\"\n\n    @abstractmethod\n    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):\n        \"\"\"Assign boxes to either a ground truth boxes or a negative boxes.\"\"\"\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/center_region_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..iou_calculators import build_iou_calculator\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\ndef scale_boxes(bboxes, scale):\n    \"\"\"Expand an array of boxes by a given scale.\n\n    Args:\n        bboxes (Tensor): Shape (m, 4)\n        scale (float): The scale factor of bboxes\n\n    Returns:\n        (Tensor): Shape (m, 4). Scaled bboxes\n    \"\"\"\n    assert bboxes.size(1) == 4\n    w_half = (bboxes[:, 2] - bboxes[:, 0]) * .5\n    h_half = (bboxes[:, 3] - bboxes[:, 1]) * .5\n    x_c = (bboxes[:, 2] + bboxes[:, 0]) * .5\n    y_c = (bboxes[:, 3] + bboxes[:, 1]) * .5\n\n    w_half *= scale\n    h_half *= scale\n\n    boxes_scaled = torch.zeros_like(bboxes)\n    boxes_scaled[:, 0] = x_c - w_half\n    boxes_scaled[:, 2] = x_c + w_half\n    boxes_scaled[:, 1] = y_c - h_half\n    boxes_scaled[:, 3] = y_c + h_half\n    return boxes_scaled\n\n\ndef is_located_in(points, bboxes):\n    \"\"\"Are points located in bboxes.\n\n    Args:\n      points (Tensor): Points, shape: (m, 2).\n      bboxes (Tensor): Bounding boxes, shape: (n, 4).\n\n    Return:\n      Tensor: Flags indicating if points are located in bboxes, shape: (m, n).\n    \"\"\"\n    assert points.size(1) == 2\n    assert bboxes.size(1) == 4\n    return (points[:, 0].unsqueeze(1) > bboxes[:, 0].unsqueeze(0)) & \\\n           (points[:, 0].unsqueeze(1) < bboxes[:, 2].unsqueeze(0)) & \\\n           (points[:, 1].unsqueeze(1) > bboxes[:, 1].unsqueeze(0)) & \\\n           (points[:, 1].unsqueeze(1) < bboxes[:, 3].unsqueeze(0))\n\n\ndef bboxes_area(bboxes):\n    \"\"\"Compute the area of an array of bboxes.\n\n    Args:\n        bboxes (Tensor): The coordinates ox bboxes. Shape: (m, 4)\n\n    Returns:\n        Tensor: Area of the bboxes. Shape: (m, )\n    \"\"\"\n    assert bboxes.size(1) == 4\n    w = (bboxes[:, 2] - bboxes[:, 0])\n    h = (bboxes[:, 3] - bboxes[:, 1])\n    areas = w * h\n    return areas\n\n\n@BBOX_ASSIGNERS.register_module()\nclass CenterRegionAssigner(BaseAssigner):\n    \"\"\"Assign pixels at the center region of a bbox as positive.\n\n    Each proposals will be assigned with `-1`, `0`, or a positive integer\n    indicating the ground truth index.\n    - -1: negative samples\n    - semi-positive numbers: positive sample, index (0-based) of assigned gt\n\n    Args:\n        pos_scale (float): Threshold within which pixels are\n          labelled as positive.\n        neg_scale (float): Threshold above which pixels are\n          labelled as positive.\n        min_pos_iof (float): Minimum iof of a pixel with a gt to be\n          labelled as positive. Default: 1e-2\n        ignore_gt_scale (float): Threshold within which the pixels\n          are ignored when the gt is labelled as shadowed. Default: 0.5\n        foreground_dominate (bool): If True, the bbox will be assigned as\n          positive when a gt's kernel region overlaps with another's shadowed\n          (ignored) region, otherwise it is set as ignored. 
Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 pos_scale,\n                 neg_scale,\n                 min_pos_iof=1e-2,\n                 ignore_gt_scale=0.5,\n                 foreground_dominate=False,\n                 iou_calculator=dict(type='BboxOverlaps2D')):\n        self.pos_scale = pos_scale\n        self.neg_scale = neg_scale\n        self.min_pos_iof = min_pos_iof\n        self.ignore_gt_scale = ignore_gt_scale\n        self.foreground_dominate = foreground_dominate\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n\n    def get_gt_priorities(self, gt_bboxes):\n        \"\"\"Get gt priorities according to their areas.\n\n        Smaller gt has higher priority.\n\n        Args:\n            gt_bboxes (Tensor): Ground truth boxes, shape (k, 4).\n\n        Returns:\n            Tensor: The priority of gts so that gts with larger priority is \\\n              more likely to be assigned. Shape (k, )\n        \"\"\"\n        gt_areas = bboxes_area(gt_bboxes)\n        # Rank all gt bbox areas. Smaller objects has larger priority\n        _, sort_idx = gt_areas.sort(descending=True)\n        sort_idx = sort_idx.argsort()\n        return sort_idx\n\n    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):\n        \"\"\"Assign gt to bboxes.\n\n        This method assigns gts to every bbox (proposal/anchor), each bbox \\\n        will be assigned with -1, or a semi-positive number. -1 means \\\n        negative sample, semi-positive number is the index (0-based) of \\\n        assigned gt.\n\n        Args:\n            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_bboxes_ignore (tensor, optional): Ground truth bboxes that are\n              labelled as `ignored`, e.g., crowd boxes in COCO.\n            gt_labels (tensor, optional): Label of gt_bboxes, shape (num_gts,).\n\n        Returns:\n            :obj:`AssignResult`: The assigned result. Note that \\\n              shadowed_labels of shape (N, 2) is also added as an \\\n              `assign_result` attribute. `shadowed_labels` is a tensor \\\n              composed of N pairs of anchor_ind, class_label], where N \\\n              is the number of anchors that lie in the outer region of a \\\n              gt, anchor_ind is the shadowed anchor index and class_label \\\n              is the shadowed class label.\n\n        Example:\n            >>> self = CenterRegionAssigner(0.2, 0.2)\n            >>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]])\n            >>> gt_bboxes = torch.Tensor([[0, 0, 10, 10]])\n            >>> assign_result = self.assign(bboxes, gt_bboxes)\n            >>> expected_gt_inds = torch.LongTensor([1, 0])\n            >>> assert torch.all(assign_result.gt_inds == expected_gt_inds)\n        \"\"\"\n        # There are in total 5 steps in the pixel assignment\n        # 1. Find core (the center region, say inner 0.2)\n        #     and shadow (the relatively ourter part, say inner 0.2-0.5)\n        #     regions of every gt.\n        # 2. Find all prior bboxes that lie in gt_core and gt_shadow regions\n        # 3. Assign prior bboxes in gt_core with a one-hot id of the gt in\n        #      the image.\n        #    3.1. For overlapping objects, the prior bboxes in gt_core is\n        #           assigned with the object with smallest area\n        # 4. Assign prior bboxes with class label according to its gt id.\n        #    4.1. 
Assign -1 to prior bboxes lying in shadowed gts\n        #    4.2. Assign positive prior boxes with the corresponding label\n        # 5. Find pixels lying in the shadow of an object and assign them with\n        #      background label, but set the loss weight of its corresponding\n        #      gt to zero.\n        assert bboxes.size(1) == 4, 'bboxes must have size of 4'\n        # 1. Find core positive and shadow region of every gt\n        gt_core = scale_boxes(gt_bboxes, self.pos_scale)\n        gt_shadow = scale_boxes(gt_bboxes, self.neg_scale)\n\n        # 2. Find prior bboxes that lie in gt_core and gt_shadow regions\n        bbox_centers = (bboxes[:, 2:4] + bboxes[:, 0:2]) / 2\n        # The center points lie within the gt boxes\n        is_bbox_in_gt = is_located_in(bbox_centers, gt_bboxes)\n        # Only calculate bbox and gt_core IoF. This enables small prior bboxes\n        #   to match large gts\n        bbox_and_gt_core_overlaps = self.iou_calculator(\n            bboxes, gt_core, mode='iof')\n        # The center point of effective priors should be within the gt box\n        is_bbox_in_gt_core = is_bbox_in_gt & (\n            bbox_and_gt_core_overlaps > self.min_pos_iof)  # shape (n, k)\n\n        is_bbox_in_gt_shadow = (\n            self.iou_calculator(bboxes, gt_shadow, mode='iof') >\n            self.min_pos_iof)\n        # Rule out center effective positive pixels\n        is_bbox_in_gt_shadow &= (~is_bbox_in_gt_core)\n\n        num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)\n        if num_gts == 0 or num_bboxes == 0:\n            # If no gts exist, assign all pixels to negative\n            assigned_gt_ids = \\\n                is_bbox_in_gt_core.new_zeros((num_bboxes,),\n                                             dtype=torch.long)\n            pixels_in_gt_shadow = assigned_gt_ids.new_empty((0, 2))\n        else:\n            # Step 3: assign a one-hot gt id to each pixel, and smaller objects\n            #    have high priority to assign the pixel.\n            sort_idx = self.get_gt_priorities(gt_bboxes)\n            assigned_gt_ids, pixels_in_gt_shadow = \\\n                self.assign_one_hot_gt_indices(is_bbox_in_gt_core,\n                                               is_bbox_in_gt_shadow,\n                                               gt_priority=sort_idx)\n\n        if gt_bboxes_ignore is not None and gt_bboxes_ignore.numel() > 0:\n            # No ground truth or boxes, return empty assignment\n            gt_bboxes_ignore = scale_boxes(\n                gt_bboxes_ignore, scale=self.ignore_gt_scale)\n            is_bbox_in_ignored_gts = is_located_in(bbox_centers,\n                                                   gt_bboxes_ignore)\n            is_bbox_in_ignored_gts = is_bbox_in_ignored_gts.any(dim=1)\n            assigned_gt_ids[is_bbox_in_ignored_gts] = -1\n\n        # 4. Assign prior bboxes with class label according to its gt id.\n        assigned_labels = None\n        shadowed_pixel_labels = None\n        if gt_labels is not None:\n            # Default assigned label is the background (-1)\n            assigned_labels = assigned_gt_ids.new_full((num_bboxes, ), -1)\n            pos_inds = torch.nonzero(\n                assigned_gt_ids > 0, as_tuple=False).squeeze()\n            if pos_inds.numel() > 0:\n                assigned_labels[pos_inds] = gt_labels[assigned_gt_ids[pos_inds]\n                                                      - 1]\n            # 5. 
Find pixels lying in the shadow of an object\n            shadowed_pixel_labels = pixels_in_gt_shadow.clone()\n            if pixels_in_gt_shadow.numel() > 0:\n                pixel_idx, gt_idx =\\\n                    pixels_in_gt_shadow[:, 0], pixels_in_gt_shadow[:, 1]\n                assert (assigned_gt_ids[pixel_idx] != gt_idx).all(), \\\n                    'Some pixels are dually assigned to ignore and gt!'\n                shadowed_pixel_labels[:, 1] = gt_labels[gt_idx - 1]\n                override = (\n                    assigned_labels[pixel_idx] == shadowed_pixel_labels[:, 1])\n                if self.foreground_dominate:\n                    # When a pixel is both positive and shadowed, set it as pos\n                    shadowed_pixel_labels = shadowed_pixel_labels[~override]\n                else:\n                    # When a pixel is both pos and shadowed, set it as shadowed\n                    assigned_labels[pixel_idx[override]] = -1\n                    assigned_gt_ids[pixel_idx[override]] = 0\n\n        assign_result = AssignResult(\n            num_gts, assigned_gt_ids, None, labels=assigned_labels)\n        # Add shadowed_labels as assign_result property. Shape: (num_shadow, 2)\n        assign_result.set_extra_property('shadowed_labels',\n                                         shadowed_pixel_labels)\n        return assign_result\n\n    def assign_one_hot_gt_indices(self,\n                                  is_bbox_in_gt_core,\n                                  is_bbox_in_gt_shadow,\n                                  gt_priority=None):\n        \"\"\"Assign only one gt index to each prior box.\n\n        Gts with large gt_priority are more likely to be assigned.\n\n        Args:\n            is_bbox_in_gt_core (Tensor): Bool tensor indicating the bbox center\n              is in the core area of a gt (e.g. 0-0.2).\n              Shape: (num_prior, num_gt).\n            is_bbox_in_gt_shadow (Tensor): Bool tensor indicating the bbox\n              center is in the shadowed area of a gt (e.g. 0.2-0.5).\n              Shape: (num_prior, num_gt).\n            gt_priority (Tensor): Priorities of gts. The gt with a higher\n              priority is more likely to be assigned to the bbox when the bbox\n              match with multiple gts. Shape: (num_gt, ).\n\n        Returns:\n            tuple: Returns (assigned_gt_inds, shadowed_gt_inds).\n\n                - assigned_gt_inds: The assigned gt index of each prior bbox \\\n                    (i.e. index from 1 to num_gts). Shape: (num_prior, ).\n                - shadowed_gt_inds: shadowed gt indices. It is a tensor of \\\n                    shape (num_ignore, 2) with first column being the \\\n                    shadowed prior bbox indices and the second column the \\\n                    shadowed gt indices (1-based).\n        \"\"\"\n        num_bboxes, num_gts = is_bbox_in_gt_core.shape\n\n        if gt_priority is None:\n            gt_priority = torch.arange(\n                num_gts, device=is_bbox_in_gt_core.device)\n        assert gt_priority.size(0) == num_gts\n        # The bigger gt_priority, the more preferable to be assigned\n        # The assigned inds are by default 0 (background)\n        assigned_gt_inds = is_bbox_in_gt_core.new_zeros((num_bboxes, ),\n                                                        dtype=torch.long)\n        # Shadowed bboxes are assigned to be background. 
But the corresponding\n        #   label is ignored during loss calculation, which is done through\n        #   shadowed_gt_inds\n        shadowed_gt_inds = torch.nonzero(is_bbox_in_gt_shadow, as_tuple=False)\n        if is_bbox_in_gt_core.sum() == 0:  # No gt match\n            shadowed_gt_inds[:, 1] += 1  # 1-based. For consistency issue\n            return assigned_gt_inds, shadowed_gt_inds\n\n        # The priority of each prior box and gt pair. If one prior box is\n        #  matched bo multiple gts. Only the pair with the highest priority\n        #  is saved\n        pair_priority = is_bbox_in_gt_core.new_full((num_bboxes, num_gts),\n                                                    -1,\n                                                    dtype=torch.long)\n\n        # Each bbox could match with multiple gts.\n        # The following codes deal with this situation\n        # Matched  bboxes (to any gt). Shape: (num_pos_anchor, )\n        inds_of_match = torch.any(is_bbox_in_gt_core, dim=1)\n        # The matched gt index of each positive bbox. Length >= num_pos_anchor\n        #   , since one bbox could match multiple gts\n        matched_bbox_gt_inds = torch.nonzero(\n            is_bbox_in_gt_core, as_tuple=False)[:, 1]\n        # Assign priority to each bbox-gt pair.\n        pair_priority[is_bbox_in_gt_core] = gt_priority[matched_bbox_gt_inds]\n        _, argmax_priority = pair_priority[inds_of_match].max(dim=1)\n        assigned_gt_inds[inds_of_match] = argmax_priority + 1  # 1-based\n        # Zero-out the assigned anchor box to filter the shadowed gt indices\n        is_bbox_in_gt_core[inds_of_match, argmax_priority] = 0\n        # Concat the shadowed indices due to overlapping with that out side of\n        #   effective scale. shape: (total_num_ignore, 2)\n        shadowed_gt_inds = torch.cat(\n            (shadowed_gt_inds, torch.nonzero(\n                is_bbox_in_gt_core, as_tuple=False)),\n            dim=0)\n        # `is_bbox_in_gt_core` should be changed back to keep arguments intact.\n        is_bbox_in_gt_core[inds_of_match, argmax_priority] = 1\n        # 1-based shadowed gt indices, to be consistent with `assigned_gt_inds`\n        if shadowed_gt_inds.numel() > 0:\n            shadowed_gt_inds[:, 1] += 1\n        return assigned_gt_inds, shadowed_gt_inds\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/grid_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..iou_calculators import build_iou_calculator\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass GridAssigner(BaseAssigner):\n    \"\"\"Assign a corresponding gt bbox or background to each bbox.\n\n    Each proposals will be assigned with `-1`, `0`, or a positive integer\n    indicating the ground truth index.\n\n    - -1: don't care\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n\n    Args:\n        pos_iou_thr (float): IoU threshold for positive bboxes.\n        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.\n        min_pos_iou (float): Minimum iou for a bbox to be considered as a\n            positive bbox. Positive samples can have smaller IoU than\n            pos_iou_thr due to the 4th step (assign max IoU sample to each gt).\n        gt_max_assign_all (bool): Whether to assign all bboxes with the same\n            highest overlap with some gt to that gt.\n    \"\"\"\n\n    def __init__(self,\n                 pos_iou_thr,\n                 neg_iou_thr,\n                 min_pos_iou=.0,\n                 gt_max_assign_all=True,\n                 iou_calculator=dict(type='BboxOverlaps2D')):\n        self.pos_iou_thr = pos_iou_thr\n        self.neg_iou_thr = neg_iou_thr\n        self.min_pos_iou = min_pos_iou\n        self.gt_max_assign_all = gt_max_assign_all\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n\n    def assign(self, bboxes, box_responsible_flags, gt_bboxes, gt_labels=None):\n        \"\"\"Assign gt to bboxes. The process is very much like the max iou\n        assigner, except that positive samples are constrained within the cell\n        that the gt boxes fell in.\n\n        This method assign a gt bbox to every bbox (proposal/anchor), each bbox\n        will be assigned with -1, 0, or a positive number. -1 means don't care,\n        0 means negative sample, positive number is the index (1-based) of\n        assigned gt.\n        The assignment is done in following steps, the order matters.\n\n        1. assign every bbox to -1\n        2. assign proposals whose iou with all gts <= neg_iou_thr to 0\n        3. for each bbox within a cell, if the iou with its nearest gt >\n            pos_iou_thr and the center of that gt falls inside the cell,\n            assign it to that bbox\n        4. for each gt bbox, assign its nearest proposals within the cell the\n            gt bbox falls in to itself.\n\n        Args:\n            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).\n            box_responsible_flags (Tensor): flag to indicate whether box is\n                responsible for prediction, shape(n, )\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n        \"\"\"\n        num_gts, num_bboxes = gt_bboxes.size(0), bboxes.size(0)\n\n        # compute iou between all gt and bboxes\n        overlaps = self.iou_calculator(gt_bboxes, bboxes)\n\n        # 1. 
assign -1 by default\n        assigned_gt_inds = overlaps.new_full((num_bboxes, ),\n                                             -1,\n                                             dtype=torch.long)\n\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            max_overlaps = overlaps.new_zeros((num_bboxes, ))\n            if num_gts == 0:\n                # No truth, assign everything to background\n                assigned_gt_inds[:] = 0\n            if gt_labels is None:\n                assigned_labels = None\n            else:\n                assigned_labels = overlaps.new_full((num_bboxes, ),\n                                                    -1,\n                                                    dtype=torch.long)\n            return AssignResult(\n                num_gts,\n                assigned_gt_inds,\n                max_overlaps,\n                labels=assigned_labels)\n\n        # 2. assign negative: below\n        # for each anchor, which gt best overlaps with it\n        # for each anchor, the max iou of all gts\n        # shape of max_overlaps == argmax_overlaps == num_bboxes\n        max_overlaps, argmax_overlaps = overlaps.max(dim=0)\n\n        if isinstance(self.neg_iou_thr, float):\n            assigned_gt_inds[(max_overlaps >= 0)\n                             & (max_overlaps <= self.neg_iou_thr)] = 0\n        elif isinstance(self.neg_iou_thr, (tuple, list)):\n            assert len(self.neg_iou_thr) == 2\n            assigned_gt_inds[(max_overlaps > self.neg_iou_thr[0])\n                             & (max_overlaps <= self.neg_iou_thr[1])] = 0\n\n        # 3. assign positive: falls into responsible cell and above\n        # positive IOU threshold, the order matters.\n        # the prior condition of comparison is to filter out all\n        # unrelated anchors, i.e. not box_responsible_flags\n        overlaps[:, ~box_responsible_flags.type(torch.bool)] = -1.\n\n        # calculate max_overlaps again, but this time we only consider IOUs\n        # for anchors responsible for prediction\n        max_overlaps, argmax_overlaps = overlaps.max(dim=0)\n\n        # for each gt, which anchor best overlaps with it\n        # for each gt, the max iou of all proposals\n        # shape of gt_max_overlaps == gt_argmax_overlaps == num_gts\n        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)\n\n        pos_inds = (max_overlaps >\n                    self.pos_iou_thr) & box_responsible_flags.type(torch.bool)\n        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1\n\n        # 4. 
assign positive to max overlapped anchors within responsible cell\n        for i in range(num_gts):\n            if gt_max_overlaps[i] > self.min_pos_iou:\n                if self.gt_max_assign_all:\n                    max_iou_inds = (overlaps[i, :] == gt_max_overlaps[i]) & \\\n                         box_responsible_flags.type(torch.bool)\n                    assigned_gt_inds[max_iou_inds] = i + 1\n                elif box_responsible_flags[gt_argmax_overlaps[i]]:\n                    assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1\n\n        # assign labels of positive anchors\n        if gt_labels is not None:\n            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)\n            pos_inds = torch.nonzero(\n                assigned_gt_inds > 0, as_tuple=False).squeeze()\n            if pos_inds.numel() > 0:\n                assigned_labels[pos_inds] = gt_labels[\n                    assigned_gt_inds[pos_inds] - 1]\n\n        else:\n            assigned_labels = None\n\n        return AssignResult(\n            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/hungarian_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom scipy.optimize import linear_sum_assignment\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..match_costs import build_match_cost\nfrom ..transforms import bbox_cxcywh_to_xyxy\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass HungarianAssigner(BaseAssigner):\n    \"\"\"Computes one-to-one matching between predictions and ground truth.\n\n    This class computes an assignment between the targets and the predictions\n    based on the costs. The costs are weighted sum of three components:\n    classification cost, regression L1 cost and regression iou cost. The\n    targets don't include the no_object, so generally there are more\n    predictions than targets. After the one-to-one matching, the un-matched\n    are treated as backgrounds. Thus each query prediction will be assigned\n    with `0` or a positive integer indicating the ground truth index:\n\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n\n    Args:\n        cls_weight (int | float, optional): The scale factor for classification\n            cost. Default 1.0.\n        bbox_weight (int | float, optional): The scale factor for regression\n            L1 cost. Default 1.0.\n        iou_weight (int | float, optional): The scale factor for regression\n            iou cost. Default 1.0.\n        iou_calculator (dict | optional): The config for the iou calculation.\n            Default type `BboxOverlaps2D`.\n        iou_mode (str | optional): \"iou\" (intersection over union), \"iof\"\n                (intersection over foreground), or \"giou\" (generalized\n                intersection over union). Default \"giou\".\n    \"\"\"\n\n    def __init__(self,\n                 cls_cost=dict(type='ClassificationCost', weight=1.),\n                 reg_cost=dict(type='BBoxL1Cost', weight=1.0),\n                 iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0)):\n        self.cls_cost = build_match_cost(cls_cost)\n        self.reg_cost = build_match_cost(reg_cost)\n        self.iou_cost = build_match_cost(iou_cost)\n\n    def assign(self,\n               bbox_pred,\n               cls_pred,\n               gt_bboxes,\n               gt_labels,\n               img_meta,\n               gt_bboxes_ignore=None,\n               eps=1e-7):\n        \"\"\"Computes one-to-one matching based on the weighted costs.\n\n        This method assign each query prediction to a ground truth or\n        background. The `assigned_gt_inds` with -1 means don't care,\n        0 means negative sample, and positive number is the index (1-based)\n        of assigned gt.\n        The assignment is done in the following steps, the order matters.\n\n        1. assign every prediction to -1\n        2. compute the weighted costs\n        3. do Hungarian matching on CPU based on the costs\n        4. assign all to 0 (background) first, then for each matched pair\n           between predictions and gts, treat this prediction as foreground\n           and assign the corresponding gt index (plus 1) to it.\n\n        Args:\n            bbox_pred (Tensor): Predicted boxes with normalized coordinates\n                (cx, cy, w, h), which are all in range [0, 1]. 
Shape\n                [num_query, 4].\n            cls_pred (Tensor): Predicted classification logits, shape\n                [num_query, num_class].\n            gt_bboxes (Tensor): Ground truth boxes with unnormalized\n                coordinates (x1, y1, x2, y2). Shape [num_gt, 4].\n            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n            img_meta (dict): Meta information for current image.\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`. Default None.\n            eps (int | float, optional): A value added to the denominator for\n                numerical stability. Default 1e-7.\n\n        Returns:\n            :obj:`AssignResult`: The assigned result.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            'Only case when gt_bboxes_ignore is None is supported.'\n        num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)\n\n        # 1. assign -1 by default\n        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),\n                                              -1,\n                                              dtype=torch.long)\n        assigned_labels = bbox_pred.new_full((num_bboxes, ),\n                                             -1,\n                                             dtype=torch.long)\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gts, assigned_gt_inds, None, labels=assigned_labels)\n        img_h, img_w, _ = img_meta['img_shape']\n        factor = gt_bboxes.new_tensor([img_w, img_h, img_w,\n                                       img_h]).unsqueeze(0)\n\n        # 2. compute the weighted costs\n        # classification and bboxcost.\n        cls_cost = self.cls_cost(cls_pred, gt_labels)\n        # regression L1 cost\n        normalize_gt_bboxes = gt_bboxes / factor\n        reg_cost = self.reg_cost(bbox_pred, normalize_gt_bboxes)\n        # regression iou cost, defaultly giou is used in official DETR.\n        bboxes = bbox_cxcywh_to_xyxy(bbox_pred) * factor\n        iou_cost = self.iou_cost(bboxes, gt_bboxes)\n        # weighted sum of above three costs\n        cost = cls_cost + reg_cost + iou_cost\n\n        # 3. do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu()\n        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(\n            bbox_pred.device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(\n            bbox_pred.device)\n\n        # 4. assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]\n        return AssignResult(\n            num_gts, assigned_gt_inds, None, labels=assigned_labels)\n"
  },
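The HungarianAssigner above reduces to: build a `[num_query, num_gt]` cost matrix from classification, L1 and GIoU terms, solve it on CPU with `scipy.optimize.linear_sum_assignment`, then store matches as 1-based gt indices with 0 meaning background. A minimal, self-contained sketch of that matching step on toy tensors is shown below; the cost terms are simplified stand-ins (negative gt-class probability and plain L1 distance), not mmdet's match-cost modules.

```python
# Sketch of steps 2-4 of HungarianAssigner.assign on toy data.
# The cost terms are simplified stand-ins, not mmdet's match-cost modules.
import torch
from scipy.optimize import linear_sum_assignment

num_query, num_gt, num_cls = 5, 2, 3
torch.manual_seed(0)

cls_pred = torch.randn(num_query, num_cls).softmax(-1)   # fake class probabilities
bbox_pred = torch.rand(num_query, 4)                      # (cx, cy, w, h) in [0, 1]
gt_labels = torch.tensor([0, 2])
gt_bboxes = torch.rand(num_gt, 4)                         # same normalized format for simplicity

# classification cost: -p(gt class); regression cost: L1 distance
cls_cost = -cls_pred[:, gt_labels]                        # [num_query, num_gt]
reg_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)         # [num_query, num_gt]
cost = cls_cost + reg_cost

# Hungarian matching on CPU, then 1-based gt indices (0 = background)
row, col = linear_sum_assignment(cost.numpy())
assigned_gt_inds = torch.zeros(num_query, dtype=torch.long)
assigned_gt_inds[torch.as_tensor(row)] = torch.as_tensor(col) + 1
print(assigned_gt_inds)   # two of the five queries carry a 1-based gt index, the rest stay 0
```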
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/mask_hungarian_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom scipy.optimize import linear_sum_assignment\n\nfrom mmdet.core.bbox.builder import BBOX_ASSIGNERS\nfrom mmdet.core.bbox.match_costs.builder import build_match_cost\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass MaskHungarianAssigner(BaseAssigner):\n    \"\"\"Computes one-to-one matching between predictions and ground truth for\n    mask.\n\n    This class computes an assignment between the targets and the predictions\n    based on the costs. The costs are weighted sum of three components:\n    classification cost, mask focal cost and mask dice cost. The\n    targets don't include the no_object, so generally there are more\n    predictions than targets. After the one-to-one matching, the un-matched\n    are treated as backgrounds. Thus each query prediction will be assigned\n    with `0` or a positive integer indicating the ground truth index:\n\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n\n    Args:\n        cls_cost (:obj:`mmcv.ConfigDict` | dict): Classification cost config.\n        mask_cost (:obj:`mmcv.ConfigDict` | dict): Mask cost config.\n        dice_cost (:obj:`mmcv.ConfigDict` | dict): Dice cost config.\n    \"\"\"\n\n    def __init__(self,\n                 cls_cost=dict(type='ClassificationCost', weight=1.0),\n                 mask_cost=dict(\n                     type='FocalLossCost', weight=1.0, binary_input=True),\n                 dice_cost=dict(type='DiceCost', weight=1.0)):\n        self.cls_cost = build_match_cost(cls_cost)\n        self.mask_cost = build_match_cost(mask_cost)\n        self.dice_cost = build_match_cost(dice_cost)\n\n    def assign(self,\n               cls_pred,\n               mask_pred,\n               gt_labels,\n               gt_mask,\n               img_meta,\n               gt_bboxes_ignore=None,\n               eps=1e-7):\n        \"\"\"Computes one-to-one matching based on the weighted costs.\n\n        Args:\n            cls_pred (Tensor | None): Class prediction in shape\n                (num_query, cls_out_channels).\n            mask_pred (Tensor): Mask prediction in shape (num_query, H, W).\n            gt_labels (Tensor): Label of 'gt_mask'in shape = (num_gt, ).\n            gt_mask (Tensor): Ground truth mask in shape = (num_gt, H, W).\n            img_meta (dict): Meta information for current image.\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`. Default None.\n            eps (int | float, optional): A value added to the denominator for\n                numerical stability. Default 1e-7.\n\n        Returns:\n            :obj:`AssignResult`: The assigned result.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            'Only case when gt_bboxes_ignore is None is supported.'\n        # K-Net sometimes passes cls_pred=None to this assigner.\n        # So we should use the shape of mask_pred\n        num_gt, num_query = gt_labels.shape[0], mask_pred.shape[0]\n\n        # 1. 
assign -1 by default\n        assigned_gt_inds = mask_pred.new_full((num_query, ),\n                                              -1,\n                                              dtype=torch.long)\n        assigned_labels = mask_pred.new_full((num_query, ),\n                                             -1,\n                                             dtype=torch.long)\n        if num_gt == 0 or num_query == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gt == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            return AssignResult(\n                num_gt, assigned_gt_inds, None, labels=assigned_labels)\n\n        # 2. compute the weighted costs\n        # classification and maskcost.\n        if self.cls_cost.weight != 0 and cls_pred is not None:\n            cls_cost = self.cls_cost(cls_pred, gt_labels)\n        else:\n            cls_cost = 0\n\n        if self.mask_cost.weight != 0:\n            # mask_pred shape = [num_query, h, w]\n            # gt_mask shape = [num_gt, h, w]\n            # mask_cost shape = [num_query, num_gt]\n            mask_cost = self.mask_cost(mask_pred, gt_mask)\n        else:\n            mask_cost = 0\n\n        if self.dice_cost.weight != 0:\n            dice_cost = self.dice_cost(mask_pred, gt_mask)\n        else:\n            dice_cost = 0\n        cost = cls_cost + mask_cost + dice_cost\n\n        # 3. do Hungarian matching on CPU using linear_sum_assignment\n        cost = cost.detach().cpu()\n\n        matched_row_inds, matched_col_inds = linear_sum_assignment(cost)\n        matched_row_inds = torch.from_numpy(matched_row_inds).to(\n            mask_pred.device)\n        matched_col_inds = torch.from_numpy(matched_col_inds).to(\n            mask_pred.device)\n\n        # 4. assign backgrounds and foregrounds\n        # assign all indices to backgrounds first\n        assigned_gt_inds[:] = 0\n        # assign foregrounds based on matching results\n        assigned_gt_inds[matched_row_inds] = matched_col_inds + 1\n        assigned_labels[matched_row_inds] = gt_labels[matched_col_inds]\n        return AssignResult(\n            num_gt, assigned_gt_inds, None, labels=assigned_labels)\n"
  },
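MaskHungarianAssigner follows the same matching recipe but swaps the box costs for mask costs (focal + dice). As a rough illustration of the dice term alone, here is a tiny standalone dice-cost function on toy masks, written from the standard 1 - 2|X∩Y|/(|X|+|Y|) formula rather than by calling mmdet's DiceCost.

```python
# Toy dice cost between predicted mask probabilities and binary gt masks.
# A stand-in for mmdet's DiceCost, written from the standard dice formula.
import torch

def dice_cost(mask_pred, gt_mask, eps=1e-7):
    # mask_pred: [num_query, H, W] probabilities, gt_mask: [num_gt, H, W] in {0, 1}
    pred = mask_pred.flatten(1)                 # [num_query, H*W]
    gt = gt_mask.flatten(1).float()             # [num_gt, H*W]
    numerator = 2 * torch.einsum('qd,gd->qg', pred, gt)
    denominator = pred.sum(-1)[:, None] + gt.sum(-1)[None, :]
    dice = (numerator + eps) / (denominator + eps)
    return 1 - dice                             # [num_query, num_gt], lower = better match

mask_pred = torch.rand(4, 8, 8)
gt_mask = (torch.rand(2, 8, 8) > 0.5).long()
print(dice_cost(mask_pred, gt_mask).shape)      # torch.Size([4, 2])
```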
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/max_iou_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..iou_calculators import build_iou_calculator\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass MaxIoUAssigner(BaseAssigner):\n    \"\"\"Assign a corresponding gt bbox or background to each bbox.\n\n    Each proposals will be assigned with `-1`, or a semi-positive integer\n    indicating the ground truth index.\n\n    - -1: negative sample, no assigned gt\n    - semi-positive integer: positive sample, index (0-based) of assigned gt\n\n    Args:\n        pos_iou_thr (float): IoU threshold for positive bboxes.\n        neg_iou_thr (float or tuple): IoU threshold for negative bboxes.\n        min_pos_iou (float): Minimum iou for a bbox to be considered as a\n            positive bbox. Positive samples can have smaller IoU than\n            pos_iou_thr due to the 4th step (assign max IoU sample to each gt).\n            `min_pos_iou` is set to avoid assigning bboxes that have extremely\n            small iou with GT as positive samples. It brings about 0.3 mAP\n            improvements in 1x schedule but does not affect the performance of\n            3x schedule. More comparisons can be found in\n            `PR #7464 <https://github.com/open-mmlab/mmdetection/pull/7464>`_.\n        gt_max_assign_all (bool): Whether to assign all bboxes with the same\n            highest overlap with some gt to that gt.\n        ignore_iof_thr (float): IoF threshold for ignoring bboxes (if\n            `gt_bboxes_ignore` is specified). Negative values mean not\n            ignoring any bboxes.\n        ignore_wrt_candidates (bool): Whether to compute the iof between\n            `bboxes` and `gt_bboxes_ignore`, or the contrary.\n        match_low_quality (bool): Whether to allow low quality matches. This is\n            usually allowed for RPN and single stage detectors, but not allowed\n            in the second stage. Details are demonstrated in Step 4.\n        gpu_assign_thr (int): The upper bound of the number of GT for GPU\n            assign. When the number of gt is above this threshold, will assign\n            on CPU device. Negative values mean not assign on CPU.\n    \"\"\"\n\n    def __init__(self,\n                 pos_iou_thr,\n                 neg_iou_thr,\n                 min_pos_iou=.0,\n                 gt_max_assign_all=True,\n                 ignore_iof_thr=-1,\n                 ignore_wrt_candidates=True,\n                 match_low_quality=True,\n                 gpu_assign_thr=-1,\n                 iou_calculator=dict(type='BboxOverlaps2D')):\n        self.pos_iou_thr = pos_iou_thr\n        self.neg_iou_thr = neg_iou_thr\n        self.min_pos_iou = min_pos_iou\n        self.gt_max_assign_all = gt_max_assign_all\n        self.ignore_iof_thr = ignore_iof_thr\n        self.ignore_wrt_candidates = ignore_wrt_candidates\n        self.gpu_assign_thr = gpu_assign_thr\n        self.match_low_quality = match_low_quality\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n\n    def assign(self, bboxes, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):\n        \"\"\"Assign gt to bboxes.\n\n        This method assign a gt bbox to every bbox (proposal/anchor), each bbox\n        will be assigned with -1, or a semi-positive number. 
-1 means negative\n        sample, semi-positive number is the index (0-based) of assigned gt.\n        The assignment is done in following steps, the order matters.\n\n        1. assign every bbox to the background\n        2. assign proposals whose iou with all gts < neg_iou_thr to 0\n        3. for each bbox, if the iou with its nearest gt >= pos_iou_thr,\n           assign it to that bbox\n        4. for each gt bbox, assign its nearest proposals (may be more than\n           one) to itself\n\n        Args:\n            bboxes (Tensor): Bounding boxes to be assigned, shape(n, 4).\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO.\n            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n\n        Example:\n            >>> self = MaxIoUAssigner(0.5, 0.5)\n            >>> bboxes = torch.Tensor([[0, 0, 10, 10], [10, 10, 20, 20]])\n            >>> gt_bboxes = torch.Tensor([[0, 0, 10, 9]])\n            >>> assign_result = self.assign(bboxes, gt_bboxes)\n            >>> expected_gt_inds = torch.LongTensor([1, 0])\n            >>> assert torch.all(assign_result.gt_inds == expected_gt_inds)\n        \"\"\"\n        assign_on_cpu = True if (self.gpu_assign_thr > 0) and (\n            gt_bboxes.shape[0] > self.gpu_assign_thr) else False\n        # compute overlap and assign gt on CPU when number of GT is large\n        if assign_on_cpu:\n            device = bboxes.device\n            bboxes = bboxes.cpu()\n            gt_bboxes = gt_bboxes.cpu()\n            if gt_bboxes_ignore is not None:\n                gt_bboxes_ignore = gt_bboxes_ignore.cpu()\n            if gt_labels is not None:\n                gt_labels = gt_labels.cpu()\n\n        overlaps = self.iou_calculator(gt_bboxes, bboxes)\n\n        if (self.ignore_iof_thr > 0 and gt_bboxes_ignore is not None\n                and gt_bboxes_ignore.numel() > 0 and bboxes.numel() > 0):\n            if self.ignore_wrt_candidates:\n                ignore_overlaps = self.iou_calculator(\n                    bboxes, gt_bboxes_ignore, mode='iof')\n                ignore_max_overlaps, _ = ignore_overlaps.max(dim=1)\n            else:\n                ignore_overlaps = self.iou_calculator(\n                    gt_bboxes_ignore, bboxes, mode='iof')\n                ignore_max_overlaps, _ = ignore_overlaps.max(dim=0)\n            overlaps[:, ignore_max_overlaps > self.ignore_iof_thr] = -1\n\n        assign_result = self.assign_wrt_overlaps(overlaps, gt_labels)\n        if assign_on_cpu:\n            assign_result.gt_inds = assign_result.gt_inds.to(device)\n            assign_result.max_overlaps = assign_result.max_overlaps.to(device)\n            if assign_result.labels is not None:\n                assign_result.labels = assign_result.labels.to(device)\n        return assign_result\n\n    def assign_wrt_overlaps(self, overlaps, gt_labels=None):\n        \"\"\"Assign w.r.t. the overlaps of bboxes with gts.\n\n        Args:\n            overlaps (Tensor): Overlaps between k gt_bboxes and n bboxes,\n                shape(k, n).\n            gt_labels (Tensor, optional): Labels of k gt_bboxes, shape (k, ).\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n        \"\"\"\n        num_gts, num_bboxes = overlaps.size(0), overlaps.size(1)\n\n        # 1. 
assign -1 by default\n        assigned_gt_inds = overlaps.new_full((num_bboxes, ),\n                                             -1,\n                                             dtype=torch.long)\n\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            max_overlaps = overlaps.new_zeros((num_bboxes, ))\n            if num_gts == 0:\n                # No truth, assign everything to background\n                assigned_gt_inds[:] = 0\n            if gt_labels is None:\n                assigned_labels = None\n            else:\n                assigned_labels = overlaps.new_full((num_bboxes, ),\n                                                    -1,\n                                                    dtype=torch.long)\n            return AssignResult(\n                num_gts,\n                assigned_gt_inds,\n                max_overlaps,\n                labels=assigned_labels)\n\n        # for each anchor, which gt best overlaps with it\n        # for each anchor, the max iou of all gts\n        max_overlaps, argmax_overlaps = overlaps.max(dim=0)\n        # for each gt, which anchor best overlaps with it\n        # for each gt, the max iou of all proposals\n        gt_max_overlaps, gt_argmax_overlaps = overlaps.max(dim=1)\n\n        # 2. assign negative: below\n        # the negative inds are set to be 0\n        if isinstance(self.neg_iou_thr, float):\n            assigned_gt_inds[(max_overlaps >= 0)\n                             & (max_overlaps < self.neg_iou_thr)] = 0\n        elif isinstance(self.neg_iou_thr, tuple):\n            assert len(self.neg_iou_thr) == 2\n            assigned_gt_inds[(max_overlaps >= self.neg_iou_thr[0])\n                             & (max_overlaps < self.neg_iou_thr[1])] = 0\n\n        # 3. assign positive: above positive IoU threshold\n        pos_inds = max_overlaps >= self.pos_iou_thr\n        assigned_gt_inds[pos_inds] = argmax_overlaps[pos_inds] + 1\n\n        if self.match_low_quality:\n            # Low-quality matching will overwrite the assigned_gt_inds assigned\n            # in Step 3. Thus, the assigned gt might not be the best one for\n            # prediction.\n            # For example, if bbox A has 0.9 and 0.8 iou with GT bbox 1 & 2,\n            # bbox 1 will be assigned as the best target for bbox A in step 3.\n            # However, if GT bbox 2's gt_argmax_overlaps = A, bbox A's\n            # assigned_gt_inds will be overwritten to be bbox 2.\n            # This might be the reason that it is not used in ROI Heads.\n            for i in range(num_gts):\n                if gt_max_overlaps[i] >= self.min_pos_iou:\n                    if self.gt_max_assign_all:\n                        max_iou_inds = overlaps[i, :] == gt_max_overlaps[i]\n                        assigned_gt_inds[max_iou_inds] = i + 1\n                    else:\n                        assigned_gt_inds[gt_argmax_overlaps[i]] = i + 1\n\n        if gt_labels is not None:\n            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)\n            pos_inds = torch.nonzero(\n                assigned_gt_inds > 0, as_tuple=False).squeeze()\n            if pos_inds.numel() > 0:\n                assigned_labels[pos_inds] = gt_labels[\n                    assigned_gt_inds[pos_inds] - 1]\n        else:\n            assigned_labels = None\n\n        return AssignResult(\n            num_gts, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n"
  },
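The heart of MaxIoUAssigner is `assign_wrt_overlaps`: anchors whose best IoU falls below `neg_iou_thr` become background (0), anchors at or above `pos_iou_thr` take their best-overlapping gt (1-based), and with low-quality matching each gt additionally claims its single best anchor. A compact toy version of those three decisions is below, assuming a float negative threshold and `gt_max_assign_all=False`.

```python
# Toy version of MaxIoUAssigner.assign_wrt_overlaps (float neg_iou_thr,
# gt_max_assign_all=False), showing how the -1 / 0 / gt_index+1 codes arise.
import torch

pos_iou_thr, neg_iou_thr, min_pos_iou = 0.5, 0.4, 0.0
overlaps = torch.tensor([[0.9, 0.1, 0.3],     # gt 0 vs 3 anchors
                         [0.2, 0.45, 0.05]])  # gt 1 vs 3 anchors
num_gts, num_bboxes = overlaps.shape

assigned = overlaps.new_full((num_bboxes,), -1, dtype=torch.long)   # -1 = don't care
max_overlaps, argmax = overlaps.max(dim=0)

# negatives: best IoU below the negative threshold
assigned[(max_overlaps >= 0) & (max_overlaps < neg_iou_thr)] = 0
# positives: best IoU above the positive threshold -> 1-based gt index
pos = max_overlaps >= pos_iou_thr
assigned[pos] = argmax[pos] + 1
# low-quality matches: every gt grabs its single best anchor
gt_max, gt_argmax = overlaps.max(dim=1)
for i in range(num_gts):
    if gt_max[i] >= min_pos_iou:
        assigned[gt_argmax[i]] = i + 1

print(assigned)   # tensor([1, 2, 0])
```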
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/point_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass PointAssigner(BaseAssigner):\n    \"\"\"Assign a corresponding gt bbox or background to each point.\n\n    Each proposals will be assigned with `0`, or a positive integer\n    indicating the ground truth index.\n\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n    \"\"\"\n\n    def __init__(self, scale=4, pos_num=3):\n        self.scale = scale\n        self.pos_num = pos_num\n\n    def assign(self, points, gt_bboxes, gt_bboxes_ignore=None, gt_labels=None):\n        \"\"\"Assign gt to points.\n\n        This method assign a gt bbox to every points set, each points set\n        will be assigned with  the background_label (-1), or a label number.\n        -1 is background, and semi-positive number is the index (0-based) of\n        assigned gt.\n        The assignment is done in following steps, the order matters.\n\n        1. assign every points to the background_label (-1)\n        2. A point is assigned to some gt bbox if\n            (i) the point is within the k closest points to the gt bbox\n            (ii) the distance between this point and the gt is smaller than\n                other gt bboxes\n\n        Args:\n            points (Tensor): points to be assigned, shape(n, 3) while last\n                dimension stands for (x, y, stride).\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO.\n                NOTE: currently unused.\n            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n        \"\"\"\n        num_points = points.shape[0]\n        num_gts = gt_bboxes.shape[0]\n\n        if num_gts == 0 or num_points == 0:\n            # If no truth assign everything to the background\n            assigned_gt_inds = points.new_full((num_points, ),\n                                               0,\n                                               dtype=torch.long)\n            if gt_labels is None:\n                assigned_labels = None\n            else:\n                assigned_labels = points.new_full((num_points, ),\n                                                  -1,\n                                                  dtype=torch.long)\n            return AssignResult(\n                num_gts, assigned_gt_inds, None, labels=assigned_labels)\n\n        points_xy = points[:, :2]\n        points_stride = points[:, 2]\n        points_lvl = torch.log2(\n            points_stride).int()  # [3...,4...,5...,6...,7...]\n        lvl_min, lvl_max = points_lvl.min(), points_lvl.max()\n\n        # assign gt box\n        gt_bboxes_xy = (gt_bboxes[:, :2] + gt_bboxes[:, 2:]) / 2\n        gt_bboxes_wh = (gt_bboxes[:, 2:] - gt_bboxes[:, :2]).clamp(min=1e-6)\n        scale = self.scale\n        gt_bboxes_lvl = ((torch.log2(gt_bboxes_wh[:, 0] / scale) +\n                          torch.log2(gt_bboxes_wh[:, 1] / scale)) / 2).int()\n        gt_bboxes_lvl = torch.clamp(gt_bboxes_lvl, min=lvl_min, max=lvl_max)\n\n        # stores the assigned gt index of each point\n        assigned_gt_inds = points.new_zeros((num_points, ), 
dtype=torch.long)\n        # stores the assigned gt dist (to this point) of each point\n        assigned_gt_dist = points.new_full((num_points, ), float('inf'))\n        points_range = torch.arange(points.shape[0])\n\n        for idx in range(num_gts):\n            gt_lvl = gt_bboxes_lvl[idx]\n            # get the index of points in this level\n            lvl_idx = gt_lvl == points_lvl\n            points_index = points_range[lvl_idx]\n            # get the points in this level\n            lvl_points = points_xy[lvl_idx, :]\n            # get the center point of gt\n            gt_point = gt_bboxes_xy[[idx], :]\n            # get width and height of gt\n            gt_wh = gt_bboxes_wh[[idx], :]\n            # compute the distance between gt center and\n            #   all points in this level\n            points_gt_dist = ((lvl_points - gt_point) / gt_wh).norm(dim=1)\n            # find the nearest k points to gt center in this level\n            min_dist, min_dist_index = torch.topk(\n                points_gt_dist, self.pos_num, largest=False)\n            # the index of nearest k points to gt center in this level\n            min_dist_points_index = points_index[min_dist_index]\n            # The less_than_recorded_index stores the index\n            #   of min_dist that is less then the assigned_gt_dist. Where\n            #   assigned_gt_dist stores the dist from previous assigned gt\n            #   (if exist) to each point.\n            less_than_recorded_index = min_dist < assigned_gt_dist[\n                min_dist_points_index]\n            # The min_dist_points_index stores the index of points satisfy:\n            #   (1) it is k nearest to current gt center in this level.\n            #   (2) it is closer to current gt center than other gt center.\n            min_dist_points_index = min_dist_points_index[\n                less_than_recorded_index]\n            # assign the result\n            assigned_gt_inds[min_dist_points_index] = idx + 1\n            assigned_gt_dist[min_dist_points_index] = min_dist[\n                less_than_recorded_index]\n\n        if gt_labels is not None:\n            assigned_labels = assigned_gt_inds.new_full((num_points, ), -1)\n            pos_inds = torch.nonzero(\n                assigned_gt_inds > 0, as_tuple=False).squeeze()\n            if pos_inds.numel() > 0:\n                assigned_labels[pos_inds] = gt_labels[\n                    assigned_gt_inds[pos_inds] - 1]\n        else:\n            assigned_labels = None\n\n        return AssignResult(\n            num_gts, assigned_gt_inds, None, labels=assigned_labels)\n"
  },
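PointAssigner first decides which pyramid level a gt box "belongs" to from its size, and only points on that level compete by normalized center distance. The level-selection formula is the part worth isolating; the snippet below re-runs that computation on toy values (scale=4 as in the default, strides chosen arbitrarily for the example).

```python
# Level selection from PointAssigner: a gt box of side ~s lands on the level
# whose stride is roughly s / scale (scale defaults to 4).
import torch

scale = 4
points_stride = torch.tensor([8., 8., 16., 32.])        # toy strides of the point set
points_lvl = torch.log2(points_stride).int()             # [3, 3, 4, 5]
lvl_min, lvl_max = points_lvl.min().item(), points_lvl.max().item()

gt_bboxes = torch.tensor([[0., 0., 32., 32.],            # small box
                          [0., 0., 256., 256.]])         # large box
gt_wh = (gt_bboxes[:, 2:] - gt_bboxes[:, :2]).clamp(min=1e-6)
gt_lvl = ((torch.log2(gt_wh[:, 0] / scale) +
           torch.log2(gt_wh[:, 1] / scale)) / 2).int()
gt_lvl = torch.clamp(gt_lvl, min=lvl_min, max=lvl_max)
print(gt_lvl)   # tensor([3, 5], dtype=torch.int32): small box -> stride 8, large box -> stride 32
```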
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/region_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core import anchor_inside_flags\nfrom ..builder import BBOX_ASSIGNERS\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\ndef calc_region(bbox, ratio, stride, featmap_size=None):\n    \"\"\"Calculate region of the box defined by the ratio, the ratio is from the\n    center of the box to every edge.\"\"\"\n    # project bbox on the feature\n    f_bbox = bbox / stride\n    x1 = torch.round((1 - ratio) * f_bbox[0] + ratio * f_bbox[2])\n    y1 = torch.round((1 - ratio) * f_bbox[1] + ratio * f_bbox[3])\n    x2 = torch.round(ratio * f_bbox[0] + (1 - ratio) * f_bbox[2])\n    y2 = torch.round(ratio * f_bbox[1] + (1 - ratio) * f_bbox[3])\n    if featmap_size is not None:\n        x1 = x1.clamp(min=0, max=featmap_size[1])\n        y1 = y1.clamp(min=0, max=featmap_size[0])\n        x2 = x2.clamp(min=0, max=featmap_size[1])\n        y2 = y2.clamp(min=0, max=featmap_size[0])\n    return (x1, y1, x2, y2)\n\n\ndef anchor_ctr_inside_region_flags(anchors, stride, region):\n    \"\"\"Get the flag indicate whether anchor centers are inside regions.\"\"\"\n    x1, y1, x2, y2 = region\n    f_anchors = anchors / stride\n    x = (f_anchors[:, 0] + f_anchors[:, 2]) * 0.5\n    y = (f_anchors[:, 1] + f_anchors[:, 3]) * 0.5\n    flags = (x >= x1) & (x <= x2) & (y >= y1) & (y <= y2)\n    return flags\n\n\n@BBOX_ASSIGNERS.register_module()\nclass RegionAssigner(BaseAssigner):\n    \"\"\"Assign a corresponding gt bbox or background to each bbox.\n\n    Each proposals will be assigned with `-1`, `0`, or a positive integer\n    indicating the ground truth index.\n\n    - -1: don't care\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n\n    Args:\n        center_ratio: ratio of the region in the center of the bbox to\n            define positive sample.\n        ignore_ratio: ratio of the region to define ignore samples.\n    \"\"\"\n\n    def __init__(self, center_ratio=0.2, ignore_ratio=0.5):\n        self.center_ratio = center_ratio\n        self.ignore_ratio = ignore_ratio\n\n    def assign(self,\n               mlvl_anchors,\n               mlvl_valid_flags,\n               gt_bboxes,\n               img_meta,\n               featmap_sizes,\n               anchor_scale,\n               anchor_strides,\n               gt_bboxes_ignore=None,\n               gt_labels=None,\n               allowed_border=0):\n        \"\"\"Assign gt to anchors.\n\n        This method assign a gt bbox to every bbox (proposal/anchor), each bbox\n        will be assigned with -1, 0, or a positive number. -1 means don't care,\n        0 means negative sample, positive number is the index (1-based) of\n        assigned gt.\n\n        The assignment is done in following steps, and the order matters.\n\n        1. Assign every anchor to 0 (negative)\n        2. (For each gt_bboxes) Compute ignore flags based on ignore_region\n           then assign -1 to anchors w.r.t. ignore flags\n        3. (For each gt_bboxes) Compute pos flags based on center_region then\n           assign gt_bboxes to anchors w.r.t. pos flags\n        4. (For each gt_bboxes) Compute ignore flags based on adjacent anchor\n           level then assign -1 to anchors w.r.t. ignore flags\n        5. 
Assign anchor outside of image to -1\n\n        Args:\n            mlvl_anchors (list[Tensor]): Multi level anchors.\n            mlvl_valid_flags (list[Tensor]): Multi level valid flags.\n            gt_bboxes (Tensor): Ground truth bboxes of image\n            img_meta (dict): Meta info of image.\n            featmap_sizes (list[Tensor]): Feature mapsize each level\n            anchor_scale (int): Scale of the anchor.\n            anchor_strides (list[int]): Stride of the anchor.\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO.\n            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).\n            allowed_border (int, optional): The border to allow the valid\n                anchor. Defaults to 0.\n\n        Returns:\n            :obj:`AssignResult`: The assign result.\n        \"\"\"\n        if gt_bboxes_ignore is not None:\n            raise NotImplementedError\n\n        num_gts = gt_bboxes.shape[0]\n        num_bboxes = sum(x.shape[0] for x in mlvl_anchors)\n\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            max_overlaps = gt_bboxes.new_zeros((num_bboxes, ))\n            assigned_gt_inds = gt_bboxes.new_zeros((num_bboxes, ),\n                                                   dtype=torch.long)\n            if gt_labels is None:\n                assigned_labels = None\n            else:\n                assigned_labels = gt_bboxes.new_full((num_bboxes, ),\n                                                     -1,\n                                                     dtype=torch.long)\n            return AssignResult(\n                num_gts,\n                assigned_gt_inds,\n                max_overlaps,\n                labels=assigned_labels)\n\n        num_lvls = len(mlvl_anchors)\n        r1 = (1 - self.center_ratio) / 2\n        r2 = (1 - self.ignore_ratio) / 2\n\n        scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0]) *\n                           (gt_bboxes[:, 3] - gt_bboxes[:, 1]))\n        min_anchor_size = scale.new_full(\n            (1, ), float(anchor_scale * anchor_strides[0]))\n        target_lvls = torch.floor(\n            torch.log2(scale) - torch.log2(min_anchor_size) + 0.5)\n        target_lvls = target_lvls.clamp(min=0, max=num_lvls - 1).long()\n\n        # 1. assign 0 (negative) by default\n        mlvl_assigned_gt_inds = []\n        mlvl_ignore_flags = []\n        for lvl in range(num_lvls):\n            h, w = featmap_sizes[lvl]\n            assert h * w == mlvl_anchors[lvl].shape[0]\n            assigned_gt_inds = gt_bboxes.new_full((h * w, ),\n                                                  0,\n                                                  dtype=torch.long)\n            ignore_flags = torch.zeros_like(assigned_gt_inds)\n            mlvl_assigned_gt_inds.append(assigned_gt_inds)\n            mlvl_ignore_flags.append(ignore_flags)\n\n        for gt_id in range(num_gts):\n            lvl = target_lvls[gt_id].item()\n            featmap_size = featmap_sizes[lvl]\n            stride = anchor_strides[lvl]\n            anchors = mlvl_anchors[lvl]\n            gt_bbox = gt_bboxes[gt_id, :4]\n\n            # Compute regions\n            ignore_region = calc_region(gt_bbox, r2, stride, featmap_size)\n            ctr_region = calc_region(gt_bbox, r1, stride, featmap_size)\n\n            # 2. 
Assign -1 to ignore flags\n            ignore_flags = anchor_ctr_inside_region_flags(\n                anchors, stride, ignore_region)\n            mlvl_assigned_gt_inds[lvl][ignore_flags] = -1\n\n            # 3. Assign gt_bboxes to pos flags\n            pos_flags = anchor_ctr_inside_region_flags(anchors, stride,\n                                                       ctr_region)\n            mlvl_assigned_gt_inds[lvl][pos_flags] = gt_id + 1\n\n            # 4. Assign -1 to ignore adjacent lvl\n            if lvl > 0:\n                d_lvl = lvl - 1\n                d_anchors = mlvl_anchors[d_lvl]\n                d_featmap_size = featmap_sizes[d_lvl]\n                d_stride = anchor_strides[d_lvl]\n                d_ignore_region = calc_region(gt_bbox, r2, d_stride,\n                                              d_featmap_size)\n                ignore_flags = anchor_ctr_inside_region_flags(\n                    d_anchors, d_stride, d_ignore_region)\n                mlvl_ignore_flags[d_lvl][ignore_flags] = 1\n            if lvl < num_lvls - 1:\n                u_lvl = lvl + 1\n                u_anchors = mlvl_anchors[u_lvl]\n                u_featmap_size = featmap_sizes[u_lvl]\n                u_stride = anchor_strides[u_lvl]\n                u_ignore_region = calc_region(gt_bbox, r2, u_stride,\n                                              u_featmap_size)\n                ignore_flags = anchor_ctr_inside_region_flags(\n                    u_anchors, u_stride, u_ignore_region)\n                mlvl_ignore_flags[u_lvl][ignore_flags] = 1\n\n        # 4. (cont.) Assign -1 to ignore adjacent lvl\n        for lvl in range(num_lvls):\n            ignore_flags = mlvl_ignore_flags[lvl]\n            mlvl_assigned_gt_inds[lvl][ignore_flags] = -1\n\n        # 5. Assign -1 to anchor outside of image\n        flat_assigned_gt_inds = torch.cat(mlvl_assigned_gt_inds)\n        flat_anchors = torch.cat(mlvl_anchors)\n        flat_valid_flags = torch.cat(mlvl_valid_flags)\n        assert (flat_assigned_gt_inds.shape[0] == flat_anchors.shape[0] ==\n                flat_valid_flags.shape[0])\n        inside_flags = anchor_inside_flags(flat_anchors, flat_valid_flags,\n                                           img_meta['img_shape'],\n                                           allowed_border)\n        outside_flags = ~inside_flags\n        flat_assigned_gt_inds[outside_flags] = -1\n\n        if gt_labels is not None:\n            assigned_labels = torch.zeros_like(flat_assigned_gt_inds)\n            pos_flags = assigned_gt_inds > 0\n            assigned_labels[pos_flags] = gt_labels[\n                flat_assigned_gt_inds[pos_flags] - 1]\n        else:\n            assigned_labels = None\n\n        return AssignResult(\n            num_gts, flat_assigned_gt_inds, None, labels=assigned_labels)\n"
  },
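In RegionAssigner, `calc_region` shrinks a gt box around its center by a ratio and projects it onto the feature map at the given stride; anchors whose centers fall inside the small center region become positives and a larger surrounding region is ignored. The quick check below just re-runs that formula on a toy box to make the two regions concrete (it adds no new logic).

```python
# Re-running the calc_region formula from region_assigner.py on a toy box to
# see the center (positive) and ignore regions in feature-map coordinates.
import torch

def calc_region(bbox, ratio, stride):
    f_bbox = bbox / stride
    x1 = torch.round((1 - ratio) * f_bbox[0] + ratio * f_bbox[2])
    y1 = torch.round((1 - ratio) * f_bbox[1] + ratio * f_bbox[3])
    x2 = torch.round(ratio * f_bbox[0] + (1 - ratio) * f_bbox[2])
    y2 = torch.round(ratio * f_bbox[1] + (1 - ratio) * f_bbox[3])
    return (x1, y1, x2, y2)

gt_bbox = torch.tensor([32., 32., 96., 96.])
center_ratio, ignore_ratio, stride = 0.2, 0.5, 8
r1 = (1 - center_ratio) / 2     # 0.4 -> small region around the center
r2 = (1 - ignore_ratio) / 2     # 0.25 -> larger ignore region
print(calc_region(gt_bbox, r1, stride))   # (tensor(7.), tensor(7.), tensor(9.), tensor(9.))
print(calc_region(gt_bbox, r2, stride))   # (tensor(6.), tensor(6.), tensor(10.), tensor(10.))
```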
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/sim_ota_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn.functional as F\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..iou_calculators import bbox_overlaps\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass SimOTAAssigner(BaseAssigner):\n    \"\"\"Computes matching between predictions and ground truth.\n\n    Args:\n        center_radius (int | float, optional): Ground truth center size\n            to judge whether a prior is in center. Default 2.5.\n        candidate_topk (int, optional): The candidate top-k which used to\n            get top-k ious to calculate dynamic-k. Default 10.\n        iou_weight (int | float, optional): The scale factor for regression\n            iou cost. Default 3.0.\n        cls_weight (int | float, optional): The scale factor for classification\n            cost. Default 1.0.\n    \"\"\"\n\n    def __init__(self,\n                 center_radius=2.5,\n                 candidate_topk=10,\n                 iou_weight=3.0,\n                 cls_weight=1.0):\n        self.center_radius = center_radius\n        self.candidate_topk = candidate_topk\n        self.iou_weight = iou_weight\n        self.cls_weight = cls_weight\n\n    def assign(self,\n               pred_scores,\n               priors,\n               decoded_bboxes,\n               gt_bboxes,\n               gt_labels,\n               gt_bboxes_ignore=None,\n               eps=1e-7):\n        \"\"\"Assign gt to priors using SimOTA. It will switch to CPU mode when\n        GPU is out of memory.\n        Args:\n            pred_scores (Tensor): Classification scores of one image,\n                a 2D-Tensor with shape [num_priors, num_classes]\n            priors (Tensor): All priors of one image, a 2D-Tensor with shape\n                [num_priors, 4] in [cx, xy, stride_w, stride_y] format.\n            decoded_bboxes (Tensor): Predicted bboxes, a 2D-Tensor with shape\n                [num_priors, 4] in [tl_x, tl_y, br_x, br_y] format.\n            gt_bboxes (Tensor): Ground truth bboxes of one image, a 2D-Tensor\n                with shape [num_gts, 4] in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (Tensor): Ground truth labels of one image, a Tensor\n                with shape [num_gts].\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO.\n            eps (float): A value added to the denominator for numerical\n                stability. Default 1e-7.\n        Returns:\n            assign_result (obj:`AssignResult`): The assigned result.\n        \"\"\"\n        try:\n            assign_result = self._assign(pred_scores, priors, decoded_bboxes,\n                                         gt_bboxes, gt_labels,\n                                         gt_bboxes_ignore, eps)\n            return assign_result\n        except RuntimeError:\n            origin_device = pred_scores.device\n            warnings.warn('OOM RuntimeError is raised due to the huge memory '\n                          'cost during label assignment. CPU mode is applied '\n                          'in this batch. 
If you want to avoid this issue, '\n                          'try to reduce the batch size or image size.')\n            torch.cuda.empty_cache()\n\n            pred_scores = pred_scores.cpu()\n            priors = priors.cpu()\n            decoded_bboxes = decoded_bboxes.cpu()\n            gt_bboxes = gt_bboxes.cpu().float()\n            gt_labels = gt_labels.cpu()\n\n            assign_result = self._assign(pred_scores, priors, decoded_bboxes,\n                                         gt_bboxes, gt_labels,\n                                         gt_bboxes_ignore, eps)\n            assign_result.gt_inds = assign_result.gt_inds.to(origin_device)\n            assign_result.max_overlaps = assign_result.max_overlaps.to(\n                origin_device)\n            assign_result.labels = assign_result.labels.to(origin_device)\n\n            return assign_result\n\n    def _assign(self,\n                pred_scores,\n                priors,\n                decoded_bboxes,\n                gt_bboxes,\n                gt_labels,\n                gt_bboxes_ignore=None,\n                eps=1e-7):\n        \"\"\"Assign gt to priors using SimOTA.\n        Args:\n            pred_scores (Tensor): Classification scores of one image,\n                a 2D-Tensor with shape [num_priors, num_classes]\n            priors (Tensor): All priors of one image, a 2D-Tensor with shape\n                [num_priors, 4] in [cx, xy, stride_w, stride_y] format.\n            decoded_bboxes (Tensor): Predicted bboxes, a 2D-Tensor with shape\n                [num_priors, 4] in [tl_x, tl_y, br_x, br_y] format.\n            gt_bboxes (Tensor): Ground truth bboxes of one image, a 2D-Tensor\n                with shape [num_gts, 4] in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (Tensor): Ground truth labels of one image, a Tensor\n                with shape [num_gts].\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO.\n            eps (float): A value added to the denominator for numerical\n                stability. 
Default 1e-7.\n        Returns:\n            :obj:`AssignResult`: The assigned result.\n        \"\"\"\n        INF = 100000.0\n        num_gt = gt_bboxes.size(0)\n        num_bboxes = decoded_bboxes.size(0)\n\n        # assign 0 by default\n        assigned_gt_inds = decoded_bboxes.new_full((num_bboxes, ),\n                                                   0,\n                                                   dtype=torch.long)\n        valid_mask, is_in_boxes_and_center = self.get_in_gt_and_in_center_info(\n            priors, gt_bboxes)\n        valid_decoded_bbox = decoded_bboxes[valid_mask]\n        valid_pred_scores = pred_scores[valid_mask]\n        num_valid = valid_decoded_bbox.size(0)\n\n        if num_gt == 0 or num_bboxes == 0 or num_valid == 0:\n            # No ground truth or boxes, return empty assignment\n            max_overlaps = decoded_bboxes.new_zeros((num_bboxes, ))\n            if num_gt == 0:\n                # No truth, assign everything to background\n                assigned_gt_inds[:] = 0\n            if gt_labels is None:\n                assigned_labels = None\n            else:\n                assigned_labels = decoded_bboxes.new_full((num_bboxes, ),\n                                                          -1,\n                                                          dtype=torch.long)\n            return AssignResult(\n                num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n\n        pairwise_ious = bbox_overlaps(valid_decoded_bbox, gt_bboxes)\n        iou_cost = -torch.log(pairwise_ious + eps)\n\n        gt_onehot_label = (\n            F.one_hot(gt_labels.to(torch.int64),\n                      pred_scores.shape[-1]).float().unsqueeze(0).repeat(\n                          num_valid, 1, 1))\n\n        valid_pred_scores = valid_pred_scores.unsqueeze(1).repeat(1, num_gt, 1)\n        cls_cost = (\n            F.binary_cross_entropy(\n                valid_pred_scores.to(dtype=torch.float32).sqrt_(),\n                gt_onehot_label,\n                reduction='none',\n            ).sum(-1).to(dtype=valid_pred_scores.dtype))\n\n        cost_matrix = (\n            cls_cost * self.cls_weight + iou_cost * self.iou_weight +\n            (~is_in_boxes_and_center) * INF)\n\n        matched_pred_ious, matched_gt_inds = \\\n            self.dynamic_k_matching(\n                cost_matrix, pairwise_ious, num_gt, valid_mask)\n\n        # convert to AssignResult format\n        assigned_gt_inds[valid_mask] = matched_gt_inds + 1\n        assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)\n        assigned_labels[valid_mask] = gt_labels[matched_gt_inds].long()\n        max_overlaps = assigned_gt_inds.new_full((num_bboxes, ),\n                                                 -INF,\n                                                 dtype=torch.float32)\n        max_overlaps[valid_mask] = matched_pred_ious\n        return AssignResult(\n            num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n\n    def get_in_gt_and_in_center_info(self, priors, gt_bboxes):\n        num_gt = gt_bboxes.size(0)\n\n        repeated_x = priors[:, 0].unsqueeze(1).repeat(1, num_gt)\n        repeated_y = priors[:, 1].unsqueeze(1).repeat(1, num_gt)\n        repeated_stride_x = priors[:, 2].unsqueeze(1).repeat(1, num_gt)\n        repeated_stride_y = priors[:, 3].unsqueeze(1).repeat(1, num_gt)\n\n        # is prior centers in gt bboxes, shape: [n_prior, n_gt]\n        l_ = repeated_x - gt_bboxes[:, 0]\n        t_ = repeated_y - gt_bboxes[:, 
1]\n        r_ = gt_bboxes[:, 2] - repeated_x\n        b_ = gt_bboxes[:, 3] - repeated_y\n\n        deltas = torch.stack([l_, t_, r_, b_], dim=1)\n        is_in_gts = deltas.min(dim=1).values > 0\n        is_in_gts_all = is_in_gts.sum(dim=1) > 0\n\n        # is prior centers in gt centers\n        gt_cxs = (gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2.0\n        gt_cys = (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2.0\n        ct_box_l = gt_cxs - self.center_radius * repeated_stride_x\n        ct_box_t = gt_cys - self.center_radius * repeated_stride_y\n        ct_box_r = gt_cxs + self.center_radius * repeated_stride_x\n        ct_box_b = gt_cys + self.center_radius * repeated_stride_y\n\n        cl_ = repeated_x - ct_box_l\n        ct_ = repeated_y - ct_box_t\n        cr_ = ct_box_r - repeated_x\n        cb_ = ct_box_b - repeated_y\n\n        ct_deltas = torch.stack([cl_, ct_, cr_, cb_], dim=1)\n        is_in_cts = ct_deltas.min(dim=1).values > 0\n        is_in_cts_all = is_in_cts.sum(dim=1) > 0\n\n        # in boxes or in centers, shape: [num_priors]\n        is_in_gts_or_centers = is_in_gts_all | is_in_cts_all\n\n        # both in boxes and centers, shape: [num_fg, num_gt]\n        is_in_boxes_and_centers = (\n            is_in_gts[is_in_gts_or_centers, :]\n            & is_in_cts[is_in_gts_or_centers, :])\n        return is_in_gts_or_centers, is_in_boxes_and_centers\n\n    def dynamic_k_matching(self, cost, pairwise_ious, num_gt, valid_mask):\n        matching_matrix = torch.zeros_like(cost, dtype=torch.uint8)\n        # select candidate topk ious for dynamic-k calculation\n        candidate_topk = min(self.candidate_topk, pairwise_ious.size(0))\n        topk_ious, _ = torch.topk(pairwise_ious, candidate_topk, dim=0)\n        # calculate dynamic k for each gt\n        dynamic_ks = torch.clamp(topk_ious.sum(0).int(), min=1)\n        for gt_idx in range(num_gt):\n            _, pos_idx = torch.topk(\n                cost[:, gt_idx], k=dynamic_ks[gt_idx], largest=False)\n            matching_matrix[:, gt_idx][pos_idx] = 1\n\n        del topk_ious, dynamic_ks, pos_idx\n\n        prior_match_gt_mask = matching_matrix.sum(1) > 1\n        if prior_match_gt_mask.sum() > 0:\n            cost_min, cost_argmin = torch.min(\n                cost[prior_match_gt_mask, :], dim=1)\n            matching_matrix[prior_match_gt_mask, :] *= 0\n            matching_matrix[prior_match_gt_mask, cost_argmin] = 1\n        # get foreground mask inside box and center prior\n        fg_mask_inboxes = matching_matrix.sum(1) > 0\n        valid_mask[valid_mask.clone()] = fg_mask_inboxes\n\n        matched_gt_inds = matching_matrix[fg_mask_inboxes, :].argmax(1)\n        matched_pred_ious = (matching_matrix *\n                             pairwise_ious).sum(1)[fg_mask_inboxes]\n        return matched_pred_ious, matched_gt_inds\n"
  },
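The distinctive part of SimOTA is `dynamic_k_matching`: each gt's k is the clamped sum of its top-10 candidate IoUs, and the k lowest-cost priors for that gt become positives. A toy run of that core idea follows (the conflict-resolution step for priors matched to several gts is omitted for brevity), with arbitrary random inputs.

```python
# Toy run of the dynamic-k idea from SimOTAAssigner.dynamic_k_matching:
# k_g = clamp(sum of top-k IoUs with gt g, min=1); the k_g lowest-cost priors win.
import torch

torch.manual_seed(0)
num_priors, num_gt, candidate_topk = 6, 2, 3
pairwise_ious = torch.rand(num_priors, num_gt)
cost = torch.rand(num_priors, num_gt)

matching = torch.zeros_like(cost, dtype=torch.uint8)
topk_ious, _ = torch.topk(pairwise_ious, min(candidate_topk, num_priors), dim=0)
dynamic_ks = torch.clamp(topk_ious.sum(0).int(), min=1)     # one k per gt
for g in range(num_gt):
    _, pos_idx = torch.topk(cost[:, g], k=int(dynamic_ks[g]), largest=False)
    matching[pos_idx, g] = 1

print(dynamic_ks)        # how many positives each gt may take
print(matching.sum(0))   # how many priors were actually matched to each gt
```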
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/task_aligned_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..iou_calculators import build_iou_calculator\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\nINF = 100000000\n\n\n@BBOX_ASSIGNERS.register_module()\nclass TaskAlignedAssigner(BaseAssigner):\n    \"\"\"Task aligned assigner used in the paper:\n    `TOOD: Task-aligned One-stage Object Detection.\n    <https://arxiv.org/abs/2108.07755>`_.\n\n    Assign a corresponding gt bbox or background to each predicted bbox.\n    Each bbox will be assigned with `0` or a positive integer\n    indicating the ground truth index.\n\n    - 0: negative sample, no assigned gt\n    - positive integer: positive sample, index (1-based) of assigned gt\n\n    Args:\n        topk (int): number of bbox selected in each level\n        iou_calculator (dict): Config dict for iou calculator.\n            Default: dict(type='BboxOverlaps2D')\n    \"\"\"\n\n    def __init__(self, topk, iou_calculator=dict(type='BboxOverlaps2D')):\n        assert topk >= 1\n        self.topk = topk\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n\n    def assign(self,\n               pred_scores,\n               decode_bboxes,\n               anchors,\n               gt_bboxes,\n               gt_bboxes_ignore=None,\n               gt_labels=None,\n               alpha=1,\n               beta=6):\n        \"\"\"Assign gt to bboxes.\n\n        The assignment is done in following steps\n\n        1. compute alignment metric between all bbox (bbox of all pyramid\n           levels) and gt\n        2. select top-k bbox as candidates for each gt\n        3. limit the positive sample's center in gt (because the anchor-free\n           detector only can predict positive distance)\n\n\n        Args:\n            pred_scores (Tensor): predicted class probability,\n                shape(n, num_classes)\n            decode_bboxes (Tensor): predicted bounding boxes, shape(n, 4)\n            anchors (Tensor): pre-defined anchors, shape(n, 4).\n            gt_bboxes (Tensor): Groundtruth boxes, shape (k, 4).\n            gt_bboxes_ignore (Tensor, optional): Ground truth bboxes that are\n                labelled as `ignored`, e.g., crowd boxes in COCO.\n            gt_labels (Tensor, optional): Label of gt_bboxes, shape (k, ).\n\n        Returns:\n            :obj:`TaskAlignedAssignResult`: The assign result.\n        \"\"\"\n        anchors = anchors[:, :4]\n        num_gt, num_bboxes = gt_bboxes.size(0), anchors.size(0)\n        # compute alignment metric between all bbox and gt\n        overlaps = self.iou_calculator(decode_bboxes, gt_bboxes).detach()\n        bbox_scores = pred_scores[:, gt_labels].detach()\n        # assign 0 by default\n        assigned_gt_inds = anchors.new_full((num_bboxes, ),\n                                            0,\n                                            dtype=torch.long)\n        assign_metrics = anchors.new_zeros((num_bboxes, ))\n\n        if num_gt == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            max_overlaps = anchors.new_zeros((num_bboxes, ))\n            if num_gt == 0:\n                # No gt boxes, assign everything to background\n                assigned_gt_inds[:] = 0\n            if gt_labels is None:\n                assigned_labels = None\n            else:\n                assigned_labels = anchors.new_full((num_bboxes, ),\n                                                
   -1,\n                                                   dtype=torch.long)\n            assign_result = AssignResult(\n                num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n            assign_result.assign_metrics = assign_metrics\n            return assign_result\n\n        # select top-k bboxes as candidates for each gt\n        alignment_metrics = bbox_scores**alpha * overlaps**beta\n        topk = min(self.topk, alignment_metrics.size(0))\n        _, candidate_idxs = alignment_metrics.topk(topk, dim=0, largest=True)\n        candidate_metrics = alignment_metrics[candidate_idxs,\n                                              torch.arange(num_gt)]\n        is_pos = candidate_metrics > 0\n\n        # limit the positive sample's center in gt\n        anchors_cx = (anchors[:, 0] + anchors[:, 2]) / 2.0\n        anchors_cy = (anchors[:, 1] + anchors[:, 3]) / 2.0\n        for gt_idx in range(num_gt):\n            candidate_idxs[:, gt_idx] += gt_idx * num_bboxes\n        ep_anchors_cx = anchors_cx.view(1, -1).expand(\n            num_gt, num_bboxes).contiguous().view(-1)\n        ep_anchors_cy = anchors_cy.view(1, -1).expand(\n            num_gt, num_bboxes).contiguous().view(-1)\n        candidate_idxs = candidate_idxs.view(-1)\n\n        # calculate the left, top, right, bottom distance between positive\n        # bbox center and gt side\n        l_ = ep_anchors_cx[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 0]\n        t_ = ep_anchors_cy[candidate_idxs].view(-1, num_gt) - gt_bboxes[:, 1]\n        r_ = gt_bboxes[:, 2] - ep_anchors_cx[candidate_idxs].view(-1, num_gt)\n        b_ = gt_bboxes[:, 3] - ep_anchors_cy[candidate_idxs].view(-1, num_gt)\n        is_in_gts = torch.stack([l_, t_, r_, b_], dim=1).min(dim=1)[0] > 0.01\n        is_pos = is_pos & is_in_gts\n\n        # if an anchor box is assigned to multiple gts,\n        # the one with the highest iou will be selected.\n        overlaps_inf = torch.full_like(overlaps,\n                                       -INF).t().contiguous().view(-1)\n        index = candidate_idxs.view(-1)[is_pos.view(-1)]\n        overlaps_inf[index] = overlaps.t().contiguous().view(-1)[index]\n        overlaps_inf = overlaps_inf.view(num_gt, -1).t()\n\n        max_overlaps, argmax_overlaps = overlaps_inf.max(dim=1)\n        assigned_gt_inds[\n            max_overlaps != -INF] = argmax_overlaps[max_overlaps != -INF] + 1\n        assign_metrics[max_overlaps != -INF] = alignment_metrics[\n            max_overlaps != -INF, argmax_overlaps[max_overlaps != -INF]]\n\n        if gt_labels is not None:\n            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)\n            pos_inds = torch.nonzero(\n                assigned_gt_inds > 0, as_tuple=False).squeeze()\n            if pos_inds.numel() > 0:\n                assigned_labels[pos_inds] = gt_labels[\n                    assigned_gt_inds[pos_inds] - 1]\n        else:\n            assigned_labels = None\n        assign_result = AssignResult(\n            num_gt, assigned_gt_inds, max_overlaps, labels=assigned_labels)\n        assign_result.assign_metrics = assign_metrics\n        return assign_result\n"
  },
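The quantity TaskAlignedAssigner ranks candidates by is the TOOD alignment metric t = s^alpha * u^beta, where s is the predicted score for the gt class and u is the IoU with that gt (alpha=1, beta=6 by default, as in the signature above). Computed directly on a tiny example:

```python
# The alignment metric t = s**alpha * u**beta, ranked per gt as in
# TaskAlignedAssigner (alpha=1, beta=6 are the defaults shown above).
import torch

alpha, beta, topk = 1, 6, 2
pred_scores = torch.tensor([[0.9, 0.1],    # 3 boxes x 2 classes
                            [0.4, 0.6],
                            [0.2, 0.8]])
overlaps = torch.tensor([[0.7], [0.5], [0.9]])   # IoU of each box with the single gt
gt_labels = torch.tensor([1])                    # gt is class 1

bbox_scores = pred_scores[:, gt_labels]          # score of the gt class, shape [3, 1]
alignment = bbox_scores**alpha * overlaps**beta  # shape [3, 1]
_, candidate_idxs = alignment.topk(min(topk, alignment.size(0)), dim=0)
print(alignment.squeeze(1))        # approximately tensor([0.0118, 0.0094, 0.4252])
print(candidate_idxs.squeeze(1))   # tensor([2, 0]): box 2, then box 0, are the candidates
```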
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/assigners/uniform_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_ASSIGNERS\nfrom ..iou_calculators import build_iou_calculator\nfrom ..transforms import bbox_xyxy_to_cxcywh\nfrom .assign_result import AssignResult\nfrom .base_assigner import BaseAssigner\n\n\n@BBOX_ASSIGNERS.register_module()\nclass UniformAssigner(BaseAssigner):\n    \"\"\"Uniform Matching between the anchors and gt boxes, which can achieve\n    balance in positive anchors, and gt_bboxes_ignore was not considered for\n    now.\n\n    Args:\n        pos_ignore_thr (float): the threshold to ignore positive anchors\n        neg_ignore_thr (float): the threshold to ignore negative anchors\n        match_times(int): Number of positive anchors for each gt box.\n           Default 4.\n        iou_calculator (dict): iou_calculator config\n    \"\"\"\n\n    def __init__(self,\n                 pos_ignore_thr,\n                 neg_ignore_thr,\n                 match_times=4,\n                 iou_calculator=dict(type='BboxOverlaps2D')):\n        self.match_times = match_times\n        self.pos_ignore_thr = pos_ignore_thr\n        self.neg_ignore_thr = neg_ignore_thr\n        self.iou_calculator = build_iou_calculator(iou_calculator)\n\n    def assign(self,\n               bbox_pred,\n               anchor,\n               gt_bboxes,\n               gt_bboxes_ignore=None,\n               gt_labels=None):\n        num_gts, num_bboxes = gt_bboxes.size(0), bbox_pred.size(0)\n\n        # 1. assign -1 by default\n        assigned_gt_inds = bbox_pred.new_full((num_bboxes, ),\n                                              0,\n                                              dtype=torch.long)\n        assigned_labels = bbox_pred.new_full((num_bboxes, ),\n                                             -1,\n                                             dtype=torch.long)\n        if num_gts == 0 or num_bboxes == 0:\n            # No ground truth or boxes, return empty assignment\n            if num_gts == 0:\n                # No ground truth, assign all to background\n                assigned_gt_inds[:] = 0\n            assign_result = AssignResult(\n                num_gts, assigned_gt_inds, None, labels=assigned_labels)\n            assign_result.set_extra_property(\n                'pos_idx', bbox_pred.new_empty(0, dtype=torch.bool))\n            assign_result.set_extra_property('pos_predicted_boxes',\n                                             bbox_pred.new_empty((0, 4)))\n            assign_result.set_extra_property('target_boxes',\n                                             bbox_pred.new_empty((0, 4)))\n            return assign_result\n\n        # 2. Compute the L1 cost between boxes\n        # Note that we use anchors and predict boxes both\n        cost_bbox = torch.cdist(\n            bbox_xyxy_to_cxcywh(bbox_pred),\n            bbox_xyxy_to_cxcywh(gt_bboxes),\n            p=1)\n        cost_bbox_anchors = torch.cdist(\n            bbox_xyxy_to_cxcywh(anchor), bbox_xyxy_to_cxcywh(gt_bboxes), p=1)\n\n        # We found that topk function has different results in cpu and\n        # cuda mode. 
In order to ensure consistency with the source code,\n        # we also use cpu mode.\n        # TODO: Check whether the performance of cpu and cuda are the same.\n        C = cost_bbox.cpu()\n        C1 = cost_bbox_anchors.cpu()\n\n        # self.match_times x n\n        index = torch.topk(\n            C,  # c=b,n,x c[i]=n,x\n            k=self.match_times,\n            dim=0,\n            largest=False)[1]\n\n        # self.match_times x n\n        index1 = torch.topk(C1, k=self.match_times, dim=0, largest=False)[1]\n        # (self.match_times*2) x n\n        indexes = torch.cat((index, index1),\n                            dim=1).reshape(-1).to(bbox_pred.device)\n\n        pred_overlaps = self.iou_calculator(bbox_pred, gt_bboxes)\n        anchor_overlaps = self.iou_calculator(anchor, gt_bboxes)\n        pred_max_overlaps, _ = pred_overlaps.max(dim=1)\n        anchor_max_overlaps, _ = anchor_overlaps.max(dim=0)\n\n        # 3. Compute the ignore indexes use gt_bboxes and predict boxes\n        ignore_idx = pred_max_overlaps > self.neg_ignore_thr\n        assigned_gt_inds[ignore_idx] = -1\n\n        # 4. Compute the ignore indexes of positive sample use anchors\n        # and predict boxes\n        pos_gt_index = torch.arange(\n            0, C1.size(1),\n            device=bbox_pred.device).repeat(self.match_times * 2)\n        pos_ious = anchor_overlaps[indexes, pos_gt_index]\n        pos_ignore_idx = pos_ious < self.pos_ignore_thr\n\n        pos_gt_index_with_ignore = pos_gt_index + 1\n        pos_gt_index_with_ignore[pos_ignore_idx] = -1\n        assigned_gt_inds[indexes] = pos_gt_index_with_ignore\n\n        if gt_labels is not None:\n            assigned_labels = assigned_gt_inds.new_full((num_bboxes, ), -1)\n            pos_inds = torch.nonzero(\n                assigned_gt_inds > 0, as_tuple=False).squeeze()\n            if pos_inds.numel() > 0:\n                assigned_labels[pos_inds] = gt_labels[\n                    assigned_gt_inds[pos_inds] - 1]\n        else:\n            assigned_labels = None\n\n        assign_result = AssignResult(\n            num_gts,\n            assigned_gt_inds,\n            anchor_max_overlaps,\n            labels=assigned_labels)\n        assign_result.set_extra_property('pos_idx', ~pos_ignore_idx)\n        assign_result.set_extra_property('pos_predicted_boxes',\n                                         bbox_pred[indexes])\n        assign_result.set_extra_property('target_boxes',\n                                         gt_bboxes[pos_gt_index])\n        return assign_result\n"
  },
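UniformAssigner's matching is simply "for every gt, take the `match_times` closest boxes in L1 (cx, cy, w, h) distance, measured both from the predictions and from the anchors", which keeps the number of positives per gt constant. Step 2 in isolation, on toy boxes:

```python
# Core of UniformAssigner step 2: for every gt, pick the match_times predictions
# and match_times anchors with the smallest L1 distance in (cx, cy, w, h) space.
import torch

def xyxy_to_cxcywh(b):
    cx = (b[:, 0] + b[:, 2]) / 2
    cy = (b[:, 1] + b[:, 3]) / 2
    return torch.stack([cx, cy, b[:, 2] - b[:, 0], b[:, 3] - b[:, 1]], dim=-1)

match_times = 2
bbox_pred = torch.rand(6, 4).sort(dim=-1).values * 100   # 6 toy xyxy predictions
anchor = torch.rand(6, 4).sort(dim=-1).values * 100      # 6 toy xyxy anchors
gt_bboxes = torch.tensor([[10., 10., 40., 40.]])

cost_pred = torch.cdist(xyxy_to_cxcywh(bbox_pred), xyxy_to_cxcywh(gt_bboxes), p=1)
cost_anchor = torch.cdist(xyxy_to_cxcywh(anchor), xyxy_to_cxcywh(gt_bboxes), p=1)
index = torch.topk(cost_pred, k=match_times, dim=0, largest=False)[1]
index1 = torch.topk(cost_anchor, k=match_times, dim=0, largest=False)[1]
print(index.squeeze(1), index1.squeeze(1))   # 2 prediction indices and 2 anchor indices per gt
```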
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.utils import Registry, build_from_cfg\n\nBBOX_ASSIGNERS = Registry('bbox_assigner')\nBBOX_SAMPLERS = Registry('bbox_sampler')\nBBOX_CODERS = Registry('bbox_coder')\n\n\ndef build_assigner(cfg, **default_args):\n    \"\"\"Builder of box assigner.\"\"\"\n    return build_from_cfg(cfg, BBOX_ASSIGNERS, default_args)\n\n\ndef build_sampler(cfg, **default_args):\n    \"\"\"Builder of box sampler.\"\"\"\n    return build_from_cfg(cfg, BBOX_SAMPLERS, default_args)\n\n\ndef build_bbox_coder(cfg, **default_args):\n    \"\"\"Builder of box coder.\"\"\"\n    return build_from_cfg(cfg, BBOX_CODERS, default_args)\n"
  },
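These registries are how assigners, samplers, and coders get instantiated from config dicts elsewhere in mmdetection. A small sketch follows; the threshold values are illustrative, and it assumes (as in upstream mmdetection) that importing any `mmdet.core.bbox` submodule runs the package `__init__`, which registers the concrete classes.

```python
from mmdet.core.bbox.builder import build_assigner, build_bbox_coder

# Illustrative configs; any registered type name works the same way.
assigner = build_assigner(
    dict(type='UniformAssigner', pos_ignore_thr=0.15, neg_ignore_thr=0.7))

bbox_coder = build_bbox_coder(
    dict(type='DeltaXYWHBBoxCoder',
         target_means=(0., 0., 0., 0.),
         target_stds=(0.1, 0.1, 0.2, 0.2)))
```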
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_bbox_coder import BaseBBoxCoder\nfrom .bucketing_bbox_coder import BucketingBBoxCoder\nfrom .delta_xywh_bbox_coder import DeltaXYWHBBoxCoder\nfrom .distance_point_bbox_coder import DistancePointBBoxCoder\nfrom .legacy_delta_xywh_bbox_coder import LegacyDeltaXYWHBBoxCoder\nfrom .pseudo_bbox_coder import PseudoBBoxCoder\nfrom .tblr_bbox_coder import TBLRBBoxCoder\nfrom .yolo_bbox_coder import YOLOBBoxCoder\n\n__all__ = [\n    'BaseBBoxCoder', 'PseudoBBoxCoder', 'DeltaXYWHBBoxCoder',\n    'LegacyDeltaXYWHBBoxCoder', 'TBLRBBoxCoder', 'YOLOBBoxCoder',\n    'BucketingBBoxCoder', 'DistancePointBBoxCoder'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/base_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\n\nclass BaseBBoxCoder(metaclass=ABCMeta):\n    \"\"\"Base bounding box coder.\"\"\"\n\n    def __init__(self, **kwargs):\n        pass\n\n    @abstractmethod\n    def encode(self, bboxes, gt_bboxes):\n        \"\"\"Encode deltas between bboxes and ground truth boxes.\"\"\"\n\n    @abstractmethod\n    def decode(self, bboxes, bboxes_pred):\n        \"\"\"Decode the predicted bboxes according to prediction and base\n        boxes.\"\"\"\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/bucketing_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\n\nfrom ..builder import BBOX_CODERS\nfrom ..transforms import bbox_rescale\nfrom .base_bbox_coder import BaseBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass BucketingBBoxCoder(BaseBBoxCoder):\n    \"\"\"Bucketing BBox Coder for Side-Aware Boundary Localization (SABL).\n\n    Boundary Localization with Bucketing and Bucketing Guided Rescoring\n    are implemented here.\n\n    Please refer to https://arxiv.org/abs/1912.04260 for more details.\n\n    Args:\n        num_buckets (int): Number of buckets.\n        scale_factor (int): Scale factor of proposals to generate buckets.\n        offset_topk (int): Topk buckets are used to generate\n             bucket fine regression targets. Defaults to 2.\n        offset_upperbound (float): Offset upperbound to generate\n             bucket fine regression targets.\n             To avoid too large offset displacements. Defaults to 1.0.\n        cls_ignore_neighbor (bool): Ignore second nearest bucket or Not.\n             Defaults to True.\n        clip_border (bool, optional): Whether clip the objects outside the\n            border of the image. Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 num_buckets,\n                 scale_factor,\n                 offset_topk=2,\n                 offset_upperbound=1.0,\n                 cls_ignore_neighbor=True,\n                 clip_border=True):\n        super(BucketingBBoxCoder, self).__init__()\n        self.num_buckets = num_buckets\n        self.scale_factor = scale_factor\n        self.offset_topk = offset_topk\n        self.offset_upperbound = offset_upperbound\n        self.cls_ignore_neighbor = cls_ignore_neighbor\n        self.clip_border = clip_border\n\n    def encode(self, bboxes, gt_bboxes):\n        \"\"\"Get bucketing estimation and fine regression targets during\n        training.\n\n        Args:\n            bboxes (torch.Tensor): source boxes, e.g., object proposals.\n            gt_bboxes (torch.Tensor): target of the transformation, e.g.,\n                ground truth boxes.\n\n        Returns:\n           encoded_bboxes(tuple[Tensor]): bucketing estimation\n            and fine regression targets and weights\n        \"\"\"\n\n        assert bboxes.size(0) == gt_bboxes.size(0)\n        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4\n        encoded_bboxes = bbox2bucket(bboxes, gt_bboxes, self.num_buckets,\n                                     self.scale_factor, self.offset_topk,\n                                     self.offset_upperbound,\n                                     self.cls_ignore_neighbor)\n        return encoded_bboxes\n\n    def decode(self, bboxes, pred_bboxes, max_shape=None):\n        \"\"\"Apply transformation `pred_bboxes` to `boxes`.\n        Args:\n            boxes (torch.Tensor): Basic boxes.\n            pred_bboxes (torch.Tensor): Predictions for bucketing estimation\n                and fine regression\n            max_shape (tuple[int], optional): Maximum shape of boxes.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n        assert len(pred_bboxes) == 2\n        cls_preds, offset_preds = pred_bboxes\n        assert cls_preds.size(0) == bboxes.size(0) and offset_preds.size(\n            0) == bboxes.size(0)\n        decoded_bboxes = bucket2bbox(bboxes, cls_preds, offset_preds,\n                                     
self.num_buckets, self.scale_factor,\n                                     max_shape, self.clip_border)\n\n        return decoded_bboxes\n\n\n@mmcv.jit(coderize=True)\ndef generat_buckets(proposals, num_buckets, scale_factor=1.0):\n    \"\"\"Generate buckets w.r.t bucket number and scale factor of proposals.\n\n    Args:\n        proposals (Tensor): Shape (n, 4)\n        num_buckets (int): Number of buckets.\n        scale_factor (float): Scale factor to rescale proposals.\n\n    Returns:\n        tuple[Tensor]: (bucket_w, bucket_h, l_buckets, r_buckets,\n         t_buckets, d_buckets)\n\n            - bucket_w: Width of buckets on x-axis. Shape (n, ).\n            - bucket_h: Height of buckets on y-axis. Shape (n, ).\n            - l_buckets: Left buckets. Shape (n, ceil(side_num/2)).\n            - r_buckets: Right buckets. Shape (n, ceil(side_num/2)).\n            - t_buckets: Top buckets. Shape (n, ceil(side_num/2)).\n            - d_buckets: Down buckets. Shape (n, ceil(side_num/2)).\n    \"\"\"\n    proposals = bbox_rescale(proposals, scale_factor)\n\n    # number of buckets in each side\n    side_num = int(np.ceil(num_buckets / 2.0))\n    pw = proposals[..., 2] - proposals[..., 0]\n    ph = proposals[..., 3] - proposals[..., 1]\n    px1 = proposals[..., 0]\n    py1 = proposals[..., 1]\n    px2 = proposals[..., 2]\n    py2 = proposals[..., 3]\n\n    bucket_w = pw / num_buckets\n    bucket_h = ph / num_buckets\n\n    # left buckets\n    l_buckets = px1[:, None] + (0.5 + torch.arange(\n        0, side_num).to(proposals).float())[None, :] * bucket_w[:, None]\n    # right buckets\n    r_buckets = px2[:, None] - (0.5 + torch.arange(\n        0, side_num).to(proposals).float())[None, :] * bucket_w[:, None]\n    # top buckets\n    t_buckets = py1[:, None] + (0.5 + torch.arange(\n        0, side_num).to(proposals).float())[None, :] * bucket_h[:, None]\n    # down buckets\n    d_buckets = py2[:, None] - (0.5 + torch.arange(\n        0, side_num).to(proposals).float())[None, :] * bucket_h[:, None]\n    return bucket_w, bucket_h, l_buckets, r_buckets, t_buckets, d_buckets\n\n\n@mmcv.jit(coderize=True)\ndef bbox2bucket(proposals,\n                gt,\n                num_buckets,\n                scale_factor,\n                offset_topk=2,\n                offset_upperbound=1.0,\n                cls_ignore_neighbor=True):\n    \"\"\"Generate buckets estimation and fine regression targets.\n\n    Args:\n        proposals (Tensor): Shape (n, 4)\n        gt (Tensor): Shape (n, 4)\n        num_buckets (int): Number of buckets.\n        scale_factor (float): Scale factor to rescale proposals.\n        offset_topk (int): Topk buckets are used to generate\n             bucket fine regression targets. Defaults to 2.\n        offset_upperbound (float): Offset allowance to generate\n             bucket fine regression targets.\n             To avoid too large offset displacements. Defaults to 1.0.\n        cls_ignore_neighbor (bool): Ignore second nearest bucket or Not.\n             Defaults to True.\n\n    Returns:\n        tuple[Tensor]: (offsets, offsets_weights, bucket_labels, cls_weights).\n\n            - offsets: Fine regression targets. \\\n                Shape (n, num_buckets*2).\n            - offsets_weights: Fine regression weights. \\\n                Shape (n, num_buckets*2).\n            - bucket_labels: Bucketing estimation labels. \\\n                Shape (n, num_buckets*2).\n            - cls_weights: Bucketing estimation weights. 
\\\n                Shape (n, num_buckets*2).\n    \"\"\"\n    assert proposals.size() == gt.size()\n\n    # generate buckets\n    proposals = proposals.float()\n    gt = gt.float()\n    (bucket_w, bucket_h, l_buckets, r_buckets, t_buckets,\n     d_buckets) = generat_buckets(proposals, num_buckets, scale_factor)\n\n    gx1 = gt[..., 0]\n    gy1 = gt[..., 1]\n    gx2 = gt[..., 2]\n    gy2 = gt[..., 3]\n\n    # generate offset targets and weights\n    # offsets from buckets to gts\n    l_offsets = (l_buckets - gx1[:, None]) / bucket_w[:, None]\n    r_offsets = (r_buckets - gx2[:, None]) / bucket_w[:, None]\n    t_offsets = (t_buckets - gy1[:, None]) / bucket_h[:, None]\n    d_offsets = (d_buckets - gy2[:, None]) / bucket_h[:, None]\n\n    # select top-k nearest buckets\n    l_topk, l_label = l_offsets.abs().topk(\n        offset_topk, dim=1, largest=False, sorted=True)\n    r_topk, r_label = r_offsets.abs().topk(\n        offset_topk, dim=1, largest=False, sorted=True)\n    t_topk, t_label = t_offsets.abs().topk(\n        offset_topk, dim=1, largest=False, sorted=True)\n    d_topk, d_label = d_offsets.abs().topk(\n        offset_topk, dim=1, largest=False, sorted=True)\n\n    offset_l_weights = l_offsets.new_zeros(l_offsets.size())\n    offset_r_weights = r_offsets.new_zeros(r_offsets.size())\n    offset_t_weights = t_offsets.new_zeros(t_offsets.size())\n    offset_d_weights = d_offsets.new_zeros(d_offsets.size())\n    inds = torch.arange(0, proposals.size(0)).to(proposals).long()\n\n    # generate offset weights of top-k nearest buckets\n    for k in range(offset_topk):\n        if k >= 1:\n            offset_l_weights[inds, l_label[:,\n                                           k]] = (l_topk[:, k] <\n                                                  offset_upperbound).float()\n            offset_r_weights[inds, r_label[:,\n                                           k]] = (r_topk[:, k] <\n                                                  offset_upperbound).float()\n            offset_t_weights[inds, t_label[:,\n                                           k]] = (t_topk[:, k] <\n                                                  offset_upperbound).float()\n            offset_d_weights[inds, d_label[:,\n                                           k]] = (d_topk[:, k] <\n                                                  offset_upperbound).float()\n        else:\n            offset_l_weights[inds, l_label[:, k]] = 1.0\n            offset_r_weights[inds, r_label[:, k]] = 1.0\n            offset_t_weights[inds, t_label[:, k]] = 1.0\n            offset_d_weights[inds, d_label[:, k]] = 1.0\n\n    offsets = torch.cat([l_offsets, r_offsets, t_offsets, d_offsets], dim=-1)\n    offsets_weights = torch.cat([\n        offset_l_weights, offset_r_weights, offset_t_weights, offset_d_weights\n    ],\n                                dim=-1)\n\n    # generate bucket labels and weight\n    side_num = int(np.ceil(num_buckets / 2.0))\n    labels = torch.stack(\n        [l_label[:, 0], r_label[:, 0], t_label[:, 0], d_label[:, 0]], dim=-1)\n\n    batch_size = labels.size(0)\n    bucket_labels = F.one_hot(labels.view(-1), side_num).view(batch_size,\n                                                              -1).float()\n    bucket_cls_l_weights = (l_offsets.abs() < 1).float()\n    bucket_cls_r_weights = (r_offsets.abs() < 1).float()\n    bucket_cls_t_weights = (t_offsets.abs() < 1).float()\n    bucket_cls_d_weights = (d_offsets.abs() < 1).float()\n    bucket_cls_weights = torch.cat([\n        bucket_cls_l_weights, 
bucket_cls_r_weights, bucket_cls_t_weights,\n        bucket_cls_d_weights\n    ],\n                                   dim=-1)\n    # ignore second nearest buckets for cls if necessary\n    if cls_ignore_neighbor:\n        bucket_cls_weights = (~((bucket_cls_weights == 1) &\n                                (bucket_labels == 0))).float()\n    else:\n        bucket_cls_weights[:] = 1.0\n    return offsets, offsets_weights, bucket_labels, bucket_cls_weights\n\n\n@mmcv.jit(coderize=True)\ndef bucket2bbox(proposals,\n                cls_preds,\n                offset_preds,\n                num_buckets,\n                scale_factor=1.0,\n                max_shape=None,\n                clip_border=True):\n    \"\"\"Apply bucketing estimation (cls preds) and fine regression (offset\n    preds) to generate det bboxes.\n\n    Args:\n        proposals (Tensor): Boxes to be transformed. Shape (n, 4)\n        cls_preds (Tensor): bucketing estimation. Shape (n, num_buckets*2).\n        offset_preds (Tensor): fine regression. Shape (n, num_buckets*2).\n        num_buckets (int): Number of buckets.\n        scale_factor (float): Scale factor to rescale proposals.\n        max_shape (tuple[int, int]): Maximum bounds for boxes. specifies (H, W)\n        clip_border (bool, optional): Whether clip the objects outside the\n            border of the image. Defaults to True.\n\n    Returns:\n        tuple[Tensor]: (bboxes, loc_confidence).\n\n            - bboxes: predicted bboxes. Shape (n, 4)\n            - loc_confidence: localization confidence of predicted bboxes.\n                Shape (n,).\n    \"\"\"\n\n    side_num = int(np.ceil(num_buckets / 2.0))\n    cls_preds = cls_preds.view(-1, side_num)\n    offset_preds = offset_preds.view(-1, side_num)\n\n    scores = F.softmax(cls_preds, dim=1)\n    score_topk, score_label = scores.topk(2, dim=1, largest=True, sorted=True)\n\n    rescaled_proposals = bbox_rescale(proposals, scale_factor)\n\n    pw = rescaled_proposals[..., 2] - rescaled_proposals[..., 0]\n    ph = rescaled_proposals[..., 3] - rescaled_proposals[..., 1]\n    px1 = rescaled_proposals[..., 0]\n    py1 = rescaled_proposals[..., 1]\n    px2 = rescaled_proposals[..., 2]\n    py2 = rescaled_proposals[..., 3]\n\n    bucket_w = pw / num_buckets\n    bucket_h = ph / num_buckets\n\n    score_inds_l = score_label[0::4, 0]\n    score_inds_r = score_label[1::4, 0]\n    score_inds_t = score_label[2::4, 0]\n    score_inds_d = score_label[3::4, 0]\n    l_buckets = px1 + (0.5 + score_inds_l.float()) * bucket_w\n    r_buckets = px2 - (0.5 + score_inds_r.float()) * bucket_w\n    t_buckets = py1 + (0.5 + score_inds_t.float()) * bucket_h\n    d_buckets = py2 - (0.5 + score_inds_d.float()) * bucket_h\n\n    offsets = offset_preds.view(-1, 4, side_num)\n    inds = torch.arange(proposals.size(0)).to(proposals).long()\n    l_offsets = offsets[:, 0, :][inds, score_inds_l]\n    r_offsets = offsets[:, 1, :][inds, score_inds_r]\n    t_offsets = offsets[:, 2, :][inds, score_inds_t]\n    d_offsets = offsets[:, 3, :][inds, score_inds_d]\n\n    x1 = l_buckets - l_offsets * bucket_w\n    x2 = r_buckets - r_offsets * bucket_w\n    y1 = t_buckets - t_offsets * bucket_h\n    y2 = d_buckets - d_offsets * bucket_h\n\n    if clip_border and max_shape is not None:\n        x1 = x1.clamp(min=0, max=max_shape[1] - 1)\n        y1 = y1.clamp(min=0, max=max_shape[0] - 1)\n        x2 = x2.clamp(min=0, max=max_shape[1] - 1)\n        y2 = y2.clamp(min=0, max=max_shape[0] - 1)\n    bboxes = torch.cat([x1[:, None], y1[:, None], x2[:, None], 
y2[:, None]],\n                       dim=-1)\n\n    # bucketing guided rescoring\n    loc_confidence = score_topk[:, 0]\n    top2_neighbor_inds = (score_label[:, 0] - score_label[:, 1]).abs() == 1\n    loc_confidence += score_topk[:, 1] * top2_neighbor_inds.float()\n    loc_confidence = loc_confidence.view(-1, 4).mean(dim=1)\n\n    return bboxes, loc_confidence\n"
  },
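A round-trip sanity check of the bucketing coder: feeding the training targets produced by `encode` straight back into `decode` recovers the ground-truth boxes, which makes the bucket/offset geometry easy to verify. The box values below are made up; `num_buckets=14` and `scale_factor=1.7` are typical SABL settings, not a requirement.

```python
import torch
from mmdet.core.bbox.coder import BucketingBBoxCoder

proposals = torch.tensor([[0., 0., 40., 40.],
                          [10., 20., 50., 80.]])
gts = torch.tensor([[2., 4., 38., 36.],
                    [12., 22., 48., 76.]])

coder = BucketingBBoxCoder(num_buckets=14, scale_factor=1.7)
offsets, offset_weights, bucket_labels, cls_weights = coder.encode(proposals, gts)

# One-hot bucket labels act as "perfect" cls_preds and the full offset targets
# act as offset_preds, so decoding selects the nearest bucket and undoes the
# offset, reproducing the ground truth (no max_shape, so nothing is clipped).
decoded, loc_confidence = coder.decode(proposals, (bucket_labels, offsets))
assert torch.allclose(decoded, gts, atol=1e-3)
```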
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/delta_xywh_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport mmcv\nimport numpy as np\nimport torch\n\nfrom ..builder import BBOX_CODERS\nfrom .base_bbox_coder import BaseBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass DeltaXYWHBBoxCoder(BaseBBoxCoder):\n    \"\"\"Delta XYWH BBox coder.\n\n    Following the practice in `R-CNN <https://arxiv.org/abs/1311.2524>`_,\n    this coder encodes bbox (x1, y1, x2, y2) into delta (dx, dy, dw, dh) and\n    decodes delta (dx, dy, dw, dh) back to original bbox (x1, y1, x2, y2).\n\n    Args:\n        target_means (Sequence[float]): Denormalizing means of target for\n            delta coordinates\n        target_stds (Sequence[float]): Denormalizing standard deviation of\n            target for delta coordinates\n        clip_border (bool, optional): Whether clip the objects outside the\n            border of the image. Defaults to True.\n        add_ctr_clamp (bool): Whether to add center clamp, when added, the\n            predicted box is clamped is its center is too far away from\n            the original anchor's center. Only used by YOLOF. Default False.\n        ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF.\n            Default 32.\n    \"\"\"\n\n    def __init__(self,\n                 target_means=(0., 0., 0., 0.),\n                 target_stds=(1., 1., 1., 1.),\n                 clip_border=True,\n                 add_ctr_clamp=False,\n                 ctr_clamp=32):\n        super(BaseBBoxCoder, self).__init__()\n        self.means = target_means\n        self.stds = target_stds\n        self.clip_border = clip_border\n        self.add_ctr_clamp = add_ctr_clamp\n        self.ctr_clamp = ctr_clamp\n\n    def encode(self, bboxes, gt_bboxes):\n        \"\"\"Get box regression transformation deltas that can be used to\n        transform the ``bboxes`` into the ``gt_bboxes``.\n\n        Args:\n            bboxes (torch.Tensor): Source boxes, e.g., object proposals.\n            gt_bboxes (torch.Tensor): Target of the transformation, e.g.,\n                ground-truth boxes.\n\n        Returns:\n            torch.Tensor: Box transformation deltas\n        \"\"\"\n\n        assert bboxes.size(0) == gt_bboxes.size(0)\n        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4\n        encoded_bboxes = bbox2delta(bboxes, gt_bboxes, self.means, self.stds)\n        return encoded_bboxes\n\n    def decode(self,\n               bboxes,\n               pred_bboxes,\n               max_shape=None,\n               wh_ratio_clip=16 / 1000):\n        \"\"\"Apply transformation `pred_bboxes` to `boxes`.\n\n        Args:\n            bboxes (torch.Tensor): Basic boxes. Shape (B, N, 4) or (N, 4)\n            pred_bboxes (Tensor): Encoded offsets with respect to each roi.\n               Has shape (B, N, num_classes * 4) or (B, N, 4) or\n               (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H\n               when rois is a grid of anchors.Offset encoding follows [1]_.\n            max_shape (Sequence[int] or torch.Tensor or Sequence[\n               Sequence[int]],optional): Maximum bounds for boxes, specifies\n               (H, W, C) or (H, W). 
If bboxes shape is (B, N, 4), then\n               the max_shape should be a Sequence[Sequence[int]]\n               and the length of max_shape should also be B.\n            wh_ratio_clip (float, optional): The allowed ratio between\n                width and height.\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n\n        assert pred_bboxes.size(0) == bboxes.size(0)\n        if pred_bboxes.ndim == 3:\n            assert pred_bboxes.size(1) == bboxes.size(1)\n\n        if pred_bboxes.ndim == 2 and not torch.onnx.is_in_onnx_export():\n            # single image decode\n            decoded_bboxes = delta2bbox(bboxes, pred_bboxes, self.means,\n                                        self.stds, max_shape, wh_ratio_clip,\n                                        self.clip_border, self.add_ctr_clamp,\n                                        self.ctr_clamp)\n        else:\n            if pred_bboxes.ndim == 3 and not torch.onnx.is_in_onnx_export():\n                warnings.warn(\n                    'DeprecationWarning: onnx_delta2bbox is deprecated '\n                    'in the case of batch decoding and non-ONNX, '\n                    'please use “delta2bbox” instead. In order to improve '\n                    'the decoding speed, the batch function will no '\n                    'longer be supported. ')\n            decoded_bboxes = onnx_delta2bbox(bboxes, pred_bboxes, self.means,\n                                             self.stds, max_shape,\n                                             wh_ratio_clip, self.clip_border,\n                                             self.add_ctr_clamp,\n                                             self.ctr_clamp)\n\n        return decoded_bboxes\n\n\n@mmcv.jit(coderize=True)\ndef bbox2delta(proposals, gt, means=(0., 0., 0., 0.), stds=(1., 1., 1., 1.)):\n    \"\"\"Compute deltas of proposals w.r.t. 
gt.\n\n    We usually compute the deltas of x, y, w, h of proposals w.r.t ground\n    truth bboxes to get regression target.\n    This is the inverse function of :func:`delta2bbox`.\n\n    Args:\n        proposals (Tensor): Boxes to be transformed, shape (N, ..., 4)\n        gt (Tensor): Gt bboxes to be used as base, shape (N, ..., 4)\n        means (Sequence[float]): Denormalizing means for delta coordinates\n        stds (Sequence[float]): Denormalizing standard deviation for delta\n            coordinates\n\n    Returns:\n        Tensor: deltas with shape (N, 4), where columns represent dx, dy,\n            dw, dh.\n    \"\"\"\n    assert proposals.size() == gt.size()\n\n    proposals = proposals.float()\n    gt = gt.float()\n    px = (proposals[..., 0] + proposals[..., 2]) * 0.5\n    py = (proposals[..., 1] + proposals[..., 3]) * 0.5\n    pw = proposals[..., 2] - proposals[..., 0]\n    ph = proposals[..., 3] - proposals[..., 1]\n\n    gx = (gt[..., 0] + gt[..., 2]) * 0.5\n    gy = (gt[..., 1] + gt[..., 3]) * 0.5\n    gw = gt[..., 2] - gt[..., 0]\n    gh = gt[..., 3] - gt[..., 1]\n\n    dx = (gx - px) / pw\n    dy = (gy - py) / ph\n    dw = torch.log(gw / pw)\n    dh = torch.log(gh / ph)\n    deltas = torch.stack([dx, dy, dw, dh], dim=-1)\n\n    means = deltas.new_tensor(means).unsqueeze(0)\n    stds = deltas.new_tensor(stds).unsqueeze(0)\n    deltas = deltas.sub_(means).div_(stds)\n\n    return deltas\n\n\n@mmcv.jit(coderize=True)\ndef delta2bbox(rois,\n               deltas,\n               means=(0., 0., 0., 0.),\n               stds=(1., 1., 1., 1.),\n               max_shape=None,\n               wh_ratio_clip=16 / 1000,\n               clip_border=True,\n               add_ctr_clamp=False,\n               ctr_clamp=32):\n    \"\"\"Apply deltas to shift/scale base boxes.\n\n    Typically the rois are anchor or proposed bounding boxes and the deltas are\n    network outputs used to shift/scale those boxes.\n    This is the inverse function of :func:`bbox2delta`.\n\n    Args:\n        rois (Tensor): Boxes to be transformed. Has shape (N, 4).\n        deltas (Tensor): Encoded offsets relative to each roi.\n            Has shape (N, num_classes * 4) or (N, 4). Note\n            N = num_base_anchors * W * H, when rois is a grid of\n            anchors. Offset encoding follows [1]_.\n        means (Sequence[float]): Denormalizing means for delta coordinates.\n            Default (0., 0., 0., 0.).\n        stds (Sequence[float]): Denormalizing standard deviation for delta\n            coordinates. Default (1., 1., 1., 1.).\n        max_shape (tuple[int, int]): Maximum bounds for boxes, specifies\n           (H, W). Default None.\n        wh_ratio_clip (float): Maximum aspect ratio for boxes. Default\n            16 / 1000.\n        clip_border (bool, optional): Whether clip the objects outside the\n            border of the image. Default True.\n        add_ctr_clamp (bool): Whether to add center clamp. When set to True,\n            the center of the prediction bounding box will be clamped to\n            avoid being too far away from the center of the anchor.\n            Only used by YOLOF. Default False.\n        ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF.\n            Default 32.\n\n    Returns:\n        Tensor: Boxes with shape (N, num_classes * 4) or (N, 4), where 4\n           represent tl_x, tl_y, br_x, br_y.\n\n    References:\n        .. 
[1] https://arxiv.org/abs/1311.2524\n\n    Example:\n        >>> rois = torch.Tensor([[ 0.,  0.,  1.,  1.],\n        >>>                      [ 0.,  0.,  1.,  1.],\n        >>>                      [ 0.,  0.,  1.,  1.],\n        >>>                      [ 5.,  5.,  5.,  5.]])\n        >>> deltas = torch.Tensor([[  0.,   0.,   0.,   0.],\n        >>>                        [  1.,   1.,   1.,   1.],\n        >>>                        [  0.,   0.,   2.,  -1.],\n        >>>                        [ 0.7, -1.9, -0.5,  0.3]])\n        >>> delta2bbox(rois, deltas, max_shape=(32, 32, 3))\n        tensor([[0.0000, 0.0000, 1.0000, 1.0000],\n                [0.1409, 0.1409, 2.8591, 2.8591],\n                [0.0000, 0.3161, 4.1945, 0.6839],\n                [5.0000, 5.0000, 5.0000, 5.0000]])\n    \"\"\"\n    num_bboxes, num_classes = deltas.size(0), deltas.size(1) // 4\n    if num_bboxes == 0:\n        return deltas\n\n    deltas = deltas.reshape(-1, 4)\n\n    means = deltas.new_tensor(means).view(1, -1)\n    stds = deltas.new_tensor(stds).view(1, -1)\n    denorm_deltas = deltas * stds + means\n\n    dxy = denorm_deltas[:, :2]\n    dwh = denorm_deltas[:, 2:]\n\n    # Compute width/height of each roi\n    rois_ = rois.repeat(1, num_classes).reshape(-1, 4)\n    pxy = ((rois_[:, :2] + rois_[:, 2:]) * 0.5)\n    pwh = (rois_[:, 2:] - rois_[:, :2])\n\n    dxy_wh = pwh * dxy\n\n    max_ratio = np.abs(np.log(wh_ratio_clip))\n    if add_ctr_clamp:\n        dxy_wh = torch.clamp(dxy_wh, max=ctr_clamp, min=-ctr_clamp)\n        dwh = torch.clamp(dwh, max=max_ratio)\n    else:\n        dwh = dwh.clamp(min=-max_ratio, max=max_ratio)\n\n    gxy = pxy + dxy_wh\n    gwh = pwh * dwh.exp()\n    x1y1 = gxy - (gwh * 0.5)\n    x2y2 = gxy + (gwh * 0.5)\n    bboxes = torch.cat([x1y1, x2y2], dim=-1)\n    if clip_border and max_shape is not None:\n        bboxes[..., 0::2].clamp_(min=0, max=max_shape[1])\n        bboxes[..., 1::2].clamp_(min=0, max=max_shape[0])\n    bboxes = bboxes.reshape(num_bboxes, -1)\n    return bboxes\n\n\ndef onnx_delta2bbox(rois,\n                    deltas,\n                    means=(0., 0., 0., 0.),\n                    stds=(1., 1., 1., 1.),\n                    max_shape=None,\n                    wh_ratio_clip=16 / 1000,\n                    clip_border=True,\n                    add_ctr_clamp=False,\n                    ctr_clamp=32):\n    \"\"\"Apply deltas to shift/scale base boxes.\n\n    Typically the rois are anchor or proposed bounding boxes and the deltas are\n    network outputs used to shift/scale those boxes.\n    This is the inverse function of :func:`bbox2delta`.\n\n    Args:\n        rois (Tensor): Boxes to be transformed. Has shape (N, 4) or (B, N, 4)\n        deltas (Tensor): Encoded offsets with respect to each roi.\n            Has shape (B, N, num_classes * 4) or (B, N, 4) or\n            (N, num_classes * 4) or (N, 4). Note N = num_anchors * W * H\n            when rois is a grid of anchors.Offset encoding follows [1]_.\n        means (Sequence[float]): Denormalizing means for delta coordinates.\n            Default (0., 0., 0., 0.).\n        stds (Sequence[float]): Denormalizing standard deviation for delta\n            coordinates. Default (1., 1., 1., 1.).\n        max_shape (Sequence[int] or torch.Tensor or Sequence[\n            Sequence[int]],optional): Maximum bounds for boxes, specifies\n            (H, W, C) or (H, W). 
If rois shape is (B, N, 4), then\n            the max_shape should be a Sequence[Sequence[int]]\n            and the length of max_shape should also be B. Default None.\n        wh_ratio_clip (float): Maximum aspect ratio for boxes.\n            Default 16 / 1000.\n        clip_border (bool, optional): Whether clip the objects outside the\n            border of the image. Default True.\n        add_ctr_clamp (bool): Whether to add center clamp, when added, the\n            predicted box is clamped is its center is too far away from\n            the original anchor's center. Only used by YOLOF. Default False.\n        ctr_clamp (int): the maximum pixel shift to clamp. Only used by YOLOF.\n            Default 32.\n\n    Returns:\n        Tensor: Boxes with shape (B, N, num_classes * 4) or (B, N, 4) or\n           (N, num_classes * 4) or (N, 4), where 4 represent\n           tl_x, tl_y, br_x, br_y.\n\n    References:\n        .. [1] https://arxiv.org/abs/1311.2524\n\n    Example:\n        >>> rois = torch.Tensor([[ 0.,  0.,  1.,  1.],\n        >>>                      [ 0.,  0.,  1.,  1.],\n        >>>                      [ 0.,  0.,  1.,  1.],\n        >>>                      [ 5.,  5.,  5.,  5.]])\n        >>> deltas = torch.Tensor([[  0.,   0.,   0.,   0.],\n        >>>                        [  1.,   1.,   1.,   1.],\n        >>>                        [  0.,   0.,   2.,  -1.],\n        >>>                        [ 0.7, -1.9, -0.5,  0.3]])\n        >>> delta2bbox(rois, deltas, max_shape=(32, 32, 3))\n        tensor([[0.0000, 0.0000, 1.0000, 1.0000],\n                [0.1409, 0.1409, 2.8591, 2.8591],\n                [0.0000, 0.3161, 4.1945, 0.6839],\n                [5.0000, 5.0000, 5.0000, 5.0000]])\n    \"\"\"\n    means = deltas.new_tensor(means).view(1,\n                                          -1).repeat(1,\n                                                     deltas.size(-1) // 4)\n    stds = deltas.new_tensor(stds).view(1, -1).repeat(1, deltas.size(-1) // 4)\n    denorm_deltas = deltas * stds + means\n    dx = denorm_deltas[..., 0::4]\n    dy = denorm_deltas[..., 1::4]\n    dw = denorm_deltas[..., 2::4]\n    dh = denorm_deltas[..., 3::4]\n\n    x1, y1 = rois[..., 0], rois[..., 1]\n    x2, y2 = rois[..., 2], rois[..., 3]\n    # Compute center of each roi\n    px = ((x1 + x2) * 0.5).unsqueeze(-1).expand_as(dx)\n    py = ((y1 + y2) * 0.5).unsqueeze(-1).expand_as(dy)\n    # Compute width/height of each roi\n    pw = (x2 - x1).unsqueeze(-1).expand_as(dw)\n    ph = (y2 - y1).unsqueeze(-1).expand_as(dh)\n\n    dx_width = pw * dx\n    dy_height = ph * dy\n\n    max_ratio = np.abs(np.log(wh_ratio_clip))\n    if add_ctr_clamp:\n        dx_width = torch.clamp(dx_width, max=ctr_clamp, min=-ctr_clamp)\n        dy_height = torch.clamp(dy_height, max=ctr_clamp, min=-ctr_clamp)\n        dw = torch.clamp(dw, max=max_ratio)\n        dh = torch.clamp(dh, max=max_ratio)\n    else:\n        dw = dw.clamp(min=-max_ratio, max=max_ratio)\n        dh = dh.clamp(min=-max_ratio, max=max_ratio)\n    # Use exp(network energy) to enlarge/shrink each roi\n    gw = pw * dw.exp()\n    gh = ph * dh.exp()\n    # Use network energy to shift the center of each roi\n    gx = px + dx_width\n    gy = py + dy_height\n    # Convert center-xy/width/height to top-left, bottom-right\n    x1 = gx - gw * 0.5\n    y1 = gy - gh * 0.5\n    x2 = gx + gw * 0.5\n    y2 = gy + gh * 0.5\n\n    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view(deltas.size())\n\n    if clip_border and max_shape is not None:\n        # clip bboxes 
with dynamic `min` and `max` for onnx\n        if torch.onnx.is_in_onnx_export():\n            from mmdet.core.export import dynamic_clip_for_onnx\n            x1, y1, x2, y2 = dynamic_clip_for_onnx(x1, y1, x2, y2, max_shape)\n            bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view(deltas.size())\n            return bboxes\n        if not isinstance(max_shape, torch.Tensor):\n            max_shape = x1.new_tensor(max_shape)\n        max_shape = max_shape[..., :2].type_as(x1)\n        if max_shape.ndim == 2:\n            assert bboxes.ndim == 3\n            assert max_shape.size(0) == bboxes.size(0)\n\n        min_xy = x1.new_tensor(0)\n        max_xy = torch.cat(\n            [max_shape] * (deltas.size(-1) // 2),\n            dim=-1).flip(-1).unsqueeze(-2)\n        bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)\n        bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)\n\n    return bboxes\n"
  },
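A quick round-trip check of `DeltaXYWHBBoxCoder`: encoding proposals against ground truth and decoding the resulting deltas reproduces the ground truth, since `delta2bbox` is the inverse of `bbox2delta` and no `max_shape` means nothing is clipped. The boxes and stds below are made up.

```python
import torch
from mmdet.core.bbox.coder import DeltaXYWHBBoxCoder

rois = torch.tensor([[0., 0., 10., 10.],
                     [5., 5., 20., 30.]])
gts = torch.tensor([[1., 2., 9., 12.],
                    [4., 6., 22., 28.]])

coder = DeltaXYWHBBoxCoder(target_means=(0., 0., 0., 0.),
                           target_stds=(0.1, 0.1, 0.2, 0.2))

deltas = coder.encode(rois, gts)        # normalized (dx, dy, dw, dh) targets
restored = coder.decode(rois, deltas)   # inverse transform
assert torch.allclose(restored, gts, atol=1e-4)
```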
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/distance_point_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import BBOX_CODERS\nfrom ..transforms import bbox2distance, distance2bbox\nfrom .base_bbox_coder import BaseBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass DistancePointBBoxCoder(BaseBBoxCoder):\n    \"\"\"Distance Point BBox coder.\n\n    This coder encodes gt bboxes (x1, y1, x2, y2) into (top, bottom, left,\n    right) and decode it back to the original.\n\n    Args:\n        clip_border (bool, optional): Whether clip the objects outside the\n            border of the image. Defaults to True.\n    \"\"\"\n\n    def __init__(self, clip_border=True):\n        super(BaseBBoxCoder, self).__init__()\n        self.clip_border = clip_border\n\n    def encode(self, points, gt_bboxes, max_dis=None, eps=0.1):\n        \"\"\"Encode bounding box to distances.\n\n        Args:\n            points (Tensor): Shape (N, 2), The format is [x, y].\n            gt_bboxes (Tensor): Shape (N, 4), The format is \"xyxy\"\n            max_dis (float): Upper bound of the distance. Default None.\n            eps (float): a small value to ensure target < max_dis, instead <=.\n                Default 0.1.\n\n        Returns:\n            Tensor: Box transformation deltas. The shape is (N, 4).\n        \"\"\"\n        assert points.size(0) == gt_bboxes.size(0)\n        assert points.size(-1) == 2\n        assert gt_bboxes.size(-1) == 4\n        return bbox2distance(points, gt_bboxes, max_dis, eps)\n\n    def decode(self, points, pred_bboxes, max_shape=None):\n        \"\"\"Decode distance prediction to bounding box.\n\n        Args:\n            points (Tensor): Shape (B, N, 2) or (N, 2).\n            pred_bboxes (Tensor): Distance from the given point to 4\n                boundaries (left, top, right, bottom). Shape (B, N, 4)\n                or (N, 4)\n            max_shape (Sequence[int] or torch.Tensor or Sequence[\n                Sequence[int]],optional): Maximum bounds for boxes, specifies\n                (H, W, C) or (H, W). If priors shape is (B, N, 4), then\n                the max_shape should be a Sequence[Sequence[int]],\n                and the length of max_shape should also be B.\n                Default None.\n        Returns:\n            Tensor: Boxes with shape (N, 4) or (B, N, 4)\n        \"\"\"\n        assert points.size(0) == pred_bboxes.size(0)\n        assert points.size(-1) == 2\n        assert pred_bboxes.size(-1) == 4\n        if self.clip_border is False:\n            max_shape = None\n        return distance2bbox(points, pred_bboxes, max_shape)\n"
  },
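The same kind of round trip works for `DistancePointBBoxCoder`, provided each point lies inside its ground-truth box so the four distances are non-negative. The points and boxes below are made up; `bbox2distance`/`distance2bbox` are the transform helpers imported at the top of the file.

```python
import torch
from mmdet.core.bbox.coder import DistancePointBBoxCoder

points = torch.tensor([[5., 5.],
                       [20., 20.]])
gts = torch.tensor([[0., 0., 10., 10.],
                    [15., 12., 30., 28.]])

coder = DistancePointBBoxCoder()
dists = coder.encode(points, gts)      # (left, top, right, bottom) distances
decoded = coder.decode(points, dists)  # back to (x1, y1, x2, y2)
assert torch.allclose(decoded, gts)
```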
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/legacy_delta_xywh_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport torch\n\nfrom ..builder import BBOX_CODERS\nfrom .base_bbox_coder import BaseBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass LegacyDeltaXYWHBBoxCoder(BaseBBoxCoder):\n    \"\"\"Legacy Delta XYWH BBox coder used in MMDet V1.x.\n\n    Following the practice in R-CNN [1]_, this coder encodes bbox (x1, y1, x2,\n    y2) into delta (dx, dy, dw, dh) and decodes delta (dx, dy, dw, dh)\n    back to original bbox (x1, y1, x2, y2).\n\n    Note:\n        The main difference between :class`LegacyDeltaXYWHBBoxCoder` and\n        :class:`DeltaXYWHBBoxCoder` is whether ``+ 1`` is used during width and\n        height calculation. We suggest to only use this coder when testing with\n        MMDet V1.x models.\n\n    References:\n        .. [1] https://arxiv.org/abs/1311.2524\n\n    Args:\n        target_means (Sequence[float]): denormalizing means of target for\n            delta coordinates\n        target_stds (Sequence[float]): denormalizing standard deviation of\n            target for delta coordinates\n    \"\"\"\n\n    def __init__(self,\n                 target_means=(0., 0., 0., 0.),\n                 target_stds=(1., 1., 1., 1.)):\n        super(BaseBBoxCoder, self).__init__()\n        self.means = target_means\n        self.stds = target_stds\n\n    def encode(self, bboxes, gt_bboxes):\n        \"\"\"Get box regression transformation deltas that can be used to\n        transform the ``bboxes`` into the ``gt_bboxes``.\n\n        Args:\n            bboxes (torch.Tensor): source boxes, e.g., object proposals.\n            gt_bboxes (torch.Tensor): target of the transformation, e.g.,\n                ground-truth boxes.\n\n        Returns:\n            torch.Tensor: Box transformation deltas\n        \"\"\"\n        assert bboxes.size(0) == gt_bboxes.size(0)\n        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4\n        encoded_bboxes = legacy_bbox2delta(bboxes, gt_bboxes, self.means,\n                                           self.stds)\n        return encoded_bboxes\n\n    def decode(self,\n               bboxes,\n               pred_bboxes,\n               max_shape=None,\n               wh_ratio_clip=16 / 1000):\n        \"\"\"Apply transformation `pred_bboxes` to `boxes`.\n\n        Args:\n            boxes (torch.Tensor): Basic boxes.\n            pred_bboxes (torch.Tensor): Encoded boxes with shape\n            max_shape (tuple[int], optional): Maximum shape of boxes.\n                Defaults to None.\n            wh_ratio_clip (float, optional): The allowed ratio between\n                width and height.\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n        assert pred_bboxes.size(0) == bboxes.size(0)\n        decoded_bboxes = legacy_delta2bbox(bboxes, pred_bboxes, self.means,\n                                           self.stds, max_shape, wh_ratio_clip)\n\n        return decoded_bboxes\n\n\n@mmcv.jit(coderize=True)\ndef legacy_bbox2delta(proposals,\n                      gt,\n                      means=(0., 0., 0., 0.),\n                      stds=(1., 1., 1., 1.)):\n    \"\"\"Compute deltas of proposals w.r.t. 
gt in the MMDet V1.x manner.\n\n    We usually compute the deltas of x, y, w, h of proposals w.r.t ground\n    truth bboxes to get regression target.\n    This is the inverse function of `delta2bbox()`\n\n    Args:\n        proposals (Tensor): Boxes to be transformed, shape (N, ..., 4)\n        gt (Tensor): Gt bboxes to be used as base, shape (N, ..., 4)\n        means (Sequence[float]): Denormalizing means for delta coordinates\n        stds (Sequence[float]): Denormalizing standard deviation for delta\n            coordinates\n\n    Returns:\n        Tensor: deltas with shape (N, 4), where columns represent dx, dy,\n            dw, dh.\n    \"\"\"\n    assert proposals.size() == gt.size()\n\n    proposals = proposals.float()\n    gt = gt.float()\n    px = (proposals[..., 0] + proposals[..., 2]) * 0.5\n    py = (proposals[..., 1] + proposals[..., 3]) * 0.5\n    pw = proposals[..., 2] - proposals[..., 0] + 1.0\n    ph = proposals[..., 3] - proposals[..., 1] + 1.0\n\n    gx = (gt[..., 0] + gt[..., 2]) * 0.5\n    gy = (gt[..., 1] + gt[..., 3]) * 0.5\n    gw = gt[..., 2] - gt[..., 0] + 1.0\n    gh = gt[..., 3] - gt[..., 1] + 1.0\n\n    dx = (gx - px) / pw\n    dy = (gy - py) / ph\n    dw = torch.log(gw / pw)\n    dh = torch.log(gh / ph)\n    deltas = torch.stack([dx, dy, dw, dh], dim=-1)\n\n    means = deltas.new_tensor(means).unsqueeze(0)\n    stds = deltas.new_tensor(stds).unsqueeze(0)\n    deltas = deltas.sub_(means).div_(stds)\n\n    return deltas\n\n\n@mmcv.jit(coderize=True)\ndef legacy_delta2bbox(rois,\n                      deltas,\n                      means=(0., 0., 0., 0.),\n                      stds=(1., 1., 1., 1.),\n                      max_shape=None,\n                      wh_ratio_clip=16 / 1000):\n    \"\"\"Apply deltas to shift/scale base boxes in the MMDet V1.x manner.\n\n    Typically the rois are anchor or proposed bounding boxes and the deltas are\n    network outputs used to shift/scale those boxes.\n    This is the inverse function of `bbox2delta()`\n\n    Args:\n        rois (Tensor): Boxes to be transformed. Has shape (N, 4)\n        deltas (Tensor): Encoded offsets with respect to each roi.\n            Has shape (N, 4 * num_classes). Note N = num_anchors * W * H when\n            rois is a grid of anchors. Offset encoding follows [1]_.\n        means (Sequence[float]): Denormalizing means for delta coordinates\n        stds (Sequence[float]): Denormalizing standard deviation for delta\n            coordinates\n        max_shape (tuple[int, int]): Maximum bounds for boxes. specifies (H, W)\n        wh_ratio_clip (float): Maximum aspect ratio for boxes.\n\n    Returns:\n        Tensor: Boxes with shape (N, 4), where columns represent\n            tl_x, tl_y, br_x, br_y.\n\n    References:\n        .. 
[1] https://arxiv.org/abs/1311.2524\n\n    Example:\n        >>> rois = torch.Tensor([[ 0.,  0.,  1.,  1.],\n        >>>                      [ 0.,  0.,  1.,  1.],\n        >>>                      [ 0.,  0.,  1.,  1.],\n        >>>                      [ 5.,  5.,  5.,  5.]])\n        >>> deltas = torch.Tensor([[  0.,   0.,   0.,   0.],\n        >>>                        [  1.,   1.,   1.,   1.],\n        >>>                        [  0.,   0.,   2.,  -1.],\n        >>>                        [ 0.7, -1.9, -0.5,  0.3]])\n        >>> legacy_delta2bbox(rois, deltas, max_shape=(32, 32))\n        tensor([[0.0000, 0.0000, 1.5000, 1.5000],\n                [0.0000, 0.0000, 5.2183, 5.2183],\n                [0.0000, 0.1321, 7.8891, 0.8679],\n                [5.3967, 2.4251, 6.0033, 3.7749]])\n    \"\"\"\n    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)\n    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)\n    denorm_deltas = deltas * stds + means\n    dx = denorm_deltas[:, 0::4]\n    dy = denorm_deltas[:, 1::4]\n    dw = denorm_deltas[:, 2::4]\n    dh = denorm_deltas[:, 3::4]\n    max_ratio = np.abs(np.log(wh_ratio_clip))\n    dw = dw.clamp(min=-max_ratio, max=max_ratio)\n    dh = dh.clamp(min=-max_ratio, max=max_ratio)\n    # Compute center of each roi\n    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)\n    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)\n    # Compute width/height of each roi\n    pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)\n    ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)\n    # Use exp(network energy) to enlarge/shrink each roi\n    gw = pw * dw.exp()\n    gh = ph * dh.exp()\n    # Use network energy to shift the center of each roi\n    gx = px + pw * dx\n    gy = py + ph * dy\n    # Convert center-xy/width/height to top-left, bottom-right\n\n    # The true legacy box coder should +- 0.5 here.\n    # However, current implementation improves the performance when testing\n    # the models trained in MMDetection 1.X (~0.5 bbox AP, 0.2 mask AP)\n    x1 = gx - gw * 0.5\n    y1 = gy - gh * 0.5\n    x2 = gx + gw * 0.5\n    y2 = gy + gh * 0.5\n    if max_shape is not None:\n        x1 = x1.clamp(min=0, max=max_shape[1] - 1)\n        y1 = y1.clamp(min=0, max=max_shape[0] - 1)\n        x2 = x2.clamp(min=0, max=max_shape[1] - 1)\n        y2 = y2.clamp(min=0, max=max_shape[0] - 1)\n    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)\n    return bboxes\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/pseudo_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import BBOX_CODERS\nfrom .base_bbox_coder import BaseBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass PseudoBBoxCoder(BaseBBoxCoder):\n    \"\"\"Pseudo bounding box coder.\"\"\"\n\n    def __init__(self, **kwargs):\n        super(BaseBBoxCoder, self).__init__(**kwargs)\n\n    def encode(self, bboxes, gt_bboxes):\n        \"\"\"torch.Tensor: return the given ``bboxes``\"\"\"\n        return gt_bboxes\n\n    def decode(self, bboxes, pred_bboxes):\n        \"\"\"torch.Tensor: return the given ``pred_bboxes``\"\"\"\n        return pred_bboxes\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/tblr_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom ..builder import BBOX_CODERS\nfrom .base_bbox_coder import BaseBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass TBLRBBoxCoder(BaseBBoxCoder):\n    \"\"\"TBLR BBox coder.\n\n    Following the practice in `FSAF <https://arxiv.org/abs/1903.00621>`_,\n    this coder encodes gt bboxes (x1, y1, x2, y2) into (top, bottom, left,\n    right) and decode it back to the original.\n\n    Args:\n        normalizer (list | float): Normalization factor to be\n          divided with when coding the coordinates. If it is a list, it should\n          have length of 4 indicating normalization factor in tblr dims.\n          Otherwise it is a unified float factor for all dims. Default: 4.0\n        clip_border (bool, optional): Whether clip the objects outside the\n            border of the image. Defaults to True.\n    \"\"\"\n\n    def __init__(self, normalizer=4.0, clip_border=True):\n        super(BaseBBoxCoder, self).__init__()\n        self.normalizer = normalizer\n        self.clip_border = clip_border\n\n    def encode(self, bboxes, gt_bboxes):\n        \"\"\"Get box regression transformation deltas that can be used to\n        transform the ``bboxes`` into the ``gt_bboxes`` in the (top, left,\n        bottom, right) order.\n\n        Args:\n            bboxes (torch.Tensor): source boxes, e.g., object proposals.\n            gt_bboxes (torch.Tensor): target of the transformation, e.g.,\n                ground truth boxes.\n\n        Returns:\n            torch.Tensor: Box transformation deltas\n        \"\"\"\n        assert bboxes.size(0) == gt_bboxes.size(0)\n        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4\n        encoded_bboxes = bboxes2tblr(\n            bboxes, gt_bboxes, normalizer=self.normalizer)\n        return encoded_bboxes\n\n    def decode(self, bboxes, pred_bboxes, max_shape=None):\n        \"\"\"Apply transformation `pred_bboxes` to `boxes`.\n\n        Args:\n            bboxes (torch.Tensor): Basic boxes.Shape (B, N, 4) or (N, 4)\n            pred_bboxes (torch.Tensor): Encoded boxes with shape\n               (B, N, 4) or (N, 4)\n            max_shape (Sequence[int] or torch.Tensor or Sequence[\n               Sequence[int]],optional): Maximum bounds for boxes, specifies\n               (H, W, C) or (H, W). 
If bboxes shape is (B, N, 4), then\n               the max_shape should be a Sequence[Sequence[int]]\n               and the length of max_shape should also be B.\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n        decoded_bboxes = tblr2bboxes(\n            bboxes,\n            pred_bboxes,\n            normalizer=self.normalizer,\n            max_shape=max_shape,\n            clip_border=self.clip_border)\n\n        return decoded_bboxes\n\n\n@mmcv.jit(coderize=True)\ndef bboxes2tblr(priors, gts, normalizer=4.0, normalize_by_wh=True):\n    \"\"\"Encode ground truth boxes to tblr coordinate.\n\n    It first convert the gt coordinate to tblr format,\n     (top, bottom, left, right), relative to prior box centers.\n     The tblr coordinate may be normalized by the side length of prior bboxes\n     if `normalize_by_wh` is specified as True, and it is then normalized by\n     the `normalizer` factor.\n\n    Args:\n        priors (Tensor): Prior boxes in point form\n            Shape: (num_proposals,4).\n        gts (Tensor): Coords of ground truth for each prior in point-form\n            Shape: (num_proposals, 4).\n        normalizer (Sequence[float] | float): normalization parameter of\n            encoded boxes. If it is a list, it has to have length = 4.\n            Default: 4.0\n        normalize_by_wh (bool): Whether to normalize tblr coordinate by the\n            side length (wh) of prior bboxes.\n\n    Return:\n        encoded boxes (Tensor), Shape: (num_proposals, 4)\n    \"\"\"\n\n    # dist b/t match center and prior's center\n    if not isinstance(normalizer, float):\n        normalizer = torch.tensor(normalizer, device=priors.device)\n        assert len(normalizer) == 4, 'Normalizer must have length = 4'\n    assert priors.size(0) == gts.size(0)\n    prior_centers = (priors[:, 0:2] + priors[:, 2:4]) / 2\n    xmin, ymin, xmax, ymax = gts.split(1, dim=1)\n    top = prior_centers[:, 1].unsqueeze(1) - ymin\n    bottom = ymax - prior_centers[:, 1].unsqueeze(1)\n    left = prior_centers[:, 0].unsqueeze(1) - xmin\n    right = xmax - prior_centers[:, 0].unsqueeze(1)\n    loc = torch.cat((top, bottom, left, right), dim=1)\n    if normalize_by_wh:\n        # Normalize tblr by anchor width and height\n        wh = priors[:, 2:4] - priors[:, 0:2]\n        w, h = torch.split(wh, 1, dim=1)\n        loc[:, :2] /= h  # tb is normalized by h\n        loc[:, 2:] /= w  # lr is normalized by w\n    # Normalize tblr by the given normalization factor\n    return loc / normalizer\n\n\n@mmcv.jit(coderize=True)\ndef tblr2bboxes(priors,\n                tblr,\n                normalizer=4.0,\n                normalize_by_wh=True,\n                max_shape=None,\n                clip_border=True):\n    \"\"\"Decode tblr outputs to prediction boxes.\n\n    The process includes 3 steps: 1) De-normalize tblr coordinates by\n    multiplying it with `normalizer`; 2) De-normalize tblr coordinates by the\n    prior bbox width and height if `normalize_by_wh` is `True`; 3) Convert\n    tblr (top, bottom, left, right) pair relative to the center of priors back\n    to (xmin, ymin, xmax, ymax) coordinate.\n\n    Args:\n        priors (Tensor): Prior boxes in point form (x0, y0, x1, y1)\n          Shape: (N,4) or (B, N, 4).\n        tblr (Tensor): Coords of network output in tblr form\n          Shape: (N, 4) or (B, N, 4).\n        normalizer (Sequence[float] | float): Normalization parameter of\n          encoded boxes. 
By list, it represents the normalization factors at\n          tblr dims. By float, it is the unified normalization factor at all\n          dims. Default: 4.0\n        normalize_by_wh (bool): Whether the tblr coordinates have been\n          normalized by the side length (wh) of prior bboxes.\n        max_shape (Sequence[int] or torch.Tensor or Sequence[\n            Sequence[int]],optional): Maximum bounds for boxes, specifies\n            (H, W, C) or (H, W). If priors shape is (B, N, 4), then\n            the max_shape should be a Sequence[Sequence[int]]\n            and the length of max_shape should also be B.\n        clip_border (bool, optional): Whether clip the objects outside the\n            border of the image. Defaults to True.\n\n    Return:\n        encoded boxes (Tensor): Boxes with shape (N, 4) or (B, N, 4)\n    \"\"\"\n    if not isinstance(normalizer, float):\n        normalizer = torch.tensor(normalizer, device=priors.device)\n        assert len(normalizer) == 4, 'Normalizer must have length = 4'\n    assert priors.size(0) == tblr.size(0)\n    if priors.ndim == 3:\n        assert priors.size(1) == tblr.size(1)\n\n    loc_decode = tblr * normalizer\n    prior_centers = (priors[..., 0:2] + priors[..., 2:4]) / 2\n    if normalize_by_wh:\n        wh = priors[..., 2:4] - priors[..., 0:2]\n        w, h = torch.split(wh, 1, dim=-1)\n        # Inplace operation with slice would failed for exporting to ONNX\n        th = h * loc_decode[..., :2]  # tb\n        tw = w * loc_decode[..., 2:]  # lr\n        loc_decode = torch.cat([th, tw], dim=-1)\n    # Cannot be exported using onnx when loc_decode.split(1, dim=-1)\n    top, bottom, left, right = loc_decode.split((1, 1, 1, 1), dim=-1)\n    xmin = prior_centers[..., 0].unsqueeze(-1) - left\n    xmax = prior_centers[..., 0].unsqueeze(-1) + right\n    ymin = prior_centers[..., 1].unsqueeze(-1) - top\n    ymax = prior_centers[..., 1].unsqueeze(-1) + bottom\n\n    bboxes = torch.cat((xmin, ymin, xmax, ymax), dim=-1)\n\n    if clip_border and max_shape is not None:\n        # clip bboxes with dynamic `min` and `max` for onnx\n        if torch.onnx.is_in_onnx_export():\n            from mmdet.core.export import dynamic_clip_for_onnx\n            xmin, ymin, xmax, ymax = dynamic_clip_for_onnx(\n                xmin, ymin, xmax, ymax, max_shape)\n            bboxes = torch.cat([xmin, ymin, xmax, ymax], dim=-1)\n            return bboxes\n        if not isinstance(max_shape, torch.Tensor):\n            max_shape = priors.new_tensor(max_shape)\n        max_shape = max_shape[..., :2].type_as(priors)\n        if max_shape.ndim == 2:\n            assert bboxes.ndim == 3\n            assert max_shape.size(0) == bboxes.size(0)\n\n        min_xy = priors.new_tensor(0)\n        max_xy = torch.cat([max_shape, max_shape],\n                           dim=-1).flip(-1).unsqueeze(-2)\n        bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)\n        bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)\n\n    return bboxes\n"
  },
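And for `TBLRBBoxCoder`: the (top, bottom, left, right) targets are normalized by the prior's height/width and by `normalizer`, and `decode` undoes exactly those steps. The values below are illustrative.

```python
import torch
from mmdet.core.bbox.coder import TBLRBBoxCoder

priors = torch.tensor([[0., 0., 10., 10.],
                       [10., 10., 30., 30.]])
gts = torch.tensor([[1., 1., 9., 9.],
                    [12., 8., 28., 32.]])

coder = TBLRBBoxCoder(normalizer=4.0)
targets = coder.encode(priors, gts)      # normalized (top, bottom, left, right)
decoded = coder.decode(priors, targets)  # recovers gts (no max_shape clipping)
assert torch.allclose(decoded, gts, atol=1e-4)
```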
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/coder/yolo_bbox_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom ..builder import BBOX_CODERS\nfrom .base_bbox_coder import BaseBBoxCoder\n\n\n@BBOX_CODERS.register_module()\nclass YOLOBBoxCoder(BaseBBoxCoder):\n    \"\"\"YOLO BBox coder.\n\n    Following `YOLO <https://arxiv.org/abs/1506.02640>`_, this coder divide\n    image into grids, and encode bbox (x1, y1, x2, y2) into (cx, cy, dw, dh).\n    cx, cy in [0., 1.], denotes relative center position w.r.t the center of\n    bboxes. dw, dh are the same as :obj:`DeltaXYWHBBoxCoder`.\n\n    Args:\n        eps (float): Min value of cx, cy when encoding.\n    \"\"\"\n\n    def __init__(self, eps=1e-6):\n        super(BaseBBoxCoder, self).__init__()\n        self.eps = eps\n\n    @mmcv.jit(coderize=True)\n    def encode(self, bboxes, gt_bboxes, stride):\n        \"\"\"Get box regression transformation deltas that can be used to\n        transform the ``bboxes`` into the ``gt_bboxes``.\n\n        Args:\n            bboxes (torch.Tensor): Source boxes, e.g., anchors.\n            gt_bboxes (torch.Tensor): Target of the transformation, e.g.,\n                ground-truth boxes.\n            stride (torch.Tensor | int): Stride of bboxes.\n\n        Returns:\n            torch.Tensor: Box transformation deltas\n        \"\"\"\n\n        assert bboxes.size(0) == gt_bboxes.size(0)\n        assert bboxes.size(-1) == gt_bboxes.size(-1) == 4\n        x_center_gt = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) * 0.5\n        y_center_gt = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) * 0.5\n        w_gt = gt_bboxes[..., 2] - gt_bboxes[..., 0]\n        h_gt = gt_bboxes[..., 3] - gt_bboxes[..., 1]\n        x_center = (bboxes[..., 0] + bboxes[..., 2]) * 0.5\n        y_center = (bboxes[..., 1] + bboxes[..., 3]) * 0.5\n        w = bboxes[..., 2] - bboxes[..., 0]\n        h = bboxes[..., 3] - bboxes[..., 1]\n        w_target = torch.log((w_gt / w).clamp(min=self.eps))\n        h_target = torch.log((h_gt / h).clamp(min=self.eps))\n        x_center_target = ((x_center_gt - x_center) / stride + 0.5).clamp(\n            self.eps, 1 - self.eps)\n        y_center_target = ((y_center_gt - y_center) / stride + 0.5).clamp(\n            self.eps, 1 - self.eps)\n        encoded_bboxes = torch.stack(\n            [x_center_target, y_center_target, w_target, h_target], dim=-1)\n        return encoded_bboxes\n\n    @mmcv.jit(coderize=True)\n    def decode(self, bboxes, pred_bboxes, stride):\n        \"\"\"Apply transformation `pred_bboxes` to `boxes`.\n\n        Args:\n            boxes (torch.Tensor): Basic boxes, e.g. anchors.\n            pred_bboxes (torch.Tensor): Encoded boxes with shape\n            stride (torch.Tensor | int): Strides of bboxes.\n\n        Returns:\n            torch.Tensor: Decoded boxes.\n        \"\"\"\n        assert pred_bboxes.size(-1) == bboxes.size(-1) == 4\n        xy_centers = (bboxes[..., :2] + bboxes[..., 2:]) * 0.5 + (\n            pred_bboxes[..., :2] - 0.5) * stride\n        whs = (bboxes[..., 2:] -\n               bboxes[..., :2]) * 0.5 * pred_bboxes[..., 2:].exp()\n        decoded_bboxes = torch.stack(\n            (xy_centers[..., 0] - whs[..., 0], xy_centers[..., 1] -\n             whs[..., 1], xy_centers[..., 0] + whs[..., 0],\n             xy_centers[..., 1] + whs[..., 1]),\n            dim=-1)\n        return decoded_bboxes\n"
  },
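A small pure-torch re-derivation of the arithmetic in YOLOBBoxCoder.decode above, with hypothetical numbers rather than the registered coder API: the predicted (cx, cy) shift the anchor center by up to one stride, and exp(dw), exp(dh) rescale the anchor's half-extents.

import torch

anchor = torch.tensor([[16., 16., 48., 48.]])   # (x1, y1, x2, y2), a 32x32 anchor
pred = torch.tensor([[0.75, 0.25, 0.0, 0.0]])   # (cx, cy, dw, dh); dw = dh = 0 keeps the size
stride = 32

center = (anchor[..., :2] + anchor[..., 2:]) * 0.5 + (pred[..., :2] - 0.5) * stride
half_wh = (anchor[..., 2:] - anchor[..., :2]) * 0.5 * pred[..., 2:].exp()
decoded = torch.cat([center - half_wh, center + half_wh], dim=-1)
print(decoded)  # tensor([[24.,  8., 56., 40.]])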
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/demodata.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.utils.util_random import ensure_rng\n\n\ndef random_boxes(num=1, scale=1, rng=None):\n    \"\"\"Simple version of ``kwimage.Boxes.random``\n\n    Returns:\n        Tensor: shape (n, 4) in x1, y1, x2, y2 format.\n\n    References:\n        https://gitlab.kitware.com/computer-vision/kwimage/blob/master/kwimage/structs/boxes.py#L1390\n\n    Example:\n        >>> num = 3\n        >>> scale = 512\n        >>> rng = 0\n        >>> boxes = random_boxes(num, scale, rng)\n        >>> print(boxes)\n        tensor([[280.9925, 278.9802, 308.6148, 366.1769],\n                [216.9113, 330.6978, 224.0446, 456.5878],\n                [405.3632, 196.3221, 493.3953, 270.7942]])\n    \"\"\"\n    rng = ensure_rng(rng)\n\n    tlbr = rng.rand(num, 4).astype(np.float32)\n\n    tl_x = np.minimum(tlbr[:, 0], tlbr[:, 2])\n    tl_y = np.minimum(tlbr[:, 1], tlbr[:, 3])\n    br_x = np.maximum(tlbr[:, 0], tlbr[:, 2])\n    br_y = np.maximum(tlbr[:, 1], tlbr[:, 3])\n\n    tlbr[:, 0] = tl_x * scale\n    tlbr[:, 1] = tl_y * scale\n    tlbr[:, 2] = br_x * scale\n    tlbr[:, 3] = br_y * scale\n\n    boxes = torch.from_numpy(tlbr)\n    return boxes\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/iou_calculators/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .builder import build_iou_calculator\nfrom .iou2d_calculator import BboxOverlaps2D, bbox_overlaps\n\n__all__ = ['build_iou_calculator', 'BboxOverlaps2D', 'bbox_overlaps']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/iou_calculators/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.utils import Registry, build_from_cfg\n\nIOU_CALCULATORS = Registry('IoU calculator')\n\n\ndef build_iou_calculator(cfg, default_args=None):\n    \"\"\"Builder of IoU calculator.\"\"\"\n    return build_from_cfg(cfg, IOU_CALCULATORS, default_args)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/iou_calculators/iou2d_calculator.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom .builder import IOU_CALCULATORS\n\n\ndef cast_tensor_type(x, scale=1., dtype=None):\n    if dtype == 'fp16':\n        # scale is for preventing overflows\n        x = (x / scale).half()\n    return x\n\n\ndef fp16_clamp(x, min=None, max=None):\n    if not x.is_cuda and x.dtype == torch.float16:\n        # clamp for cpu float16, tensor fp16 has no clamp implementation\n        return x.float().clamp(min, max).half()\n\n    return x.clamp(min, max)\n\n\n@IOU_CALCULATORS.register_module()\nclass BboxOverlaps2D:\n    \"\"\"2D Overlaps (e.g. IoUs, GIoUs) Calculator.\"\"\"\n\n    def __init__(self, scale=1., dtype=None):\n        self.scale = scale\n        self.dtype = dtype\n\n    def __call__(self, bboxes1, bboxes2, mode='iou', is_aligned=False):\n        \"\"\"Calculate IoU between 2D bboxes.\n\n        Args:\n            bboxes1 (Tensor): bboxes have shape (m, 4) in <x1, y1, x2, y2>\n                format, or shape (m, 5) in <x1, y1, x2, y2, score> format.\n            bboxes2 (Tensor): bboxes have shape (n, 4) in <x1, y1, x2, y2>\n                format, shape (n, 5) in <x1, y1, x2, y2, score> format, or be\n                empty.\n            mode (str): \"iou\" (intersection over union), \"iof\" (intersection\n                over foreground), or \"giou\" (generalized intersection over\n                union).\n            is_aligned (bool, optional): If True, then m and n must be equal.\n                Default False.\n\n        Returns:\n            Tensor: shape (m, n) if ``is_aligned `` is False else shape (m,)\n        \"\"\"\n        assert bboxes1.size(-1) in [0, 4, 5]\n        assert bboxes2.size(-1) in [0, 4, 5]\n        if bboxes2.size(-1) == 5:\n            bboxes2 = bboxes2[..., :4]\n        if bboxes1.size(-1) == 5:\n            bboxes1 = bboxes1[..., :4]\n\n        if self.dtype == 'fp16':\n            # change tensor type to save cpu and cuda memory and keep speed\n            bboxes1 = cast_tensor_type(bboxes1, self.scale, self.dtype)\n            bboxes2 = cast_tensor_type(bboxes2, self.scale, self.dtype)\n            overlaps = bbox_overlaps(bboxes1, bboxes2, mode, is_aligned)\n            if not overlaps.is_cuda and overlaps.dtype == torch.float16:\n                # resume cpu float32\n                overlaps = overlaps.float()\n            return overlaps\n\n        return bbox_overlaps(bboxes1, bboxes2, mode, is_aligned)\n\n    def __repr__(self):\n        \"\"\"str: a string describing the module\"\"\"\n        repr_str = self.__class__.__name__ + f'(' \\\n            f'scale={self.scale}, dtype={self.dtype})'\n        return repr_str\n\n\ndef bbox_overlaps(bboxes1, bboxes2, mode='iou', is_aligned=False, eps=1e-6):\n    \"\"\"Calculate overlap between two set of bboxes.\n\n    FP16 Contributed by https://github.com/open-mmlab/mmdetection/pull/4889\n    Note:\n        Assume bboxes1 is M x 4, bboxes2 is N x 4, when mode is 'iou',\n        there are some new generated variable when calculating IOU\n        using bbox_overlaps function:\n\n        1) is_aligned is False\n            area1: M x 1\n            area2: N x 1\n            lt: M x N x 2\n            rb: M x N x 2\n            wh: M x N x 2\n            overlap: M x N x 1\n            union: M x N x 1\n            ious: M x N x 1\n\n            Total memory:\n                S = (9 x N x M + N + M) * 4 Byte,\n\n            When using FP16, we can reduce:\n                R = (9 x N x M + N + M) * 4 / 2 Byte\n              
  R is larger than (N + M) * 4 * 2 whenever N and M >= 1.\n                Obviously, N + M <= N * M < 3 * N * M, when N >= 2 and M >= 2,\n                           N + 1 < 3 * N, when N or M is 1.\n\n            Given M = 40 (ground truth), N = 400000 (three anchor boxes\n            per grid, FPN, R-CNNs),\n                R = 275 MB (one time)\n\n            A special case (dense detection), M = 512 (ground truth),\n                R = 3516 MB = 3.43 GB\n\n            When the batch size is B, the saving becomes:\n                B x R\n\n            Therefore, CUDA memory runs out frequently.\n\n            Experiments on GeForce RTX 2080Ti (11019 MiB):\n\n            |   dtype   |   M   |   N   |   Use    |   Real   |   Ideal   |\n            |:----:|:----:|:----:|:----:|:----:|:----:|\n            |   FP32   |   512 | 400000 | 8020 MiB |   --   |   --   |\n            |   FP16   |   512 | 400000 |   4504 MiB | 3516 MiB | 3516 MiB |\n            |   FP32   |   40 | 400000 |   1540 MiB |   --   |   --   |\n            |   FP16   |   40 | 400000 |   1264 MiB |   276 MiB   | 275 MiB |\n\n        2) is_aligned is True\n            area1: N x 1\n            area2: N x 1\n            lt: N x 2\n            rb: N x 2\n            wh: N x 2\n            overlap: N x 1\n            union: N x 1\n            ious: N x 1\n\n            Total memory:\n                S = 11 x N * 4 Byte\n\n            When using FP16, we can reduce:\n                R = 11 x N * 4 / 2 Byte\n\n        The same holds for 'giou' (which uses more memory than 'iou').\n\n        Time-wise, FP16 is generally faster than FP32.\n\n        When gpu_assign_thr is not -1, assignment takes more time on the CPU\n        but does not reduce memory.\n        Thus, we can halve the memory while keeping the speed.\n\n    If ``is_aligned`` is ``False``, then calculate the overlaps between each\n    bbox of bboxes1 and bboxes2, otherwise the overlaps between each aligned\n    pair of bboxes1 and bboxes2.\n\n    Args:\n        bboxes1 (Tensor): shape (B, m, 4) in <x1, y1, x2, y2> format or empty.\n        bboxes2 (Tensor): shape (B, n, 4) in <x1, y1, x2, y2> format or empty.\n            B indicates the batch dim, in shape (B1, B2, ..., Bn).\n            If ``is_aligned`` is ``True``, then m and n must be equal.\n        mode (str): \"iou\" (intersection over union), \"iof\" (intersection over\n            foreground) or \"giou\" (generalized intersection over union).\n            Default \"iou\".\n        is_aligned (bool, optional): If True, then m and n must be equal.\n            Default False.\n        eps (float, optional): A value added to the denominator for numerical\n            stability. 
Default 1e-6.\n\n    Returns:\n        Tensor: shape (m, n) if ``is_aligned`` is False else shape (m,)\n\n    Example:\n        >>> bboxes1 = torch.FloatTensor([\n        >>>     [0, 0, 10, 10],\n        >>>     [10, 10, 20, 20],\n        >>>     [32, 32, 38, 42],\n        >>> ])\n        >>> bboxes2 = torch.FloatTensor([\n        >>>     [0, 0, 10, 20],\n        >>>     [0, 10, 10, 19],\n        >>>     [10, 10, 20, 20],\n        >>> ])\n        >>> overlaps = bbox_overlaps(bboxes1, bboxes2)\n        >>> assert overlaps.shape == (3, 3)\n        >>> overlaps = bbox_overlaps(bboxes1, bboxes2, is_aligned=True)\n        >>> assert overlaps.shape == (3, )\n\n    Example:\n        >>> empty = torch.empty(0, 4)\n        >>> nonempty = torch.FloatTensor([[0, 0, 10, 9]])\n        >>> assert tuple(bbox_overlaps(empty, nonempty).shape) == (0, 1)\n        >>> assert tuple(bbox_overlaps(nonempty, empty).shape) == (1, 0)\n        >>> assert tuple(bbox_overlaps(empty, empty).shape) == (0, 0)\n    \"\"\"\n\n    assert mode in ['iou', 'iof', 'giou'], f'Unsupported mode {mode}'\n    # Either the boxes are empty or the length of boxes' last dimension is 4\n    assert (bboxes1.size(-1) == 4 or bboxes1.size(0) == 0)\n    assert (bboxes2.size(-1) == 4 or bboxes2.size(0) == 0)\n\n    # Batch dim must be the same\n    # Batch dim: (B1, B2, ... Bn)\n    assert bboxes1.shape[:-2] == bboxes2.shape[:-2]\n    batch_shape = bboxes1.shape[:-2]\n\n    rows = bboxes1.size(-2)\n    cols = bboxes2.size(-2)\n    if is_aligned:\n        assert rows == cols\n\n    if rows * cols == 0:\n        if is_aligned:\n            return bboxes1.new(batch_shape + (rows, ))\n        else:\n            return bboxes1.new(batch_shape + (rows, cols))\n\n    area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (\n        bboxes1[..., 3] - bboxes1[..., 1])\n    area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (\n        bboxes2[..., 3] - bboxes2[..., 1])\n\n    if is_aligned:\n        lt = torch.max(bboxes1[..., :2], bboxes2[..., :2])  # [B, rows, 2]\n        rb = torch.min(bboxes1[..., 2:], bboxes2[..., 2:])  # [B, rows, 2]\n\n        wh = fp16_clamp(rb - lt, min=0)\n        overlap = wh[..., 0] * wh[..., 1]\n\n        if mode in ['iou', 'giou']:\n            union = area1 + area2 - overlap\n        else:\n            union = area1\n        if mode == 'giou':\n            enclosed_lt = torch.min(bboxes1[..., :2], bboxes2[..., :2])\n            enclosed_rb = torch.max(bboxes1[..., 2:], bboxes2[..., 2:])\n    else:\n        lt = torch.max(bboxes1[..., :, None, :2],\n                       bboxes2[..., None, :, :2])  # [B, rows, cols, 2]\n        rb = torch.min(bboxes1[..., :, None, 2:],\n                       bboxes2[..., None, :, 2:])  # [B, rows, cols, 2]\n\n        wh = fp16_clamp(rb - lt, min=0)\n        overlap = wh[..., 0] * wh[..., 1]\n\n        if mode in ['iou', 'giou']:\n            union = area1[..., None] + area2[..., None, :] - overlap\n        else:\n            union = area1[..., None]\n        if mode == 'giou':\n            enclosed_lt = torch.min(bboxes1[..., :, None, :2],\n                                    bboxes2[..., None, :, :2])\n            enclosed_rb = torch.max(bboxes1[..., :, None, 2:],\n                                    bboxes2[..., None, :, 2:])\n\n    eps = union.new_tensor([eps])\n    union = torch.max(union, eps)\n    ious = overlap / union\n    if mode in ['iou', 'iof']:\n        return ious\n    # calculate gious\n    enclose_wh = fp16_clamp(enclosed_rb - enclosed_lt, min=0)\n    enclose_area = enclose_wh[..., 0] 
* enclose_wh[..., 1]\n    enclose_area = torch.max(enclose_area, eps)\n    gious = ious - (enclose_area - union) / enclose_area\n    return gious\n"
  },
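The FP16 saving quoted in the Note of bbox_overlaps above follows from simple arithmetic; this sketch just re-evaluates S = (9 x N x M + N + M) * 4 bytes and the saving R = S / 2 for the two configurations listed in the table.

def fp16_saving_mib(m, n):
    # FP32 footprint S = (9 * N * M + N + M) * 4 bytes; casting to FP16 halves it.
    s_bytes = (9 * n * m + n + m) * 4
    return s_bytes / 2 / 1024 ** 2

print(round(fp16_saving_mib(40, 400000)))   # 275 -> the ~275 MiB case
print(round(fp16_saving_mib(512, 400000)))  # 3516 -> the ~3516 MiB case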
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/match_costs/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .builder import build_match_cost\nfrom .match_cost import (BBoxL1Cost, ClassificationCost, CrossEntropyLossCost,\n                         DiceCost, FocalLossCost, IoUCost)\n\n__all__ = [\n    'build_match_cost', 'ClassificationCost', 'BBoxL1Cost', 'IoUCost',\n    'FocalLossCost', 'DiceCost', 'CrossEntropyLossCost'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/match_costs/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.utils import Registry, build_from_cfg\n\nMATCH_COST = Registry('Match Cost')\n\n\ndef build_match_cost(cfg, default_args=None):\n    \"\"\"Builder of IoU calculator.\"\"\"\n    return build_from_cfg(cfg, MATCH_COST, default_args)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/match_costs/match_cost.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn.functional as F\n\nfrom mmdet.core.bbox.iou_calculators import bbox_overlaps\nfrom mmdet.core.bbox.transforms import bbox_cxcywh_to_xyxy, bbox_xyxy_to_cxcywh\nfrom .builder import MATCH_COST\n\n\n@MATCH_COST.register_module()\nclass BBoxL1Cost:\n    \"\"\"BBoxL1Cost.\n\n     Args:\n         weight (int | float, optional): loss_weight\n         box_format (str, optional): 'xyxy' for DETR, 'xywh' for Sparse_RCNN\n\n     Examples:\n         >>> from mmdet.core.bbox.match_costs.match_cost import BBoxL1Cost\n         >>> import torch\n         >>> self = BBoxL1Cost()\n         >>> bbox_pred = torch.rand(1, 4)\n         >>> gt_bboxes= torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])\n         >>> factor = torch.tensor([10, 8, 10, 8])\n         >>> self(bbox_pred, gt_bboxes, factor)\n         tensor([[1.6172, 1.6422]])\n    \"\"\"\n\n    def __init__(self, weight=1., box_format='xyxy'):\n        self.weight = weight\n        assert box_format in ['xyxy', 'xywh']\n        self.box_format = box_format\n\n    def __call__(self, bbox_pred, gt_bboxes):\n        \"\"\"\n        Args:\n            bbox_pred (Tensor): Predicted boxes with normalized coordinates\n                (cx, cy, w, h), which are all in range [0, 1]. Shape\n                (num_query, 4).\n            gt_bboxes (Tensor): Ground truth boxes with normalized\n                coordinates (x1, y1, x2, y2). Shape (num_gt, 4).\n\n        Returns:\n            torch.Tensor: bbox_cost value with weight\n        \"\"\"\n        if self.box_format == 'xywh':\n            gt_bboxes = bbox_xyxy_to_cxcywh(gt_bboxes)\n        elif self.box_format == 'xyxy':\n            bbox_pred = bbox_cxcywh_to_xyxy(bbox_pred)\n        bbox_cost = torch.cdist(bbox_pred, gt_bboxes, p=1)\n        return bbox_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass FocalLossCost:\n    \"\"\"FocalLossCost.\n\n     Args:\n         weight (int | float, optional): loss_weight\n         alpha (int | float, optional): focal_loss alpha\n         gamma (int | float, optional): focal_loss gamma\n         eps (float, optional): default 1e-12\n         binary_input (bool, optional): Whether the input is binary,\n            default False.\n\n     Examples:\n         >>> from mmdet.core.bbox.match_costs.match_cost import FocalLossCost\n         >>> import torch\n         >>> self = FocalLossCost()\n         >>> cls_pred = torch.rand(4, 3)\n         >>> gt_labels = torch.tensor([0, 1, 2])\n         >>> factor = torch.tensor([10, 8, 10, 8])\n         >>> self(cls_pred, gt_labels)\n         tensor([[-0.3236, -0.3364, -0.2699],\n                [-0.3439, -0.3209, -0.4807],\n                [-0.4099, -0.3795, -0.2929],\n                [-0.1950, -0.1207, -0.2626]])\n    \"\"\"\n\n    def __init__(self,\n                 weight=1.,\n                 alpha=0.25,\n                 gamma=2,\n                 eps=1e-12,\n                 binary_input=False):\n        self.weight = weight\n        self.alpha = alpha\n        self.gamma = gamma\n        self.eps = eps\n        self.binary_input = binary_input\n\n    def _focal_loss_cost(self, cls_pred, gt_labels):\n        \"\"\"\n        Args:\n            cls_pred (Tensor): Predicted classification logits, shape\n                (num_query, num_class).\n            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n\n        Returns:\n            torch.Tensor: cls_cost value with weight\n        \"\"\"\n        cls_pred = 
cls_pred.sigmoid()\n        neg_cost = -(1 - cls_pred + self.eps).log() * (\n            1 - self.alpha) * cls_pred.pow(self.gamma)\n        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (\n            1 - cls_pred).pow(self.gamma)\n\n        cls_cost = pos_cost[:, gt_labels] - neg_cost[:, gt_labels]\n        return cls_cost * self.weight\n\n    def _mask_focal_loss_cost(self, cls_pred, gt_labels):\n        \"\"\"\n        Args:\n            cls_pred (Tensor): Predicted classfication logits\n                in shape (num_query, d1, ..., dn), dtype=torch.float32.\n            gt_labels (Tensor): Ground truth in shape (num_gt, d1, ..., dn),\n                dtype=torch.long. Labels should be binary.\n\n        Returns:\n            Tensor: Focal cost matrix with weight in shape\\\n                (num_query, num_gt).\n        \"\"\"\n        cls_pred = cls_pred.flatten(1)\n        gt_labels = gt_labels.flatten(1).float()\n        n = cls_pred.shape[1]\n        cls_pred = cls_pred.sigmoid()\n        neg_cost = -(1 - cls_pred + self.eps).log() * (\n            1 - self.alpha) * cls_pred.pow(self.gamma)\n        pos_cost = -(cls_pred + self.eps).log() * self.alpha * (\n            1 - cls_pred).pow(self.gamma)\n\n        cls_cost = torch.einsum('nc,mc->nm', pos_cost, gt_labels) + \\\n            torch.einsum('nc,mc->nm', neg_cost, (1 - gt_labels))\n        return cls_cost / n * self.weight\n\n    def __call__(self, cls_pred, gt_labels):\n        \"\"\"\n        Args:\n            cls_pred (Tensor): Predicted classfication logits.\n            gt_labels (Tensor)): Labels.\n\n        Returns:\n            Tensor: Focal cost matrix with weight in shape\\\n                (num_query, num_gt).\n        \"\"\"\n        if self.binary_input:\n            return self._mask_focal_loss_cost(cls_pred, gt_labels)\n        else:\n            return self._focal_loss_cost(cls_pred, gt_labels)\n\n\n@MATCH_COST.register_module()\nclass ClassificationCost:\n    \"\"\"ClsSoftmaxCost.\n\n     Args:\n         weight (int | float, optional): loss_weight\n\n     Examples:\n         >>> from mmdet.core.bbox.match_costs.match_cost import \\\n         ... 
ClassificationCost\n         >>> import torch\n         >>> self = ClassificationCost()\n         >>> cls_pred = torch.rand(4, 3)\n         >>> gt_labels = torch.tensor([0, 1, 2])\n         >>> factor = torch.tensor([10, 8, 10, 8])\n         >>> self(cls_pred, gt_labels)\n         tensor([[-0.3430, -0.3525, -0.3045],\n                [-0.3077, -0.2931, -0.3992],\n                [-0.3664, -0.3455, -0.2881],\n                [-0.3343, -0.2701, -0.3956]])\n    \"\"\"\n\n    def __init__(self, weight=1.):\n        self.weight = weight\n\n    def __call__(self, cls_pred, gt_labels):\n        \"\"\"\n        Args:\n            cls_pred (Tensor): Predicted classification logits, shape\n                (num_query, num_class).\n            gt_labels (Tensor): Label of `gt_bboxes`, shape (num_gt,).\n\n        Returns:\n            torch.Tensor: cls_cost value with weight\n        \"\"\"\n        # Following the official DETR repo, contrary to the loss that\n        # NLL is used, we approximate it in 1 - cls_score[gt_label].\n        # The 1 is a constant that doesn't change the matching,\n        # so it can be omitted.\n        cls_score = cls_pred.softmax(-1)\n        cls_cost = -cls_score[:, gt_labels]\n        return cls_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass IoUCost:\n    \"\"\"IoUCost.\n\n     Args:\n         iou_mode (str, optional): iou mode such as 'iou' | 'giou'\n         weight (int | float, optional): loss weight\n\n     Examples:\n         >>> from mmdet.core.bbox.match_costs.match_cost import IoUCost\n         >>> import torch\n         >>> self = IoUCost()\n         >>> bboxes = torch.FloatTensor([[1,1, 2, 2], [2, 2, 3, 4]])\n         >>> gt_bboxes = torch.FloatTensor([[0, 0, 2, 4], [1, 2, 3, 4]])\n         >>> self(bboxes, gt_bboxes)\n         tensor([[-0.1250,  0.1667],\n                [ 0.1667, -0.5000]])\n    \"\"\"\n\n    def __init__(self, iou_mode='giou', weight=1.):\n        self.weight = weight\n        self.iou_mode = iou_mode\n\n    def __call__(self, bboxes, gt_bboxes):\n        \"\"\"\n        Args:\n            bboxes (Tensor): Predicted boxes with unnormalized coordinates\n                (x1, y1, x2, y2). Shape (num_query, 4).\n            gt_bboxes (Tensor): Ground truth boxes with unnormalized\n                coordinates (x1, y1, x2, y2). Shape (num_gt, 4).\n\n        Returns:\n            torch.Tensor: iou_cost value with weight\n        \"\"\"\n        # overlaps: [num_bboxes, num_gt]\n        overlaps = bbox_overlaps(\n            bboxes, gt_bboxes, mode=self.iou_mode, is_aligned=False)\n        # The 1 is a constant that doesn't change the matching, so omitted.\n        iou_cost = -overlaps\n        return iou_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass DiceCost:\n    \"\"\"Cost of mask assignments based on dice losses.\n\n    Args:\n        weight (int | float, optional): loss_weight. Defaults to 1.\n        pred_act (bool, optional): Whether to apply sigmoid to mask_pred.\n            Defaults to False.\n        eps (float, optional): default 1e-12.\n        naive_dice (bool, optional): If True, use the naive dice loss\n            in which the power of the number in the denominator is\n            the first power. 
If Flase, use the second power that\n            is adopted by K-Net and SOLO.\n            Defaults to True.\n    \"\"\"\n\n    def __init__(self, weight=1., pred_act=False, eps=1e-3, naive_dice=True):\n        self.weight = weight\n        self.pred_act = pred_act\n        self.eps = eps\n        self.naive_dice = naive_dice\n\n    def binary_mask_dice_loss(self, mask_preds, gt_masks):\n        \"\"\"\n        Args:\n            mask_preds (Tensor): Mask prediction in shape (num_query, *).\n            gt_masks (Tensor): Ground truth in shape (num_gt, *)\n                store 0 or 1, 0 for negative class and 1 for\n                positive class.\n\n        Returns:\n            Tensor: Dice cost matrix in shape (num_query, num_gt).\n        \"\"\"\n        mask_preds = mask_preds.flatten(1)\n        gt_masks = gt_masks.flatten(1).float()\n        numerator = 2 * torch.einsum('nc,mc->nm', mask_preds, gt_masks)\n        if self.naive_dice:\n            denominator = mask_preds.sum(-1)[:, None] + \\\n                gt_masks.sum(-1)[None, :]\n        else:\n            denominator = mask_preds.pow(2).sum(1)[:, None] + \\\n                gt_masks.pow(2).sum(1)[None, :]\n        loss = 1 - (numerator + self.eps) / (denominator + self.eps)\n        return loss\n\n    def __call__(self, mask_preds, gt_masks):\n        \"\"\"\n        Args:\n            mask_preds (Tensor): Mask prediction logits in shape (num_query, *)\n            gt_masks (Tensor): Ground truth in shape (num_gt, *)\n\n        Returns:\n            Tensor: Dice cost matrix with weight in shape (num_query, num_gt).\n        \"\"\"\n        if self.pred_act:\n            mask_preds = mask_preds.sigmoid()\n        dice_cost = self.binary_mask_dice_loss(mask_preds, gt_masks)\n        return dice_cost * self.weight\n\n\n@MATCH_COST.register_module()\nclass CrossEntropyLossCost:\n    \"\"\"CrossEntropyLossCost.\n\n    Args:\n        weight (int | float, optional): loss weight. Defaults to 1.\n        use_sigmoid (bool, optional): Whether the prediction uses sigmoid\n                of softmax. 
Defaults to True.\n    Examples:\n         >>> from mmdet.core.bbox.match_costs import CrossEntropyLossCost\n         >>> import torch\n         >>> bce = CrossEntropyLossCost(use_sigmoid=True)\n         >>> cls_pred = torch.tensor([[7.6, 1.2], [-1.3, 10]])\n         >>> gt_labels = torch.tensor([[1, 1], [1, 0]])\n         >>> print(bce(cls_pred, gt_labels))\n    \"\"\"\n\n    def __init__(self, weight=1., use_sigmoid=True):\n        assert use_sigmoid, 'use_sigmoid = False is not supported yet.'\n        self.weight = weight\n        self.use_sigmoid = use_sigmoid\n\n    def _binary_cross_entropy(self, cls_pred, gt_labels):\n        \"\"\"\n        Args:\n            cls_pred (Tensor): The prediction with shape (num_query, 1, *) or\n                (num_query, *).\n            gt_labels (Tensor): The learning label of prediction with\n                shape (num_gt, *).\n\n        Returns:\n            Tensor: Cross entropy cost matrix in shape (num_query, num_gt).\n        \"\"\"\n        cls_pred = cls_pred.flatten(1).float()\n        gt_labels = gt_labels.flatten(1).float()\n        n = cls_pred.shape[1]\n        pos = F.binary_cross_entropy_with_logits(\n            cls_pred, torch.ones_like(cls_pred), reduction='none')\n        neg = F.binary_cross_entropy_with_logits(\n            cls_pred, torch.zeros_like(cls_pred), reduction='none')\n        cls_cost = torch.einsum('nc,mc->nm', pos, gt_labels) + \\\n            torch.einsum('nc,mc->nm', neg, 1 - gt_labels)\n        cls_cost = cls_cost / n\n\n        return cls_cost\n\n    def __call__(self, cls_pred, gt_labels):\n        \"\"\"\n        Args:\n            cls_pred (Tensor): Predicted classification logits.\n            gt_labels (Tensor): Labels.\n\n        Returns:\n            Tensor: Cross entropy cost matrix with weight in\n                shape (num_query, num_gt).\n        \"\"\"\n        if self.use_sigmoid:\n            cls_cost = self._binary_cross_entropy(cls_pred, gt_labels)\n        else:\n            raise NotImplementedError\n\n        return cls_cost * self.weight\n"
  },
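Each cost class above returns a (num_query, num_gt) matrix; in DETR-style assignment these matrices are summed and handed to a Hungarian solver. The assigner that does this is not shown in this file, so the sketch below only illustrates that final step with scipy and random stand-in costs.

import torch
from scipy.optimize import linear_sum_assignment

num_query, num_gt = 4, 2
cls_cost = torch.rand(num_query, num_gt)   # e.g. FocalLossCost output
reg_cost = torch.rand(num_query, num_gt)   # e.g. BBoxL1Cost output
iou_cost = torch.rand(num_query, num_gt)   # e.g. IoUCost output

cost = cls_cost + reg_cost + iou_cost      # each cost already carries its own weight
matched_queries, matched_gts = linear_sum_assignment(cost.numpy())
print(matched_queries, matched_gts)        # one query per ground truth, minimizing total cost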
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_sampler import BaseSampler\nfrom .combined_sampler import CombinedSampler\nfrom .instance_balanced_pos_sampler import InstanceBalancedPosSampler\nfrom .iou_balanced_neg_sampler import IoUBalancedNegSampler\nfrom .mask_pseudo_sampler import MaskPseudoSampler\nfrom .mask_sampling_result import MaskSamplingResult\nfrom .ohem_sampler import OHEMSampler\nfrom .pseudo_sampler import PseudoSampler\nfrom .random_sampler import RandomSampler\nfrom .sampling_result import SamplingResult\nfrom .score_hlr_sampler import ScoreHLRSampler\n\n__all__ = [\n    'BaseSampler', 'PseudoSampler', 'RandomSampler',\n    'InstanceBalancedPosSampler', 'IoUBalancedNegSampler', 'CombinedSampler',\n    'OHEMSampler', 'SamplingResult', 'ScoreHLRSampler', 'MaskPseudoSampler',\n    'MaskSamplingResult'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/base_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nimport torch\n\nfrom .sampling_result import SamplingResult\n\n\nclass BaseSampler(metaclass=ABCMeta):\n    \"\"\"Base class of samplers.\"\"\"\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=True,\n                 **kwargs):\n        self.num = num\n        self.pos_fraction = pos_fraction\n        self.neg_pos_ub = neg_pos_ub\n        self.add_gt_as_proposals = add_gt_as_proposals\n        self.pos_sampler = self\n        self.neg_sampler = self\n\n    @abstractmethod\n    def _sample_pos(self, assign_result, num_expected, **kwargs):\n        \"\"\"Sample positive samples.\"\"\"\n        pass\n\n    @abstractmethod\n    def _sample_neg(self, assign_result, num_expected, **kwargs):\n        \"\"\"Sample negative samples.\"\"\"\n        pass\n\n    def sample(self,\n               assign_result,\n               bboxes,\n               gt_bboxes,\n               gt_labels=None,\n               **kwargs):\n        \"\"\"Sample positive and negative bboxes.\n\n        This is a simple implementation of bbox sampling given candidates,\n        assigning results and ground truth bboxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Bbox assigning results.\n            bboxes (Tensor): Boxes to be sampled from.\n            gt_bboxes (Tensor): Ground truth bboxes.\n            gt_labels (Tensor, optional): Class labels of ground truth bboxes.\n\n        Returns:\n            :obj:`SamplingResult`: Sampling result.\n\n        Example:\n            >>> from mmdet.core.bbox import RandomSampler\n            >>> from mmdet.core.bbox import AssignResult\n            >>> from mmdet.core.bbox.demodata import ensure_rng, random_boxes\n            >>> rng = ensure_rng(None)\n            >>> assign_result = AssignResult.random(rng=rng)\n            >>> bboxes = random_boxes(assign_result.num_preds, rng=rng)\n            >>> gt_bboxes = random_boxes(assign_result.num_gts, rng=rng)\n            >>> gt_labels = None\n            >>> self = RandomSampler(num=32, pos_fraction=0.5, neg_pos_ub=-1,\n            >>>                      add_gt_as_proposals=False)\n            >>> self = self.sample(assign_result, bboxes, gt_bboxes, gt_labels)\n        \"\"\"\n        if len(bboxes.shape) < 2:\n            bboxes = bboxes[None, :]\n\n        bboxes = bboxes[:, :4]\n\n        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)\n        if self.add_gt_as_proposals and len(gt_bboxes) > 0:\n            if gt_labels is None:\n                raise ValueError(\n                    'gt_labels must be given when add_gt_as_proposals is True')\n            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)\n            assign_result.add_gt_(gt_labels)\n            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)\n            gt_flags = torch.cat([gt_ones, gt_flags])\n\n        num_expected_pos = int(self.num * self.pos_fraction)\n        pos_inds = self.pos_sampler._sample_pos(\n            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)\n        # We found that sampled indices have duplicated items occasionally.\n        # (may be a bug of PyTorch)\n        pos_inds = pos_inds.unique()\n        num_sampled_pos = pos_inds.numel()\n        num_expected_neg = self.num - num_sampled_pos\n        if self.neg_pos_ub >= 0:\n            _pos = max(1, 
num_sampled_pos)\n            neg_upper_bound = int(self.neg_pos_ub * _pos)\n            if num_expected_neg > neg_upper_bound:\n                num_expected_neg = neg_upper_bound\n        neg_inds = self.neg_sampler._sample_neg(\n            assign_result, num_expected_neg, bboxes=bboxes, **kwargs)\n        neg_inds = neg_inds.unique()\n\n        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,\n                                         assign_result, gt_flags)\n        return sampling_result\n"
  },
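The budget logic in BaseSampler.sample above reduces to a little integer arithmetic; this sketch re-traces it with made-up numbers to show how neg_pos_ub caps the number of negatives when few positives are found.

num, pos_fraction, neg_pos_ub = 512, 0.25, 3

num_expected_pos = int(num * pos_fraction)   # 128
num_sampled_pos = 20                         # suppose only 20 unique positives exist
num_expected_neg = num - num_sampled_pos     # 492

if neg_pos_ub >= 0:
    neg_upper_bound = int(neg_pos_ub * max(1, num_sampled_pos))  # 60
    num_expected_neg = min(num_expected_neg, neg_upper_bound)

print(num_expected_pos, num_sampled_pos, num_expected_neg)  # 128 20 60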
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/combined_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import BBOX_SAMPLERS, build_sampler\nfrom .base_sampler import BaseSampler\n\n\n@BBOX_SAMPLERS.register_module()\nclass CombinedSampler(BaseSampler):\n    \"\"\"A sampler that combines positive sampler and negative sampler.\"\"\"\n\n    def __init__(self, pos_sampler, neg_sampler, **kwargs):\n        super(CombinedSampler, self).__init__(**kwargs)\n        self.pos_sampler = build_sampler(pos_sampler, **kwargs)\n        self.neg_sampler = build_sampler(neg_sampler, **kwargs)\n\n    def _sample_pos(self, **kwargs):\n        \"\"\"Sample positive samples.\"\"\"\n        raise NotImplementedError\n\n    def _sample_neg(self, **kwargs):\n        \"\"\"Sample negative samples.\"\"\"\n        raise NotImplementedError\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/instance_balanced_pos_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom ..builder import BBOX_SAMPLERS\nfrom .random_sampler import RandomSampler\n\n\n@BBOX_SAMPLERS.register_module()\nclass InstanceBalancedPosSampler(RandomSampler):\n    \"\"\"Instance balanced sampler that samples equal number of positive samples\n    for each instance.\"\"\"\n\n    def _sample_pos(self, assign_result, num_expected, **kwargs):\n        \"\"\"Sample positive boxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): The assigned results of boxes.\n            num_expected (int): The number of expected positive samples\n\n        Returns:\n            Tensor or ndarray: sampled indices.\n        \"\"\"\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)\n        if pos_inds.numel() != 0:\n            pos_inds = pos_inds.squeeze(1)\n        if pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            unique_gt_inds = assign_result.gt_inds[pos_inds].unique()\n            num_gts = len(unique_gt_inds)\n            num_per_gt = int(round(num_expected / float(num_gts)) + 1)\n            sampled_inds = []\n            for i in unique_gt_inds:\n                inds = torch.nonzero(\n                    assign_result.gt_inds == i.item(), as_tuple=False)\n                if inds.numel() != 0:\n                    inds = inds.squeeze(1)\n                else:\n                    continue\n                if len(inds) > num_per_gt:\n                    inds = self.random_choice(inds, num_per_gt)\n                sampled_inds.append(inds)\n            sampled_inds = torch.cat(sampled_inds)\n            if len(sampled_inds) < num_expected:\n                num_extra = num_expected - len(sampled_inds)\n                extra_inds = np.array(\n                    list(set(pos_inds.cpu()) - set(sampled_inds.cpu())))\n                if len(extra_inds) > num_extra:\n                    extra_inds = self.random_choice(extra_inds, num_extra)\n                extra_inds = torch.from_numpy(extra_inds).to(\n                    assign_result.gt_inds.device).long()\n                sampled_inds = torch.cat([sampled_inds, extra_inds])\n            elif len(sampled_inds) > num_expected:\n                sampled_inds = self.random_choice(sampled_inds, num_expected)\n            return sampled_inds\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/iou_balanced_neg_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom ..builder import BBOX_SAMPLERS\nfrom .random_sampler import RandomSampler\n\n\n@BBOX_SAMPLERS.register_module()\nclass IoUBalancedNegSampler(RandomSampler):\n    \"\"\"IoU Balanced Sampling.\n\n    arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)\n\n    Sampling proposals according to their IoU. `floor_fraction` of needed RoIs\n    are sampled from proposals whose IoU are lower than `floor_thr` randomly.\n    The others are sampled from proposals whose IoU are higher than\n    `floor_thr`. These proposals are sampled from some bins evenly, which are\n    split by `num_bins` via IoU evenly.\n\n    Args:\n        num (int): number of proposals.\n        pos_fraction (float): fraction of positive proposals.\n        floor_thr (float): threshold (minimum) IoU for IoU balanced sampling,\n            set to -1 if all using IoU balanced sampling.\n        floor_fraction (float): sampling fraction of proposals under floor_thr.\n        num_bins (int): number of bins in IoU balanced sampling.\n    \"\"\"\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 floor_thr=-1,\n                 floor_fraction=0,\n                 num_bins=3,\n                 **kwargs):\n        super(IoUBalancedNegSampler, self).__init__(num, pos_fraction,\n                                                    **kwargs)\n        assert floor_thr >= 0 or floor_thr == -1\n        assert 0 <= floor_fraction <= 1\n        assert num_bins >= 1\n\n        self.floor_thr = floor_thr\n        self.floor_fraction = floor_fraction\n        self.num_bins = num_bins\n\n    def sample_via_interval(self, max_overlaps, full_set, num_expected):\n        \"\"\"Sample according to the iou interval.\n\n        Args:\n            max_overlaps (torch.Tensor): IoU between bounding boxes and ground\n                truth boxes.\n            full_set (set(int)): A full set of indices of boxes。\n            num_expected (int): Number of expected samples。\n\n        Returns:\n            np.ndarray: Indices  of samples\n        \"\"\"\n        max_iou = max_overlaps.max()\n        iou_interval = (max_iou - self.floor_thr) / self.num_bins\n        per_num_expected = int(num_expected / self.num_bins)\n\n        sampled_inds = []\n        for i in range(self.num_bins):\n            start_iou = self.floor_thr + i * iou_interval\n            end_iou = self.floor_thr + (i + 1) * iou_interval\n            tmp_set = set(\n                np.where(\n                    np.logical_and(max_overlaps >= start_iou,\n                                   max_overlaps < end_iou))[0])\n            tmp_inds = list(tmp_set & full_set)\n            if len(tmp_inds) > per_num_expected:\n                tmp_sampled_set = self.random_choice(tmp_inds,\n                                                     per_num_expected)\n            else:\n                tmp_sampled_set = np.array(tmp_inds, dtype=np.int)\n            sampled_inds.append(tmp_sampled_set)\n\n        sampled_inds = np.concatenate(sampled_inds)\n        if len(sampled_inds) < num_expected:\n            num_extra = num_expected - len(sampled_inds)\n            extra_inds = np.array(list(full_set - set(sampled_inds)))\n            if len(extra_inds) > num_extra:\n                extra_inds = self.random_choice(extra_inds, num_extra)\n            sampled_inds = np.concatenate([sampled_inds, extra_inds])\n\n        return sampled_inds\n\n    def 
_sample_neg(self, assign_result, num_expected, **kwargs):\n        \"\"\"Sample negative boxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): The assigned results of boxes.\n            num_expected (int): The number of expected negative samples\n\n        Returns:\n            Tensor or ndarray: sampled indices.\n        \"\"\"\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)\n        if neg_inds.numel() != 0:\n            neg_inds = neg_inds.squeeze(1)\n        if len(neg_inds) <= num_expected:\n            return neg_inds\n        else:\n            max_overlaps = assign_result.max_overlaps.cpu().numpy()\n            # balance sampling for negative samples\n            neg_set = set(neg_inds.cpu().numpy())\n\n            if self.floor_thr > 0:\n                floor_set = set(\n                    np.where(\n                        np.logical_and(max_overlaps >= 0,\n                                       max_overlaps < self.floor_thr))[0])\n                iou_sampling_set = set(\n                    np.where(max_overlaps >= self.floor_thr)[0])\n            elif self.floor_thr == 0:\n                floor_set = set(np.where(max_overlaps == 0)[0])\n                iou_sampling_set = set(\n                    np.where(max_overlaps > self.floor_thr)[0])\n            else:\n                floor_set = set()\n                iou_sampling_set = set(\n                    np.where(max_overlaps > self.floor_thr)[0])\n                # for sampling interval calculation\n                self.floor_thr = 0\n\n            floor_neg_inds = list(floor_set & neg_set)\n            iou_sampling_neg_inds = list(iou_sampling_set & neg_set)\n            num_expected_iou_sampling = int(num_expected *\n                                            (1 - self.floor_fraction))\n            if len(iou_sampling_neg_inds) > num_expected_iou_sampling:\n                if self.num_bins >= 2:\n                    iou_sampled_inds = self.sample_via_interval(\n                        max_overlaps, set(iou_sampling_neg_inds),\n                        num_expected_iou_sampling)\n                else:\n                    iou_sampled_inds = self.random_choice(\n                        iou_sampling_neg_inds, num_expected_iou_sampling)\n            else:\n                iou_sampled_inds = np.array(\n                    iou_sampling_neg_inds, dtype=np.int)\n            num_expected_floor = num_expected - len(iou_sampled_inds)\n            if len(floor_neg_inds) > num_expected_floor:\n                sampled_floor_inds = self.random_choice(\n                    floor_neg_inds, num_expected_floor)\n            else:\n                sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int)\n            sampled_inds = np.concatenate(\n                (sampled_floor_inds, iou_sampled_inds))\n            if len(sampled_inds) < num_expected:\n                num_extra = num_expected - len(sampled_inds)\n                extra_inds = np.array(list(neg_set - set(sampled_inds)))\n                if len(extra_inds) > num_extra:\n                    extra_inds = self.random_choice(extra_inds, num_extra)\n                sampled_inds = np.concatenate((sampled_inds, extra_inds))\n            sampled_inds = torch.from_numpy(sampled_inds).long().to(\n                assign_result.gt_inds.device)\n            return sampled_inds\n"
  },
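A small numpy sketch of the binning idea behind sample_via_interval above: negatives are split into num_bins equal IoU intervals above floor_thr, and an equal share is drawn from each bin. The IoU values here are random placeholders, not real assignment results.

import numpy as np

rng = np.random.default_rng(0)
max_overlaps = rng.uniform(0.0, 0.5, size=1000)   # stand-in IoU of each negative with its best GT
floor_thr, num_bins, num_expected = 0.0, 3, 30

iou_interval = (max_overlaps.max() - floor_thr) / num_bins
per_bin = num_expected // num_bins
sampled = []
for i in range(num_bins):
    lo, hi = floor_thr + i * iou_interval, floor_thr + (i + 1) * iou_interval
    inds = np.where((max_overlaps >= lo) & (max_overlaps < hi))[0]
    sampled.append(rng.choice(inds, size=min(per_bin, len(inds)), replace=False))
sampled = np.concatenate(sampled)
print(len(sampled))   # at most num_expected, drawn evenly across IoU bins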
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/mask_pseudo_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"copy from\nhttps://github.com/ZwwWayne/K-Net/blob/main/knet/det/mask_pseudo_sampler.py.\"\"\"\n\nimport torch\n\nfrom mmdet.core.bbox.builder import BBOX_SAMPLERS\nfrom .base_sampler import BaseSampler\nfrom .mask_sampling_result import MaskSamplingResult\n\n\n@BBOX_SAMPLERS.register_module()\nclass MaskPseudoSampler(BaseSampler):\n    \"\"\"A pseudo sampler that does not do sampling actually.\"\"\"\n\n    def __init__(self, **kwargs):\n        pass\n\n    def _sample_pos(self, **kwargs):\n        \"\"\"Sample positive samples.\"\"\"\n        raise NotImplementedError\n\n    def _sample_neg(self, **kwargs):\n        \"\"\"Sample negative samples.\"\"\"\n        raise NotImplementedError\n\n    def sample(self, assign_result, masks, gt_masks, **kwargs):\n        \"\"\"Directly returns the positive and negative indices  of samples.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Assigned results\n            masks (torch.Tensor): Bounding boxes\n            gt_masks (torch.Tensor): Ground truth boxes\n        Returns:\n            :obj:`SamplingResult`: sampler results\n        \"\"\"\n        pos_inds = torch.nonzero(\n            assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()\n        neg_inds = torch.nonzero(\n            assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()\n        gt_flags = masks.new_zeros(masks.shape[0], dtype=torch.uint8)\n        sampling_result = MaskSamplingResult(pos_inds, neg_inds, masks,\n                                             gt_masks, assign_result, gt_flags)\n        return sampling_result\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/mask_sampling_result.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"copy from\nhttps://github.com/ZwwWayne/K-Net/blob/main/knet/det/mask_pseudo_sampler.py.\"\"\"\n\nimport torch\n\nfrom .sampling_result import SamplingResult\n\n\nclass MaskSamplingResult(SamplingResult):\n    \"\"\"Mask sampling result.\"\"\"\n\n    def __init__(self, pos_inds, neg_inds, masks, gt_masks, assign_result,\n                 gt_flags):\n        self.pos_inds = pos_inds\n        self.neg_inds = neg_inds\n        self.pos_masks = masks[pos_inds]\n        self.neg_masks = masks[neg_inds]\n        self.pos_is_gt = gt_flags[pos_inds]\n\n        self.num_gts = gt_masks.shape[0]\n        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1\n\n        if gt_masks.numel() == 0:\n            # hack for index error case\n            assert self.pos_assigned_gt_inds.numel() == 0\n            self.pos_gt_masks = torch.empty_like(gt_masks)\n        else:\n            self.pos_gt_masks = gt_masks[self.pos_assigned_gt_inds, :]\n\n        if assign_result.labels is not None:\n            self.pos_gt_labels = assign_result.labels[pos_inds]\n        else:\n            self.pos_gt_labels = None\n\n    @property\n    def masks(self):\n        \"\"\"torch.Tensor: concatenated positive and negative boxes\"\"\"\n        return torch.cat([self.pos_masks, self.neg_masks])\n\n    def __nice__(self):\n        data = self.info.copy()\n        data['pos_masks'] = data.pop('pos_masks').shape\n        data['neg_masks'] = data.pop('neg_masks').shape\n        parts = [f\"'{k}': {v!r}\" for k, v in sorted(data.items())]\n        body = '    ' + ',\\n    '.join(parts)\n        return '{\\n' + body + '\\n}'\n\n    @property\n    def info(self):\n        \"\"\"Returns a dictionary of info about the object.\"\"\"\n        return {\n            'pos_inds': self.pos_inds,\n            'neg_inds': self.neg_inds,\n            'pos_masks': self.pos_masks,\n            'neg_masks': self.neg_masks,\n            'pos_is_gt': self.pos_is_gt,\n            'num_gts': self.num_gts,\n            'pos_assigned_gt_inds': self.pos_assigned_gt_inds,\n        }\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/ohem_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_SAMPLERS\nfrom ..transforms import bbox2roi\nfrom .base_sampler import BaseSampler\n\n\n@BBOX_SAMPLERS.register_module()\nclass OHEMSampler(BaseSampler):\n    r\"\"\"Online Hard Example Mining Sampler described in `Training Region-based\n    Object Detectors with Online Hard Example Mining\n    <https://arxiv.org/abs/1604.03540>`_.\n    \"\"\"\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 context,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=True,\n                 loss_key='loss_cls',\n                 **kwargs):\n        super(OHEMSampler, self).__init__(num, pos_fraction, neg_pos_ub,\n                                          add_gt_as_proposals)\n        self.context = context\n        if not hasattr(self.context, 'num_stages'):\n            self.bbox_head = self.context.bbox_head\n        else:\n            self.bbox_head = self.context.bbox_head[self.context.current_stage]\n\n        self.loss_key = loss_key\n\n    def hard_mining(self, inds, num_expected, bboxes, labels, feats):\n        with torch.no_grad():\n            rois = bbox2roi([bboxes])\n            if not hasattr(self.context, 'num_stages'):\n                bbox_results = self.context._bbox_forward(feats, rois)\n            else:\n                bbox_results = self.context._bbox_forward(\n                    self.context.current_stage, feats, rois)\n            cls_score = bbox_results['cls_score']\n            loss = self.bbox_head.loss(\n                cls_score=cls_score,\n                bbox_pred=None,\n                rois=rois,\n                labels=labels,\n                label_weights=cls_score.new_ones(cls_score.size(0)),\n                bbox_targets=None,\n                bbox_weights=None,\n                reduction_override='none')[self.loss_key]\n            _, topk_loss_inds = loss.topk(num_expected)\n        return inds[topk_loss_inds]\n\n    def _sample_pos(self,\n                    assign_result,\n                    num_expected,\n                    bboxes=None,\n                    feats=None,\n                    **kwargs):\n        \"\"\"Sample positive boxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Assigned results\n            num_expected (int): Number of expected positive samples\n            bboxes (torch.Tensor, optional): Boxes. 
Defaults to None.\n            feats (list[torch.Tensor], optional): Multi-level features.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: Indices  of positive samples\n        \"\"\"\n        # Sample some hard positive samples\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)\n        if pos_inds.numel() != 0:\n            pos_inds = pos_inds.squeeze(1)\n        if pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            return self.hard_mining(pos_inds, num_expected, bboxes[pos_inds],\n                                    assign_result.labels[pos_inds], feats)\n\n    def _sample_neg(self,\n                    assign_result,\n                    num_expected,\n                    bboxes=None,\n                    feats=None,\n                    **kwargs):\n        \"\"\"Sample negative boxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Assigned results\n            num_expected (int): Number of expected negative samples\n            bboxes (torch.Tensor, optional): Boxes. Defaults to None.\n            feats (list[torch.Tensor], optional): Multi-level features.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: Indices  of negative samples\n        \"\"\"\n        # Sample some hard negative samples\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)\n        if neg_inds.numel() != 0:\n            neg_inds = neg_inds.squeeze(1)\n        if len(neg_inds) <= num_expected:\n            return neg_inds\n        else:\n            neg_labels = assign_result.labels.new_empty(\n                neg_inds.size(0)).fill_(self.bbox_head.num_classes)\n            return self.hard_mining(neg_inds, num_expected, bboxes[neg_inds],\n                                    neg_labels, feats)\n"
  },
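The heart of hard_mining above is a single top-k over per-candidate losses; this sketch isolates that selection step, with random numbers standing in for the 'loss_cls' values computed by the bbox head.

import torch

loss = torch.rand(100)                  # per-candidate loss with reduction='none'
num_expected = 16
_, topk_inds = loss.topk(num_expected)  # keep the num_expected hardest candidates
print(topk_inds.shape)                  # torch.Size([16])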
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/pseudo_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_SAMPLERS\nfrom .base_sampler import BaseSampler\nfrom .sampling_result import SamplingResult\n\n\n@BBOX_SAMPLERS.register_module()\nclass PseudoSampler(BaseSampler):\n    \"\"\"A pseudo sampler that does not do sampling actually.\"\"\"\n\n    def __init__(self, **kwargs):\n        pass\n\n    def _sample_pos(self, **kwargs):\n        \"\"\"Sample positive samples.\"\"\"\n        raise NotImplementedError\n\n    def _sample_neg(self, **kwargs):\n        \"\"\"Sample negative samples.\"\"\"\n        raise NotImplementedError\n\n    def sample(self, assign_result, bboxes, gt_bboxes, *args, **kwargs):\n        \"\"\"Directly returns the positive and negative indices  of samples.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Assigned results\n            bboxes (torch.Tensor): Bounding boxes\n            gt_bboxes (torch.Tensor): Ground truth boxes\n\n        Returns:\n            :obj:`SamplingResult`: sampler results\n        \"\"\"\n        pos_inds = torch.nonzero(\n            assign_result.gt_inds > 0, as_tuple=False).squeeze(-1).unique()\n        neg_inds = torch.nonzero(\n            assign_result.gt_inds == 0, as_tuple=False).squeeze(-1).unique()\n        gt_flags = bboxes.new_zeros(bboxes.shape[0], dtype=torch.uint8)\n        sampling_result = SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,\n                                         assign_result, gt_flags)\n        return sampling_result\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/random_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom ..builder import BBOX_SAMPLERS\nfrom .base_sampler import BaseSampler\n\n\n@BBOX_SAMPLERS.register_module()\nclass RandomSampler(BaseSampler):\n    \"\"\"Random sampler.\n\n    Args:\n        num (int): Number of samples\n        pos_fraction (float): Fraction of positive samples\n        neg_pos_ub (int, optional): Upper bound number of negative and\n            positive samples. Defaults to -1.\n        add_gt_as_proposals (bool, optional): Whether to add ground truth\n            boxes as proposals. Defaults to True.\n    \"\"\"\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=True,\n                 **kwargs):\n        from mmdet.core.bbox import demodata\n        super(RandomSampler, self).__init__(num, pos_fraction, neg_pos_ub,\n                                            add_gt_as_proposals)\n        self.rng = demodata.ensure_rng(kwargs.get('rng', None))\n\n    def random_choice(self, gallery, num):\n        \"\"\"Random select some elements from the gallery.\n\n        If `gallery` is a Tensor, the returned indices will be a Tensor;\n        If `gallery` is a ndarray or list, the returned indices will be a\n        ndarray.\n\n        Args:\n            gallery (Tensor | ndarray | list): indices pool.\n            num (int): expected sample num.\n\n        Returns:\n            Tensor or ndarray: sampled indices.\n        \"\"\"\n        assert len(gallery) >= num\n\n        is_tensor = isinstance(gallery, torch.Tensor)\n        if not is_tensor:\n            if torch.cuda.is_available():\n                device = torch.cuda.current_device()\n            else:\n                device = 'cpu'\n            gallery = torch.tensor(gallery, dtype=torch.long, device=device)\n        # This is a temporary fix. We can revert the following code\n        # when PyTorch fixes the abnormal return of torch.randperm.\n        # See: https://github.com/open-mmlab/mmdetection/pull/5014\n        perm = torch.randperm(gallery.numel())[:num].to(device=gallery.device)\n        rand_inds = gallery[perm]\n        if not is_tensor:\n            rand_inds = rand_inds.cpu().numpy()\n        return rand_inds\n\n    def _sample_pos(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some positive samples.\"\"\"\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0, as_tuple=False)\n        if pos_inds.numel() != 0:\n            pos_inds = pos_inds.squeeze(1)\n        if pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            return self.random_choice(pos_inds, num_expected)\n\n    def _sample_neg(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some negative samples.\"\"\"\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0, as_tuple=False)\n        if neg_inds.numel() != 0:\n            neg_inds = neg_inds.squeeze(1)\n        if len(neg_inds) <= num_expected:\n            return neg_inds\n        else:\n            return self.random_choice(neg_inds, num_expected)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/sampling_result.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.utils import util_mixins\n\n\nclass SamplingResult(util_mixins.NiceRepr):\n    \"\"\"Bbox sampling result.\n\n    Example:\n        >>> # xdoctest: +IGNORE_WANT\n        >>> from mmdet.core.bbox.samplers.sampling_result import *  # NOQA\n        >>> self = SamplingResult.random(rng=10)\n        >>> print(f'self = {self}')\n        self = <SamplingResult({\n            'neg_bboxes': torch.Size([12, 4]),\n            'neg_inds': tensor([ 0,  1,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12]),\n            'num_gts': 4,\n            'pos_assigned_gt_inds': tensor([], dtype=torch.int64),\n            'pos_bboxes': torch.Size([0, 4]),\n            'pos_inds': tensor([], dtype=torch.int64),\n            'pos_is_gt': tensor([], dtype=torch.uint8)\n        })>\n    \"\"\"\n\n    def __init__(self, pos_inds, neg_inds, bboxes, gt_bboxes, assign_result,\n                 gt_flags):\n        self.pos_inds = pos_inds\n        self.neg_inds = neg_inds\n        self.pos_bboxes = bboxes[pos_inds]\n        self.neg_bboxes = bboxes[neg_inds]\n        self.pos_is_gt = gt_flags[pos_inds]\n\n        self.num_gts = gt_bboxes.shape[0]\n        self.pos_assigned_gt_inds = assign_result.gt_inds[pos_inds] - 1\n\n        if gt_bboxes.numel() == 0:\n            # hack for index error case\n            assert self.pos_assigned_gt_inds.numel() == 0\n            self.pos_gt_bboxes = torch.empty_like(gt_bboxes).view(-1, 4)\n        else:\n            if len(gt_bboxes.shape) < 2:\n                gt_bboxes = gt_bboxes.view(-1, 4)\n\n            self.pos_gt_bboxes = gt_bboxes[self.pos_assigned_gt_inds.long(), :]\n\n        if assign_result.labels is not None:\n            self.pos_gt_labels = assign_result.labels[pos_inds]\n        else:\n            self.pos_gt_labels = None\n\n    @property\n    def bboxes(self):\n        \"\"\"torch.Tensor: concatenated positive and negative boxes\"\"\"\n        return torch.cat([self.pos_bboxes, self.neg_bboxes])\n\n    def to(self, device):\n        \"\"\"Change the device of the data inplace.\n\n        Example:\n            >>> self = SamplingResult.random()\n            >>> print(f'self = {self.to(None)}')\n            >>> # xdoctest: +REQUIRES(--gpu)\n            >>> print(f'self = {self.to(0)}')\n        \"\"\"\n        _dict = self.__dict__\n        for key, value in _dict.items():\n            if isinstance(value, torch.Tensor):\n                _dict[key] = value.to(device)\n        return self\n\n    def __nice__(self):\n        data = self.info.copy()\n        data['pos_bboxes'] = data.pop('pos_bboxes').shape\n        data['neg_bboxes'] = data.pop('neg_bboxes').shape\n        parts = [f\"'{k}': {v!r}\" for k, v in sorted(data.items())]\n        body = '    ' + ',\\n    '.join(parts)\n        return '{\\n' + body + '\\n}'\n\n    @property\n    def info(self):\n        \"\"\"Returns a dictionary of info about the object.\"\"\"\n        return {\n            'pos_inds': self.pos_inds,\n            'neg_inds': self.neg_inds,\n            'pos_bboxes': self.pos_bboxes,\n            'neg_bboxes': self.neg_bboxes,\n            'pos_is_gt': self.pos_is_gt,\n            'num_gts': self.num_gts,\n            'pos_assigned_gt_inds': self.pos_assigned_gt_inds,\n        }\n\n    @classmethod\n    def random(cls, rng=None, **kwargs):\n        \"\"\"\n        Args:\n            rng (None | int | numpy.random.RandomState): seed or state.\n            kwargs (keyword arguments):\n                - num_preds: 
number of predicted boxes\n                - num_gts: number of true boxes\n                - p_ignore (float): probability of a predicted box assigned to \\\n                    an ignored truth.\n                - p_assigned (float): probability of a predicted box not being \\\n                    assigned.\n                - p_use_label (float | bool): with labels or not.\n\n        Returns:\n            :obj:`SamplingResult`: Randomly generated sampling result.\n\n        Example:\n            >>> from mmdet.core.bbox.samplers.sampling_result import *  # NOQA\n            >>> self = SamplingResult.random()\n            >>> print(self.__dict__)\n        \"\"\"\n        from mmdet.core.bbox import demodata\n        from mmdet.core.bbox.assigners.assign_result import AssignResult\n        from mmdet.core.bbox.samplers.random_sampler import RandomSampler\n        rng = demodata.ensure_rng(rng)\n\n        # make probabilistic?\n        num = 32\n        pos_fraction = 0.5\n        neg_pos_ub = -1\n\n        assign_result = AssignResult.random(rng=rng, **kwargs)\n\n        # Note we could just compute an assignment\n        bboxes = demodata.random_boxes(assign_result.num_preds, rng=rng)\n        gt_bboxes = demodata.random_boxes(assign_result.num_gts, rng=rng)\n\n        if rng.rand() > 0.2:\n            # sometimes algorithms squeeze their data, be robust to that\n            gt_bboxes = gt_bboxes.squeeze()\n            bboxes = bboxes.squeeze()\n\n        if assign_result.labels is None:\n            gt_labels = None\n        else:\n            gt_labels = None  # todo\n\n        if gt_labels is None:\n            add_gt_as_proposals = False\n        else:\n            add_gt_as_proposals = True  # make probabilistic?\n\n        sampler = RandomSampler(\n            num,\n            pos_fraction,\n            neg_pos_ub=neg_pos_ub,\n            add_gt_as_proposals=add_gt_as_proposals,\n            rng=rng)\n        self = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)\n        return self\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/samplers/score_hlr_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops import nms_match\n\nfrom ..builder import BBOX_SAMPLERS\nfrom ..transforms import bbox2roi\nfrom .base_sampler import BaseSampler\nfrom .sampling_result import SamplingResult\n\n\n@BBOX_SAMPLERS.register_module()\nclass ScoreHLRSampler(BaseSampler):\n    r\"\"\"Importance-based Sample Reweighting (ISR_N), described in `Prime Sample\n    Attention in Object Detection <https://arxiv.org/abs/1904.04821>`_.\n\n    Score hierarchical local rank (HLR) differentiates with RandomSampler in\n    negative part. It firstly computes Score-HLR in a two-step way,\n    then linearly maps score hlr to the loss weights.\n\n    Args:\n        num (int): Total number of sampled RoIs.\n        pos_fraction (float): Fraction of positive samples.\n        context (:class:`BaseRoIHead`): RoI head that the sampler belongs to.\n        neg_pos_ub (int): Upper bound of the ratio of num negative to num\n            positive, -1 means no upper bound.\n        add_gt_as_proposals (bool): Whether to add ground truth as proposals.\n        k (float): Power of the non-linear mapping.\n        bias (float): Shift of the non-linear mapping.\n        score_thr (float): Minimum score that a negative sample is to be\n            considered as valid bbox.\n    \"\"\"\n\n    def __init__(self,\n                 num,\n                 pos_fraction,\n                 context,\n                 neg_pos_ub=-1,\n                 add_gt_as_proposals=True,\n                 k=0.5,\n                 bias=0,\n                 score_thr=0.05,\n                 iou_thr=0.5,\n                 **kwargs):\n        super().__init__(num, pos_fraction, neg_pos_ub, add_gt_as_proposals)\n        self.k = k\n        self.bias = bias\n        self.score_thr = score_thr\n        self.iou_thr = iou_thr\n        self.context = context\n        # context of cascade detectors is a list, so distinguish them here.\n        if not hasattr(context, 'num_stages'):\n            self.bbox_roi_extractor = context.bbox_roi_extractor\n            self.bbox_head = context.bbox_head\n            self.with_shared_head = context.with_shared_head\n            if self.with_shared_head:\n                self.shared_head = context.shared_head\n        else:\n            self.bbox_roi_extractor = context.bbox_roi_extractor[\n                context.current_stage]\n            self.bbox_head = context.bbox_head[context.current_stage]\n\n    @staticmethod\n    def random_choice(gallery, num):\n        \"\"\"Randomly select some elements from the gallery.\n\n        If `gallery` is a Tensor, the returned indices will be a Tensor;\n        If `gallery` is a ndarray or list, the returned indices will be a\n        ndarray.\n\n        Args:\n            gallery (Tensor | ndarray | list): indices pool.\n            num (int): expected sample num.\n\n        Returns:\n            Tensor or ndarray: sampled indices.\n        \"\"\"\n        assert len(gallery) >= num\n\n        is_tensor = isinstance(gallery, torch.Tensor)\n        if not is_tensor:\n            if torch.cuda.is_available():\n                device = torch.cuda.current_device()\n            else:\n                device = 'cpu'\n            gallery = torch.tensor(gallery, dtype=torch.long, device=device)\n        perm = torch.randperm(gallery.numel(), device=gallery.device)[:num]\n        rand_inds = gallery[perm]\n        if not is_tensor:\n            rand_inds = rand_inds.cpu().numpy()\n        return rand_inds\n\n    
def _sample_pos(self, assign_result, num_expected, **kwargs):\n        \"\"\"Randomly sample some positive samples.\"\"\"\n        pos_inds = torch.nonzero(assign_result.gt_inds > 0).flatten()\n        if pos_inds.numel() <= num_expected:\n            return pos_inds\n        else:\n            return self.random_choice(pos_inds, num_expected)\n\n    def _sample_neg(self,\n                    assign_result,\n                    num_expected,\n                    bboxes,\n                    feats=None,\n                    img_meta=None,\n                    **kwargs):\n        \"\"\"Sample negative samples.\n\n        Score-HLR sampling is done in the following steps:\n        1. Take the maximum positive score prediction of each negative sample\n            as s_i.\n        2. Filter out negative samples whose s_i <= score_thr; the remaining\n            samples are called valid samples.\n        3. Use NMS-Match to divide valid samples into different groups;\n            samples in the same group greatly overlap with each other.\n        4. Rank the matched samples in two steps to get Score-HLR.\n            (1) In the same group, rank samples with their scores.\n            (2) In the same score rank across different groups,\n                rank samples with their scores again.\n        5. Linearly map Score-HLR to the final label weights.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Result of the assigner.\n            num_expected (int): Expected number of samples.\n            bboxes (Tensor): Boxes to be sampled.\n            feats (Tensor): Features coming from FPN.\n            img_meta (dict): Meta information dictionary.\n        \"\"\"\n        neg_inds = torch.nonzero(assign_result.gt_inds == 0).flatten()\n        num_neg = neg_inds.size(0)\n        if num_neg == 0:\n            return neg_inds, None\n        with torch.no_grad():\n            neg_bboxes = bboxes[neg_inds]\n            neg_rois = bbox2roi([neg_bboxes])\n            bbox_result = self.context._bbox_forward(feats, neg_rois)\n            cls_score, bbox_pred = bbox_result['cls_score'], bbox_result[\n                'bbox_pred']\n\n            ori_loss = self.bbox_head.loss(\n                cls_score=cls_score,\n                bbox_pred=None,\n                rois=None,\n                labels=neg_inds.new_full((num_neg, ),\n                                         self.bbox_head.num_classes),\n                label_weights=cls_score.new_ones(num_neg),\n                bbox_targets=None,\n                bbox_weights=None,\n                reduction_override='none')['loss_cls']\n\n            # filter out samples with the max score lower than score_thr\n            max_score, argmax_score = cls_score.softmax(-1)[:, :-1].max(-1)\n            valid_inds = (max_score > self.score_thr).nonzero().view(-1)\n            invalid_inds = (max_score <= self.score_thr).nonzero().view(-1)\n            num_valid = valid_inds.size(0)\n            num_invalid = invalid_inds.size(0)\n\n            num_expected = min(num_neg, num_expected)\n            num_hlr = min(num_valid, num_expected)\n            num_rand = num_expected - num_hlr\n            if num_valid > 0:\n                valid_rois = neg_rois[valid_inds]\n                valid_max_score = max_score[valid_inds]\n                valid_argmax_score = argmax_score[valid_inds]\n                valid_bbox_pred = bbox_pred[valid_inds]\n\n                # valid_bbox_pred shape: [num_valid, #num_classes, 4]\n                valid_bbox_pred = 
valid_bbox_pred.view(\n                    valid_bbox_pred.size(0), -1, 4)\n                selected_bbox_pred = valid_bbox_pred[range(num_valid),\n                                                     valid_argmax_score]\n                pred_bboxes = self.bbox_head.bbox_coder.decode(\n                    valid_rois[:, 1:], selected_bbox_pred)\n                pred_bboxes_with_score = torch.cat(\n                    [pred_bboxes, valid_max_score[:, None]], -1)\n                group = nms_match(pred_bboxes_with_score, self.iou_thr)\n\n                # imp: importance\n                imp = cls_score.new_zeros(num_valid)\n                for g in group:\n                    g_score = valid_max_score[g]\n                    # g_score has already sorted\n                    rank = g_score.new_tensor(range(g_score.size(0)))\n                    imp[g] = num_valid - rank + g_score\n                _, imp_rank_inds = imp.sort(descending=True)\n                _, imp_rank = imp_rank_inds.sort()\n                hlr_inds = imp_rank_inds[:num_expected]\n\n                if num_rand > 0:\n                    rand_inds = torch.randperm(num_invalid)[:num_rand]\n                    select_inds = torch.cat(\n                        [valid_inds[hlr_inds], invalid_inds[rand_inds]])\n                else:\n                    select_inds = valid_inds[hlr_inds]\n\n                neg_label_weights = cls_score.new_ones(num_expected)\n\n                up_bound = max(num_expected, num_valid)\n                imp_weights = (up_bound -\n                               imp_rank[hlr_inds].float()) / up_bound\n                neg_label_weights[:num_hlr] = imp_weights\n                neg_label_weights[num_hlr:] = imp_weights.min()\n                neg_label_weights = (self.bias +\n                                     (1 - self.bias) * neg_label_weights).pow(\n                                         self.k)\n                ori_selected_loss = ori_loss[select_inds]\n                new_loss = ori_selected_loss * neg_label_weights\n                norm_ratio = ori_selected_loss.sum() / new_loss.sum()\n                neg_label_weights *= norm_ratio\n            else:\n                neg_label_weights = cls_score.new_ones(num_expected)\n                select_inds = torch.randperm(num_neg)[:num_expected]\n\n            return neg_inds[select_inds], neg_label_weights\n\n    def sample(self,\n               assign_result,\n               bboxes,\n               gt_bboxes,\n               gt_labels=None,\n               img_meta=None,\n               **kwargs):\n        \"\"\"Sample positive and negative bboxes.\n\n        This is a simple implementation of bbox sampling given candidates,\n        assigning results and ground truth bboxes.\n\n        Args:\n            assign_result (:obj:`AssignResult`): Bbox assigning results.\n            bboxes (Tensor): Boxes to be sampled from.\n            gt_bboxes (Tensor): Ground truth bboxes.\n            gt_labels (Tensor, optional): Class labels of ground truth bboxes.\n\n        Returns:\n            tuple[:obj:`SamplingResult`, Tensor]: Sampling result and negative\n                label weights.\n        \"\"\"\n        bboxes = bboxes[:, :4]\n\n        gt_flags = bboxes.new_zeros((bboxes.shape[0], ), dtype=torch.uint8)\n        if self.add_gt_as_proposals:\n            bboxes = torch.cat([gt_bboxes, bboxes], dim=0)\n            assign_result.add_gt_(gt_labels)\n            gt_ones = bboxes.new_ones(gt_bboxes.shape[0], dtype=torch.uint8)\n            gt_flags = 
torch.cat([gt_ones, gt_flags])\n\n        num_expected_pos = int(self.num * self.pos_fraction)\n        pos_inds = self.pos_sampler._sample_pos(\n            assign_result, num_expected_pos, bboxes=bboxes, **kwargs)\n        num_sampled_pos = pos_inds.numel()\n        num_expected_neg = self.num - num_sampled_pos\n        if self.neg_pos_ub >= 0:\n            _pos = max(1, num_sampled_pos)\n            neg_upper_bound = int(self.neg_pos_ub * _pos)\n            if num_expected_neg > neg_upper_bound:\n                num_expected_neg = neg_upper_bound\n        neg_inds, neg_label_weights = self.neg_sampler._sample_neg(\n            assign_result,\n            num_expected_neg,\n            bboxes,\n            img_meta=img_meta,\n            **kwargs)\n\n        return SamplingResult(pos_inds, neg_inds, bboxes, gt_bboxes,\n                              assign_result, gt_flags), neg_label_weights\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/bbox/transforms.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\n\ndef find_inside_bboxes(bboxes, img_h, img_w):\n    \"\"\"Find bboxes as long as a part of bboxes is inside the image.\n\n    Args:\n        bboxes (Tensor): Shape (N, 4).\n        img_h (int): Image height.\n        img_w (int): Image width.\n\n    Returns:\n        Tensor: Index of the remaining bboxes.\n    \"\"\"\n    inside_inds = (bboxes[:, 0] < img_w) & (bboxes[:, 2] > 0) \\\n        & (bboxes[:, 1] < img_h) & (bboxes[:, 3] > 0)\n    return inside_inds\n\n\ndef bbox_flip(bboxes, img_shape, direction='horizontal'):\n    \"\"\"Flip bboxes horizontally or vertically.\n\n    Args:\n        bboxes (Tensor): Shape (..., 4*k)\n        img_shape (tuple): Image shape.\n        direction (str): Flip direction, options are \"horizontal\", \"vertical\",\n            \"diagonal\". Default: \"horizontal\"\n\n    Returns:\n        Tensor: Flipped bboxes.\n    \"\"\"\n    assert bboxes.shape[-1] % 4 == 0\n    assert direction in ['horizontal', 'vertical', 'diagonal']\n    flipped = bboxes.clone()\n    if direction == 'horizontal':\n        flipped[..., 0::4] = img_shape[1] - bboxes[..., 2::4]\n        flipped[..., 2::4] = img_shape[1] - bboxes[..., 0::4]\n    elif direction == 'vertical':\n        flipped[..., 1::4] = img_shape[0] - bboxes[..., 3::4]\n        flipped[..., 3::4] = img_shape[0] - bboxes[..., 1::4]\n    else:\n        flipped[..., 0::4] = img_shape[1] - bboxes[..., 2::4]\n        flipped[..., 1::4] = img_shape[0] - bboxes[..., 3::4]\n        flipped[..., 2::4] = img_shape[1] - bboxes[..., 0::4]\n        flipped[..., 3::4] = img_shape[0] - bboxes[..., 1::4]\n    return flipped\n\n\ndef bbox_mapping(bboxes,\n                 img_shape,\n                 scale_factor,\n                 flip,\n                 flip_direction='horizontal'):\n    \"\"\"Map bboxes from the original image scale to testing scale.\"\"\"\n    new_bboxes = bboxes * bboxes.new_tensor(scale_factor)\n    if flip:\n        new_bboxes = bbox_flip(new_bboxes, img_shape, flip_direction)\n    return new_bboxes\n\n\ndef bbox_mapping_back(bboxes,\n                      img_shape,\n                      scale_factor,\n                      flip,\n                      flip_direction='horizontal'):\n    \"\"\"Map bboxes from testing scale to original image scale.\"\"\"\n    new_bboxes = bbox_flip(bboxes, img_shape,\n                           flip_direction) if flip else bboxes\n    new_bboxes = new_bboxes.view(-1, 4) / new_bboxes.new_tensor(scale_factor)\n    return new_bboxes.view(bboxes.shape)\n\n\ndef bbox2roi(bbox_list):\n    \"\"\"Convert a list of bboxes to roi format.\n\n    Args:\n        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch\n            of images.\n\n    Returns:\n        Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]\n    \"\"\"\n    rois_list = []\n    for img_id, bboxes in enumerate(bbox_list):\n        if bboxes.size(0) > 0:\n            img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)\n            rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1)\n        else:\n            rois = bboxes.new_zeros((0, 5))\n        rois_list.append(rois)\n    rois = torch.cat(rois_list, 0)\n    return rois\n\n\ndef roi2bbox(rois):\n    \"\"\"Convert rois to bounding box format.\n\n    Args:\n        rois (torch.Tensor): RoIs with the shape (n, 5) where the first\n            column indicates batch id of each RoI.\n\n    Returns:\n        list[torch.Tensor]: Converted boxes of 
corresponding rois.\n    \"\"\"\n    bbox_list = []\n    img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)\n    for img_id in img_ids:\n        inds = (rois[:, 0] == img_id.item())\n        bbox = rois[inds, 1:]\n        bbox_list.append(bbox)\n    return bbox_list\n\n\ndef bbox2result(bboxes, labels, num_classes):\n    \"\"\"Convert detection results to a list of numpy arrays.\n\n    Args:\n        bboxes (torch.Tensor | np.ndarray): shape (n, 5)\n        labels (torch.Tensor | np.ndarray): shape (n, )\n        num_classes (int): class number, including background class\n\n    Returns:\n        list(ndarray): bbox results of each class\n    \"\"\"\n    if bboxes.shape[0] == 0:\n        return [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)]\n    else:\n        if isinstance(bboxes, torch.Tensor):\n            bboxes = bboxes.detach().cpu().numpy()\n            labels = labels.detach().cpu().numpy()\n        return [bboxes[labels == i, :] for i in range(num_classes)]\n\n\ndef distance2bbox(points, distance, max_shape=None):\n    \"\"\"Decode distance prediction to bounding box.\n\n    Args:\n        points (Tensor): Shape (B, N, 2) or (N, 2).\n        distance (Tensor): Distance from the given point to 4\n            boundaries (left, top, right, bottom). Shape (B, N, 4) or (N, 4)\n        max_shape (Sequence[int] or torch.Tensor or Sequence[\n            Sequence[int]],optional): Maximum bounds for boxes, specifies\n            (H, W, C) or (H, W). If priors shape is (B, N, 4), then\n            the max_shape should be a Sequence[Sequence[int]]\n            and the length of max_shape should also be B.\n\n    Returns:\n        Tensor: Boxes with shape (N, 4) or (B, N, 4)\n    \"\"\"\n\n    x1 = points[..., 0] - distance[..., 0]\n    y1 = points[..., 1] - distance[..., 1]\n    x2 = points[..., 0] + distance[..., 2]\n    y2 = points[..., 1] + distance[..., 3]\n\n    bboxes = torch.stack([x1, y1, x2, y2], -1)\n\n    if max_shape is not None:\n        if bboxes.dim() == 2 and not torch.onnx.is_in_onnx_export():\n            # speed up\n            bboxes[:, 0::2].clamp_(min=0, max=max_shape[1])\n            bboxes[:, 1::2].clamp_(min=0, max=max_shape[0])\n            return bboxes\n\n        # clip bboxes with dynamic `min` and `max` for onnx\n        if torch.onnx.is_in_onnx_export():\n            from mmdet.core.export import dynamic_clip_for_onnx\n            x1, y1, x2, y2 = dynamic_clip_for_onnx(x1, y1, x2, y2, max_shape)\n            bboxes = torch.stack([x1, y1, x2, y2], dim=-1)\n            return bboxes\n        if not isinstance(max_shape, torch.Tensor):\n            max_shape = x1.new_tensor(max_shape)\n        max_shape = max_shape[..., :2].type_as(x1)\n        if max_shape.ndim == 2:\n            assert bboxes.ndim == 3\n            assert max_shape.size(0) == bboxes.size(0)\n\n        min_xy = x1.new_tensor(0)\n        max_xy = torch.cat([max_shape, max_shape],\n                           dim=-1).flip(-1).unsqueeze(-2)\n        bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)\n        bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)\n\n    return bboxes\n\n\ndef bbox2distance(points, bbox, max_dis=None, eps=0.1):\n    \"\"\"Decode bounding box based on distances.\n\n    Args:\n        points (Tensor): Shape (n, 2), [x, y].\n        bbox (Tensor): Shape (n, 4), \"xyxy\" format\n        max_dis (float): Upper bound of the distance.\n        eps (float): a small value to ensure target < max_dis, instead <=\n\n    Returns:\n        Tensor: Decoded 
distances.\n    \"\"\"\n    left = points[:, 0] - bbox[:, 0]\n    top = points[:, 1] - bbox[:, 1]\n    right = bbox[:, 2] - points[:, 0]\n    bottom = bbox[:, 3] - points[:, 1]\n    if max_dis is not None:\n        left = left.clamp(min=0, max=max_dis - eps)\n        top = top.clamp(min=0, max=max_dis - eps)\n        right = right.clamp(min=0, max=max_dis - eps)\n        bottom = bottom.clamp(min=0, max=max_dis - eps)\n    return torch.stack([left, top, right, bottom], -1)\n\n\ndef bbox_rescale(bboxes, scale_factor=1.0):\n    \"\"\"Rescale bounding box w.r.t. scale_factor.\n\n    Args:\n        bboxes (Tensor): Shape (n, 4) for bboxes or (n, 5) for rois\n        scale_factor (float): rescale factor\n\n    Returns:\n        Tensor: Rescaled bboxes.\n    \"\"\"\n    if bboxes.size(1) == 5:\n        bboxes_ = bboxes[:, 1:]\n        inds_ = bboxes[:, 0]\n    else:\n        bboxes_ = bboxes\n    cx = (bboxes_[:, 0] + bboxes_[:, 2]) * 0.5\n    cy = (bboxes_[:, 1] + bboxes_[:, 3]) * 0.5\n    w = bboxes_[:, 2] - bboxes_[:, 0]\n    h = bboxes_[:, 3] - bboxes_[:, 1]\n    w = w * scale_factor\n    h = h * scale_factor\n    x1 = cx - 0.5 * w\n    x2 = cx + 0.5 * w\n    y1 = cy - 0.5 * h\n    y2 = cy + 0.5 * h\n    if bboxes.size(1) == 5:\n        rescaled_bboxes = torch.stack([inds_, x1, y1, x2, y2], dim=-1)\n    else:\n        rescaled_bboxes = torch.stack([x1, y1, x2, y2], dim=-1)\n    return rescaled_bboxes\n\n\ndef bbox_cxcywh_to_xyxy(bbox):\n    \"\"\"Convert bbox coordinates from (cx, cy, w, h) to (x1, y1, x2, y2).\n\n    Args:\n        bbox (Tensor): Shape (n, 4) for bboxes.\n\n    Returns:\n        Tensor: Converted bboxes.\n    \"\"\"\n    cx, cy, w, h = bbox.split((1, 1, 1, 1), dim=-1)\n    bbox_new = [(cx - 0.5 * w), (cy - 0.5 * h), (cx + 0.5 * w), (cy + 0.5 * h)]\n    return torch.cat(bbox_new, dim=-1)\n\n\ndef bbox_xyxy_to_cxcywh(bbox):\n    \"\"\"Convert bbox coordinates from (x1, y1, x2, y2) to (cx, cy, w, h).\n\n    Args:\n        bbox (Tensor): Shape (n, 4) for bboxes.\n\n    Returns:\n        Tensor: Converted bboxes.\n    \"\"\"\n    x1, y1, x2, y2 = bbox.split((1, 1, 1, 1), dim=-1)\n    bbox_new = [(x1 + x2) / 2, (y1 + y2) / 2, (x2 - x1), (y2 - y1)]\n    return torch.cat(bbox_new, dim=-1)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/data_structures/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .general_data import GeneralData\nfrom .instance_data import InstanceData\n\n__all__ = ['GeneralData', 'InstanceData']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/data_structures/general_data.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport numpy as np\nimport torch\n\nfrom mmdet.utils.util_mixins import NiceRepr\n\n\nclass GeneralData(NiceRepr):\n    \"\"\"A general data structure of OpenMMlab.\n\n    A data structure that stores the meta information,\n    the annotations of the images or the model predictions,\n    which can be used in communication between components.\n\n    The attributes in `GeneralData` are divided into two parts,\n    the `meta_info_fields` and the `data_fields` respectively.\n\n        - `meta_info_fields`: Usually contains the\n          information about the image such as filename,\n          image_shape, pad_shape, etc. All attributes in\n          it are immutable once set,\n          but the user can add new meta information with\n          `set_meta_info` function, all information can be accessed\n          with methods `meta_info_keys`, `meta_info_values`,\n          `meta_info_items`.\n\n        - `data_fields`: Annotations or model predictions are\n          stored. The attributes can be accessed or modified by\n          dict-like or object-like operations, such as\n          `.` , `[]`, `in`, `del`, `pop(str)` `get(str)`, `keys()`,\n          `values()`, `items()`. Users can also apply tensor-like methods\n          to all obj:`torch.Tensor` in the `data_fileds`,\n          such as `.cuda()`, `.cpu()`, `.numpy()`, `device`, `.to()`\n          `.detach()`, `.numpy()`\n\n    Args:\n        meta_info (dict, optional): A dict contains the meta information\n            of single image. such as `img_shape`, `scale_factor`, etc.\n            Default: None.\n        data (dict, optional): A dict contains annotations of single image or\n            model predictions. Default: None.\n\n    Examples:\n        >>> from mmdet.core import GeneralData\n        >>> img_meta = dict(img_shape=(800, 1196, 3), pad_shape=(800, 1216, 3))\n        >>> instance_data = GeneralData(meta_info=img_meta)\n        >>> img_shape in instance_data\n        True\n        >>> instance_data.det_labels = torch.LongTensor([0, 1, 2, 3])\n        >>> instance_data[\"det_scores\"] = torch.Tensor([0.01, 0.1, 0.2, 0.3])\n        >>> print(results)\n        <GeneralData(\n\n          META INFORMATION\n        img_shape: (800, 1196, 3)\n        pad_shape: (800, 1216, 3)\n\n          DATA FIELDS\n        shape of det_labels: torch.Size([4])\n        shape of det_scores: torch.Size([4])\n\n        ) at 0x7f84acd10f90>\n        >>> instance_data.det_scores\n        tensor([0.0100, 0.1000, 0.2000, 0.3000])\n        >>> instance_data.det_labels\n        tensor([0, 1, 2, 3])\n        >>> instance_data['det_labels']\n        tensor([0, 1, 2, 3])\n        >>> 'det_labels' in instance_data\n        True\n        >>> instance_data.img_shape\n        (800, 1196, 3)\n        >>> 'det_scores' in instance_data\n        True\n        >>> del instance_data.det_scores\n        >>> 'det_scores' in instance_data\n        False\n        >>> det_labels = instance_data.pop('det_labels', None)\n        >>> det_labels\n        tensor([0, 1, 2, 3])\n        >>> 'det_labels' in instance_data\n        >>> False\n    \"\"\"\n\n    def __init__(self, meta_info=None, data=None):\n\n        self._meta_info_fields = set()\n        self._data_fields = set()\n\n        if meta_info is not None:\n            self.set_meta_info(meta_info=meta_info)\n        if data is not None:\n            self.set_data(data)\n\n    def set_meta_info(self, meta_info):\n        \"\"\"Add meta 
information.\n\n        Args:\n            meta_info (dict): A dict contains the meta information\n                of image. such as `img_shape`, `scale_factor`, etc.\n                Default: None.\n        \"\"\"\n        assert isinstance(meta_info,\n                          dict), f'meta should be a `dict` but get {meta_info}'\n        meta = copy.deepcopy(meta_info)\n        for k, v in meta.items():\n            # should be consistent with original meta_info\n            if k in self._meta_info_fields:\n                ori_value = getattr(self, k)\n                if isinstance(ori_value, (torch.Tensor, np.ndarray)):\n                    if (ori_value == v).all():\n                        continue\n                    else:\n                        raise KeyError(\n                            f'img_meta_info {k} has been set as '\n                            f'{getattr(self, k)} before, which is immutable ')\n                elif ori_value == v:\n                    continue\n                else:\n                    raise KeyError(\n                        f'img_meta_info {k} has been set as '\n                        f'{getattr(self, k)} before, which is immutable ')\n            else:\n                self._meta_info_fields.add(k)\n                self.__dict__[k] = v\n\n    def set_data(self, data):\n        \"\"\"Update a dict to `data_fields`.\n\n        Args:\n            data (dict): A dict contains annotations of image or\n                model predictions. Default: None.\n        \"\"\"\n        assert isinstance(data,\n                          dict), f'meta should be a `dict` but get {data}'\n        for k, v in data.items():\n            self.__setattr__(k, v)\n\n    def new(self, meta_info=None, data=None):\n        \"\"\"Return a new results with same image meta information.\n\n        Args:\n            meta_info (dict, optional): A dict contains the meta information\n                of image. such as `img_shape`, `scale_factor`, etc.\n                Default: None.\n            data (dict, optional): A dict contains annotations of image or\n                model predictions. 
Default: None.\n        \"\"\"\n        new_data = self.__class__()\n        new_data.set_meta_info(dict(self.meta_info_items()))\n        if meta_info is not None:\n            new_data.set_meta_info(meta_info)\n        if data is not None:\n            new_data.set_data(data)\n        return new_data\n\n    def keys(self):\n        \"\"\"\n        Returns:\n            list: Contains all keys in data_fields.\n        \"\"\"\n        return [key for key in self._data_fields]\n\n    def meta_info_keys(self):\n        \"\"\"\n        Returns:\n            list: Contains all keys in meta_info_fields.\n        \"\"\"\n        return [key for key in self._meta_info_fields]\n\n    def values(self):\n        \"\"\"\n        Returns:\n            list: Contains all values in data_fields.\n        \"\"\"\n        return [getattr(self, k) for k in self.keys()]\n\n    def meta_info_values(self):\n        \"\"\"\n        Returns:\n            list: Contains all values in meta_info_fields.\n        \"\"\"\n        return [getattr(self, k) for k in self.meta_info_keys()]\n\n    def items(self):\n        for k in self.keys():\n            yield (k, getattr(self, k))\n\n    def meta_info_items(self):\n        for k in self.meta_info_keys():\n            yield (k, getattr(self, k))\n\n    def __setattr__(self, name, val):\n        if name in ('_meta_info_fields', '_data_fields'):\n            if not hasattr(self, name):\n                super().__setattr__(name, val)\n            else:\n                raise AttributeError(\n                    f'{name} has been used as a '\n                    f'private attribute, which is immutable. ')\n        else:\n            if name in self._meta_info_fields:\n                raise AttributeError(f'`{name}` is used in meta information,'\n                                     f'which is immutable')\n\n            self._data_fields.add(name)\n            super().__setattr__(name, val)\n\n    def __delattr__(self, item):\n\n        if item in ('_meta_info_fields', '_data_fields'):\n            raise AttributeError(f'{item} has been used as a '\n                                 f'private attribute, which is immutable. 
')\n\n        if item in self._meta_info_fields:\n            raise KeyError(f'{item} is used in meta information, '\n                           f'which is immutable.')\n        super().__delattr__(item)\n        if item in self._data_fields:\n            self._data_fields.remove(item)\n\n    # dict-like methods\n    __setitem__ = __setattr__\n    __delitem__ = __delattr__\n\n    def __getitem__(self, name):\n        return getattr(self, name)\n\n    def get(self, *args):\n        assert len(args) < 3, '`get` get more than 2 arguments'\n        return self.__dict__.get(*args)\n\n    def pop(self, *args):\n        assert len(args) < 3, '`pop` get more than 2 arguments'\n        name = args[0]\n        if name in self._meta_info_fields:\n            raise KeyError(f'{name} is a key in meta information, '\n                           f'which is immutable')\n\n        if args[0] in self._data_fields:\n            self._data_fields.remove(args[0])\n            return self.__dict__.pop(*args)\n\n        # with default value\n        elif len(args) == 2:\n            return args[1]\n        else:\n            raise KeyError(f'{args[0]}')\n\n    def __contains__(self, item):\n        return item in self._data_fields or \\\n                    item in self._meta_info_fields\n\n    # Tensor-like methods\n    def to(self, *args, **kwargs):\n        \"\"\"Apply same name function to all tensors in data_fields.\"\"\"\n        new_data = self.new()\n        for k, v in self.items():\n            if hasattr(v, 'to'):\n                v = v.to(*args, **kwargs)\n            new_data[k] = v\n        return new_data\n\n    # Tensor-like methods\n    def cpu(self):\n        \"\"\"Apply same name function to all tensors in data_fields.\"\"\"\n        new_data = self.new()\n        for k, v in self.items():\n            if isinstance(v, torch.Tensor):\n                v = v.cpu()\n            new_data[k] = v\n        return new_data\n\n    # Tensor-like methods\n    def npu(self):\n        \"\"\"Apply same name function to all tensors in data_fields.\"\"\"\n        new_data = self.new()\n        for k, v in self.items():\n            if isinstance(v, torch.Tensor):\n                v = v.npu()\n            new_data[k] = v\n        return new_data\n\n    # Tensor-like methods\n    def mlu(self):\n        \"\"\"Apply same name function to all tensors in data_fields.\"\"\"\n        new_data = self.new()\n        for k, v in self.items():\n            if isinstance(v, torch.Tensor):\n                v = v.mlu()\n            new_data[k] = v\n        return new_data\n\n    # Tensor-like methods\n    def cuda(self):\n        \"\"\"Apply same name function to all tensors in data_fields.\"\"\"\n        new_data = self.new()\n        for k, v in self.items():\n            if isinstance(v, torch.Tensor):\n                v = v.cuda()\n            new_data[k] = v\n        return new_data\n\n    # Tensor-like methods\n    def detach(self):\n        \"\"\"Apply same name function to all tensors in data_fields.\"\"\"\n        new_data = self.new()\n        for k, v in self.items():\n            if isinstance(v, torch.Tensor):\n                v = v.detach()\n            new_data[k] = v\n        return new_data\n\n    # Tensor-like methods\n    def numpy(self):\n        \"\"\"Apply same name function to all tensors in data_fields.\"\"\"\n        new_data = self.new()\n        for k, v in self.items():\n            if isinstance(v, torch.Tensor):\n                v = v.detach().cpu().numpy()\n            new_data[k] = v\n        
return new_data\n\n    def __nice__(self):\n        repr = '\\n \\n  META INFORMATION \\n'\n        for k, v in self.meta_info_items():\n            repr += f'{k}: {v} \\n'\n        repr += '\\n   DATA FIELDS \\n'\n        for k, v in self.items():\n            if isinstance(v, (torch.Tensor, np.ndarray)):\n                repr += f'shape of {k}: {v.shape} \\n'\n            else:\n                repr += f'{k}: {v} \\n'\n        return repr + '\\n'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/data_structures/instance_data.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport itertools\n\nimport numpy as np\nimport torch\n\nfrom .general_data import GeneralData\n\n\nclass InstanceData(GeneralData):\n    \"\"\"Data structure for instance-level annnotations or predictions.\n\n    Subclass of :class:`GeneralData`. All value in `data_fields`\n    should have the same length. This design refer to\n    https://github.com/facebookresearch/detectron2/blob/master/detectron2/structures/instances.py # noqa E501\n\n    Examples:\n        >>> from mmdet.core import InstanceData\n        >>> import numpy as np\n        >>> img_meta = dict(img_shape=(800, 1196, 3), pad_shape=(800, 1216, 3))\n        >>> results = InstanceData(img_meta)\n        >>> img_shape in results\n        True\n        >>> results.det_labels = torch.LongTensor([0, 1, 2, 3])\n        >>> results[\"det_scores\"] = torch.Tensor([0.01, 0.7, 0.6, 0.3])\n        >>> results[\"det_masks\"] = np.ndarray(4, 2, 2)\n        >>> len(results)\n        4\n        >>> print(resutls)\n        <InstanceData(\n\n            META INFORMATION\n        pad_shape: (800, 1216, 3)\n        img_shape: (800, 1196, 3)\n\n            PREDICTIONS\n        shape of det_labels: torch.Size([4])\n        shape of det_masks: (4, 2, 2)\n        shape of det_scores: torch.Size([4])\n\n        ) at 0x7fe26b5ca990>\n        >>> sorted_results = results[results.det_scores.sort().indices]\n        >>> sorted_results.det_scores\n        tensor([0.0100, 0.3000, 0.6000, 0.7000])\n        >>> sorted_results.det_labels\n        tensor([0, 3, 2, 1])\n        >>> print(results[results.scores > 0.5])\n        <InstanceData(\n\n            META INFORMATION\n        pad_shape: (800, 1216, 3)\n        img_shape: (800, 1196, 3)\n\n            PREDICTIONS\n        shape of det_labels: torch.Size([2])\n        shape of det_masks: (2, 2, 2)\n        shape of det_scores: torch.Size([2])\n\n        ) at 0x7fe26b6d7790>\n        >>> results[results.det_scores > 0.5].det_labels\n        tensor([1, 2])\n        >>> results[results.det_scores > 0.5].det_scores\n        tensor([0.7000, 0.6000])\n    \"\"\"\n\n    def __setattr__(self, name, value):\n\n        if name in ('_meta_info_fields', '_data_fields'):\n            if not hasattr(self, name):\n                super().__setattr__(name, value)\n            else:\n                raise AttributeError(\n                    f'{name} has been used as a '\n                    f'private attribute, which is immutable. 
')\n\n        else:\n            assert isinstance(value, (torch.Tensor, np.ndarray, list)), \\\n                f'Can set {type(value)}, only support' \\\n                f' {(torch.Tensor, np.ndarray, list)}'\n\n            if self._data_fields:\n                assert len(value) == len(self), f'the length of ' \\\n                                             f'values {len(value)} is ' \\\n                                             f'not consistent with' \\\n                                             f' the length ' \\\n                                             f'of this :obj:`InstanceData` ' \\\n                                             f'{len(self)} '\n            super().__setattr__(name, value)\n\n    def __getitem__(self, item):\n        \"\"\"\n        Args:\n            item (str, obj:`slice`,\n                obj`torch.LongTensor`, obj:`torch.BoolTensor`):\n                get the corresponding values according to item.\n\n        Returns:\n            obj:`InstanceData`: Corresponding values.\n        \"\"\"\n        assert len(self), ' This is a empty instance'\n\n        assert isinstance(\n            item, (str, slice, int, torch.LongTensor, torch.BoolTensor))\n\n        if isinstance(item, str):\n            return getattr(self, item)\n\n        if type(item) == int:\n            if item >= len(self) or item < -len(self):\n                raise IndexError(f'Index {item} out of range!')\n            else:\n                # keep the dimension\n                item = slice(item, None, len(self))\n\n        new_data = self.new()\n        if isinstance(item, (torch.Tensor)):\n            assert item.dim() == 1, 'Only support to get the' \\\n                                 ' values along the first dimension.'\n            if isinstance(item, torch.BoolTensor):\n                assert len(item) == len(self), f'The shape of the' \\\n                                               f' input(BoolTensor)) ' \\\n                                               f'{len(item)} ' \\\n                                               f' does not match the shape ' \\\n                                               f'of the indexed tensor ' \\\n                                               f'in results_filed ' \\\n                                               f'{len(self)} at ' \\\n                                               f'first dimension. 
'\n\n            for k, v in self.items():\n                if isinstance(v, torch.Tensor):\n                    new_data[k] = v[item]\n                elif isinstance(v, np.ndarray):\n                    new_data[k] = v[item.cpu().numpy()]\n                elif isinstance(v, list):\n                    r_list = []\n                    # convert to indexes from boolTensor\n                    if isinstance(item, torch.BoolTensor):\n                        indexes = torch.nonzero(item).view(-1)\n                    else:\n                        indexes = item\n                    for index in indexes:\n                        r_list.append(v[index])\n                    new_data[k] = r_list\n        else:\n            # item is a slice\n            for k, v in self.items():\n                new_data[k] = v[item]\n        return new_data\n\n    @staticmethod\n    def cat(instances_list):\n        \"\"\"Concat the predictions of all :obj:`InstanceData` in the list.\n\n        Args:\n            instances_list (list[:obj:`InstanceData`]): A list\n                of :obj:`InstanceData`.\n\n        Returns:\n            obj:`InstanceData`\n        \"\"\"\n        assert all(\n            isinstance(results, InstanceData) for results in instances_list)\n        assert len(instances_list) > 0\n        if len(instances_list) == 1:\n            return instances_list[0]\n\n        new_data = instances_list[0].new()\n        for k in instances_list[0]._data_fields:\n            values = [results[k] for results in instances_list]\n            v0 = values[0]\n            if isinstance(v0, torch.Tensor):\n                values = torch.cat(values, dim=0)\n            elif isinstance(v0, np.ndarray):\n                values = np.concatenate(values, axis=0)\n            elif isinstance(v0, list):\n                values = list(itertools.chain(*values))\n            else:\n                raise ValueError(\n                    f'Can not concat the {k} which is a {type(v0)}')\n            new_data[k] = values\n        return new_data\n\n    def __len__(self):\n        if len(self._data_fields):\n            for v in self.values():\n                return len(v)\n        else:\n            raise AssertionError('This is an empty `InstanceData`.')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/evaluation/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .class_names import (cityscapes_classes, coco_classes, dataset_aliases,\n                          get_classes, imagenet_det_classes,\n                          imagenet_vid_classes, oid_challenge_classes,\n                          oid_v6_classes, voc_classes)\nfrom .eval_hooks import DistEvalHook, EvalHook\nfrom .mean_ap import average_precision, eval_map, print_map_summary\nfrom .panoptic_utils import INSTANCE_OFFSET\nfrom .recall import (eval_recalls, plot_iou_recall, plot_num_recall,\n                     print_recall_summary)\n\n__all__ = [\n    'voc_classes', 'imagenet_det_classes', 'imagenet_vid_classes',\n    'coco_classes', 'cityscapes_classes', 'dataset_aliases', 'get_classes',\n    'DistEvalHook', 'EvalHook', 'average_precision', 'eval_map',\n    'print_map_summary', 'eval_recalls', 'print_recall_summary',\n    'plot_num_recall', 'plot_iou_recall', 'oid_v6_classes',\n    'oid_challenge_classes', 'INSTANCE_OFFSET'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/evaluation/bbox_overlaps.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\n\n\ndef bbox_overlaps(bboxes1,\n                  bboxes2,\n                  mode='iou',\n                  eps=1e-6,\n                  use_legacy_coordinate=False):\n    \"\"\"Calculate the ious between each bbox of bboxes1 and bboxes2.\n\n    Args:\n        bboxes1 (ndarray): Shape (n, 4)\n        bboxes2 (ndarray): Shape (k, 4)\n        mode (str): IOU (intersection over union) or IOF (intersection\n            over foreground)\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Note when function is used in `VOCDataset`, it should be\n            True to align with the official implementation\n            `http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCdevkit_18-May-2011.tar`\n            Default: False.\n\n    Returns:\n        ious (ndarray): Shape (n, k)\n    \"\"\"\n\n    assert mode in ['iou', 'iof']\n    if not use_legacy_coordinate:\n        extra_length = 0.\n    else:\n        extra_length = 1.\n    bboxes1 = bboxes1.astype(np.float32)\n    bboxes2 = bboxes2.astype(np.float32)\n    rows = bboxes1.shape[0]\n    cols = bboxes2.shape[0]\n    ious = np.zeros((rows, cols), dtype=np.float32)\n    if rows * cols == 0:\n        return ious\n    exchange = False\n    if bboxes1.shape[0] > bboxes2.shape[0]:\n        bboxes1, bboxes2 = bboxes2, bboxes1\n        ious = np.zeros((cols, rows), dtype=np.float32)\n        exchange = True\n    area1 = (bboxes1[:, 2] - bboxes1[:, 0] + extra_length) * (\n        bboxes1[:, 3] - bboxes1[:, 1] + extra_length)\n    area2 = (bboxes2[:, 2] - bboxes2[:, 0] + extra_length) * (\n        bboxes2[:, 3] - bboxes2[:, 1] + extra_length)\n    for i in range(bboxes1.shape[0]):\n        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])\n        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])\n        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])\n        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])\n        overlap = np.maximum(x_end - x_start + extra_length, 0) * np.maximum(\n            y_end - y_start + extra_length, 0)\n        if mode == 'iou':\n            union = area1[i] + area2 - overlap\n        else:\n            union = area1[i] if not exchange else area2\n        union = np.maximum(union, eps)\n        ious[i, :] = overlap / union\n    if exchange:\n        ious = ious.T\n    return ious\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/evaluation/class_names.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\n\n\ndef wider_face_classes():\n    return ['face']\n\n\ndef voc_classes():\n    return [\n        'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat',\n        'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person',\n        'potted plant', 'sheep', 'sofa', 'train', 'tvmonitor'\n    ]\n\n\ndef imagenet_det_classes():\n    return [\n        'accordion', 'airplane', 'ant', 'antelope', 'apple', 'armadillo',\n        'artichoke', 'axe', 'baby_bed', 'backpack', 'bagel', 'balance_beam',\n        'banana', 'band_aid', 'banjo', 'baseball', 'basketball', 'bathing_cap',\n        'beaker', 'bear', 'bee', 'bell_pepper', 'bench', 'bicycle', 'binder',\n        'bird', 'bookshelf', 'bow_tie', 'bow', 'bowl', 'brassiere', 'burrito',\n        'bus', 'butterfly', 'camel', 'can_opener', 'car', 'cart', 'cattle',\n        'cello', 'centipede', 'chain_saw', 'chair', 'chime', 'cocktail_shaker',\n        'coffee_maker', 'computer_keyboard', 'computer_mouse', 'corkscrew',\n        'cream', 'croquet_ball', 'crutch', 'cucumber', 'cup_or_mug', 'diaper',\n        'digital_clock', 'dishwasher', 'dog', 'domestic_cat', 'dragonfly',\n        'drum', 'dumbbell', 'electric_fan', 'elephant', 'face_powder', 'fig',\n        'filing_cabinet', 'flower_pot', 'flute', 'fox', 'french_horn', 'frog',\n        'frying_pan', 'giant_panda', 'goldfish', 'golf_ball', 'golfcart',\n        'guacamole', 'guitar', 'hair_dryer', 'hair_spray', 'hamburger',\n        'hammer', 'hamster', 'harmonica', 'harp', 'hat_with_a_wide_brim',\n        'head_cabbage', 'helmet', 'hippopotamus', 'horizontal_bar', 'horse',\n        'hotdog', 'iPod', 'isopod', 'jellyfish', 'koala_bear', 'ladle',\n        'ladybug', 'lamp', 'laptop', 'lemon', 'lion', 'lipstick', 'lizard',\n        'lobster', 'maillot', 'maraca', 'microphone', 'microwave', 'milk_can',\n        'miniskirt', 'monkey', 'motorcycle', 'mushroom', 'nail', 'neck_brace',\n        'oboe', 'orange', 'otter', 'pencil_box', 'pencil_sharpener', 'perfume',\n        'person', 'piano', 'pineapple', 'ping-pong_ball', 'pitcher', 'pizza',\n        'plastic_bag', 'plate_rack', 'pomegranate', 'popsicle', 'porcupine',\n        'power_drill', 'pretzel', 'printer', 'puck', 'punching_bag', 'purse',\n        'rabbit', 'racket', 'ray', 'red_panda', 'refrigerator',\n        'remote_control', 'rubber_eraser', 'rugby_ball', 'ruler',\n        'salt_or_pepper_shaker', 'saxophone', 'scorpion', 'screwdriver',\n        'seal', 'sheep', 'ski', 'skunk', 'snail', 'snake', 'snowmobile',\n        'snowplow', 'soap_dispenser', 'soccer_ball', 'sofa', 'spatula',\n        'squirrel', 'starfish', 'stethoscope', 'stove', 'strainer',\n        'strawberry', 'stretcher', 'sunglasses', 'swimming_trunks', 'swine',\n        'syringe', 'table', 'tape_player', 'tennis_ball', 'tick', 'tie',\n        'tiger', 'toaster', 'traffic_light', 'train', 'trombone', 'trumpet',\n        'turtle', 'tv_or_monitor', 'unicycle', 'vacuum', 'violin',\n        'volleyball', 'waffle_iron', 'washer', 'water_bottle', 'watercraft',\n        'whale', 'wine_bottle', 'zebra'\n    ]\n\n\ndef imagenet_vid_classes():\n    return [\n        'airplane', 'antelope', 'bear', 'bicycle', 'bird', 'bus', 'car',\n        'cattle', 'dog', 'domestic_cat', 'elephant', 'fox', 'giant_panda',\n        'hamster', 'horse', 'lion', 'lizard', 'monkey', 'motorcycle', 'rabbit',\n        'red_panda', 'sheep', 'snake', 'squirrel', 'tiger', 'train', 'turtle',\n        'watercraft', 'whale', 
'zebra'\n    ]\n\n\ndef coco_classes():\n    return [\n        'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',\n        'truck', 'boat', 'traffic_light', 'fire_hydrant', 'stop_sign',\n        'parking_meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep',\n        'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella',\n        'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard',\n        'sports_ball', 'kite', 'baseball_bat', 'baseball_glove', 'skateboard',\n        'surfboard', 'tennis_racket', 'bottle', 'wine_glass', 'cup', 'fork',\n        'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',\n        'broccoli', 'carrot', 'hot_dog', 'pizza', 'donut', 'cake', 'chair',\n        'couch', 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv',\n        'laptop', 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave',\n        'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',\n        'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'\n    ]\n\n\ndef cityscapes_classes():\n    return [\n        'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',\n        'bicycle'\n    ]\n\n\ndef oid_challenge_classes():\n    return [\n        'Footwear', 'Jeans', 'House', 'Tree', 'Woman', 'Man', 'Land vehicle',\n        'Person', 'Wheel', 'Bus', 'Human face', 'Bird', 'Dress', 'Girl',\n        'Vehicle', 'Building', 'Cat', 'Car', 'Belt', 'Elephant', 'Dessert',\n        'Butterfly', 'Train', 'Guitar', 'Poster', 'Book', 'Boy', 'Bee',\n        'Flower', 'Window', 'Hat', 'Human head', 'Dog', 'Human arm', 'Drink',\n        'Human mouth', 'Human hair', 'Human nose', 'Human hand', 'Table',\n        'Marine invertebrates', 'Fish', 'Sculpture', 'Rose', 'Street light',\n        'Glasses', 'Fountain', 'Skyscraper', 'Swimwear', 'Brassiere', 'Drum',\n        'Duck', 'Countertop', 'Furniture', 'Ball', 'Human leg', 'Boat',\n        'Balloon', 'Bicycle helmet', 'Goggles', 'Door', 'Human eye', 'Shirt',\n        'Toy', 'Teddy bear', 'Pasta', 'Tomato', 'Human ear',\n        'Vehicle registration plate', 'Microphone', 'Musical keyboard',\n        'Tower', 'Houseplant', 'Flowerpot', 'Fruit', 'Vegetable',\n        'Musical instrument', 'Suit', 'Motorcycle', 'Bagel', 'French fries',\n        'Hamburger', 'Chair', 'Salt and pepper shakers', 'Snail', 'Airplane',\n        'Horse', 'Laptop', 'Computer keyboard', 'Football helmet', 'Cocktail',\n        'Juice', 'Tie', 'Computer monitor', 'Human beard', 'Bottle',\n        'Saxophone', 'Lemon', 'Mouse', 'Sock', 'Cowboy hat', 'Sun hat',\n        'Football', 'Porch', 'Sunglasses', 'Lobster', 'Crab', 'Picture frame',\n        'Van', 'Crocodile', 'Surfboard', 'Shorts', 'Helicopter', 'Helmet',\n        'Sports uniform', 'Taxi', 'Swan', 'Goose', 'Coat', 'Jacket', 'Handbag',\n        'Flag', 'Skateboard', 'Television', 'Tire', 'Spoon', 'Palm tree',\n        'Stairs', 'Salad', 'Castle', 'Oven', 'Microwave oven', 'Wine',\n        'Ceiling fan', 'Mechanical fan', 'Cattle', 'Truck', 'Box', 'Ambulance',\n        'Desk', 'Wine glass', 'Reptile', 'Tank', 'Traffic light', 'Billboard',\n        'Tent', 'Insect', 'Spider', 'Treadmill', 'Cupboard', 'Shelf',\n        'Seat belt', 'Human foot', 'Bicycle', 'Bicycle wheel', 'Couch',\n        'Bookcase', 'Fedora', 'Backpack', 'Bench', 'Oyster',\n        'Moths and butterflies', 'Lavender', 'Waffle', 'Fork', 'Animal',\n        'Accordion', 'Mobile phone', 'Plate', 'Coffee cup', 'Saucer',\n        'Platter', 'Dagger', 'Knife', 'Bull', 'Tortoise', 'Sea turtle', 
'Deer',\n        'Weapon', 'Apple', 'Ski', 'Taco', 'Traffic sign', 'Beer', 'Necklace',\n        'Sunflower', 'Piano', 'Organ', 'Harpsichord', 'Bed', 'Cabinetry',\n        'Nightstand', 'Curtain', 'Chest of drawers', 'Drawer', 'Parrot',\n        'Sandal', 'High heels', 'Tableware', 'Cart', 'Mushroom', 'Kite',\n        'Missile', 'Seafood', 'Camera', 'Paper towel', 'Toilet paper',\n        'Sombrero', 'Radish', 'Lighthouse', 'Segway', 'Pig', 'Watercraft',\n        'Golf cart', 'studio couch', 'Dolphin', 'Whale', 'Earrings', 'Otter',\n        'Sea lion', 'Whiteboard', 'Monkey', 'Gondola', 'Zebra',\n        'Baseball glove', 'Scarf', 'Adhesive tape', 'Trousers', 'Scoreboard',\n        'Lily', 'Carnivore', 'Power plugs and sockets', 'Office building',\n        'Sandwich', 'Swimming pool', 'Headphones', 'Tin can', 'Crown', 'Doll',\n        'Cake', 'Frog', 'Beetle', 'Ant', 'Gas stove', 'Canoe', 'Falcon',\n        'Blue jay', 'Egg', 'Fire hydrant', 'Raccoon', 'Muffin', 'Wall clock',\n        'Coffee', 'Mug', 'Tea', 'Bear', 'Waste container', 'Home appliance',\n        'Candle', 'Lion', 'Mirror', 'Starfish', 'Marine mammal', 'Wheelchair',\n        'Umbrella', 'Alpaca', 'Violin', 'Cello', 'Brown bear', 'Canary', 'Bat',\n        'Ruler', 'Plastic bag', 'Penguin', 'Watermelon', 'Harbor seal', 'Pen',\n        'Pumpkin', 'Harp', 'Kitchen appliance', 'Roller skates', 'Bust',\n        'Coffee table', 'Tennis ball', 'Tennis racket', 'Ladder', 'Boot',\n        'Bowl', 'Stop sign', 'Volleyball', 'Eagle', 'Paddle', 'Chicken',\n        'Skull', 'Lamp', 'Beehive', 'Maple', 'Sink', 'Goldfish', 'Tripod',\n        'Coconut', 'Bidet', 'Tap', 'Bathroom cabinet', 'Toilet',\n        'Filing cabinet', 'Pretzel', 'Table tennis racket', 'Bronze sculpture',\n        'Rocket', 'Mouse', 'Hamster', 'Lizard', 'Lifejacket', 'Goat',\n        'Washing machine', 'Trumpet', 'Horn', 'Trombone', 'Sheep',\n        'Tablet computer', 'Pillow', 'Kitchen & dining room table',\n        'Parachute', 'Raven', 'Glove', 'Loveseat', 'Christmas tree',\n        'Shellfish', 'Rifle', 'Shotgun', 'Sushi', 'Sparrow', 'Bread',\n        'Toaster', 'Watch', 'Asparagus', 'Artichoke', 'Suitcase', 'Antelope',\n        'Broccoli', 'Ice cream', 'Racket', 'Banana', 'Cookie', 'Cucumber',\n        'Dragonfly', 'Lynx', 'Caterpillar', 'Light bulb', 'Office supplies',\n        'Miniskirt', 'Skirt', 'Fireplace', 'Potato', 'Light switch',\n        'Croissant', 'Cabbage', 'Ladybug', 'Handgun', 'Luggage and bags',\n        'Window blind', 'Snowboard', 'Baseball bat', 'Digital clock',\n        'Serving tray', 'Infant bed', 'Sofa bed', 'Guacamole', 'Fox', 'Pizza',\n        'Snowplow', 'Jet ski', 'Refrigerator', 'Lantern', 'Convenience store',\n        'Sword', 'Rugby ball', 'Owl', 'Ostrich', 'Pancake', 'Strawberry',\n        'Carrot', 'Tart', 'Dice', 'Turkey', 'Rabbit', 'Invertebrate', 'Vase',\n        'Stool', 'Swim cap', 'Shower', 'Clock', 'Jellyfish', 'Aircraft',\n        'Chopsticks', 'Orange', 'Snake', 'Sewing machine', 'Kangaroo', 'Mixer',\n        'Food processor', 'Shrimp', 'Towel', 'Porcupine', 'Jaguar', 'Cannon',\n        'Limousine', 'Mule', 'Squirrel', 'Kitchen knife', 'Tiara', 'Tiger',\n        'Bow and arrow', 'Candy', 'Rhinoceros', 'Shark', 'Cricket ball',\n        'Doughnut', 'Plumbing fixture', 'Camel', 'Polar bear', 'Coin',\n        'Printer', 'Blender', 'Giraffe', 'Billiard table', 'Kettle',\n        'Dinosaur', 'Pineapple', 'Zucchini', 'Jug', 'Barge', 'Teapot',\n        'Golf ball', 'Binoculars', 'Scissors', 'Hot dog', 'Door handle',\n        
'Seahorse', 'Bathtub', 'Leopard', 'Centipede', 'Grapefruit', 'Snowman',\n        'Cheetah', 'Alarm clock', 'Grape', 'Wrench', 'Wok', 'Bell pepper',\n        'Cake stand', 'Barrel', 'Woodpecker', 'Flute', 'Corded phone',\n        'Willow', 'Punching bag', 'Pomegranate', 'Telephone', 'Pear',\n        'Common fig', 'Bench', 'Wood-burning stove', 'Burrito', 'Nail',\n        'Turtle', 'Submarine sandwich', 'Drinking straw', 'Peach', 'Popcorn',\n        'Frying pan', 'Picnic basket', 'Honeycomb', 'Envelope', 'Mango',\n        'Cutting board', 'Pitcher', 'Stationary bicycle', 'Dumbbell',\n        'Personal care', 'Dog bed', 'Snowmobile', 'Oboe', 'Briefcase',\n        'Squash', 'Tick', 'Slow cooker', 'Coffeemaker', 'Measuring cup',\n        'Crutch', 'Stretcher', 'Screwdriver', 'Flashlight', 'Spatula',\n        'Pressure cooker', 'Ring binder', 'Beaker', 'Torch', 'Winter melon'\n    ]\n\n\ndef oid_v6_classes():\n    return [\n        'Tortoise', 'Container', 'Magpie', 'Sea turtle', 'Football',\n        'Ambulance', 'Ladder', 'Toothbrush', 'Syringe', 'Sink', 'Toy',\n        'Organ (Musical Instrument)', 'Cassette deck', 'Apple', 'Human eye',\n        'Cosmetics', 'Paddle', 'Snowman', 'Beer', 'Chopsticks', 'Human beard',\n        'Bird', 'Parking meter', 'Traffic light', 'Croissant', 'Cucumber',\n        'Radish', 'Towel', 'Doll', 'Skull', 'Washing machine', 'Glove', 'Tick',\n        'Belt', 'Sunglasses', 'Banjo', 'Cart', 'Ball', 'Backpack', 'Bicycle',\n        'Home appliance', 'Centipede', 'Boat', 'Surfboard', 'Boot',\n        'Headphones', 'Hot dog', 'Shorts', 'Fast food', 'Bus', 'Boy',\n        'Screwdriver', 'Bicycle wheel', 'Barge', 'Laptop', 'Miniskirt',\n        'Drill (Tool)', 'Dress', 'Bear', 'Waffle', 'Pancake', 'Brown bear',\n        'Woodpecker', 'Blue jay', 'Pretzel', 'Bagel', 'Tower', 'Teapot',\n        'Person', 'Bow and arrow', 'Swimwear', 'Beehive', 'Brassiere', 'Bee',\n        'Bat (Animal)', 'Starfish', 'Popcorn', 'Burrito', 'Chainsaw',\n        'Balloon', 'Wrench', 'Tent', 'Vehicle registration plate', 'Lantern',\n        'Toaster', 'Flashlight', 'Billboard', 'Tiara', 'Limousine', 'Necklace',\n        'Carnivore', 'Scissors', 'Stairs', 'Computer keyboard', 'Printer',\n        'Traffic sign', 'Chair', 'Shirt', 'Poster', 'Cheese', 'Sock',\n        'Fire hydrant', 'Land vehicle', 'Earrings', 'Tie', 'Watercraft',\n        'Cabinetry', 'Suitcase', 'Muffin', 'Bidet', 'Snack', 'Snowmobile',\n        'Clock', 'Medical equipment', 'Cattle', 'Cello', 'Jet ski', 'Camel',\n        'Coat', 'Suit', 'Desk', 'Cat', 'Bronze sculpture', 'Juice', 'Gondola',\n        'Beetle', 'Cannon', 'Computer mouse', 'Cookie', 'Office building',\n        'Fountain', 'Coin', 'Calculator', 'Cocktail', 'Computer monitor',\n        'Box', 'Stapler', 'Christmas tree', 'Cowboy hat', 'Hiking equipment',\n        'Studio couch', 'Drum', 'Dessert', 'Wine rack', 'Drink', 'Zucchini',\n        'Ladle', 'Human mouth', 'Dairy Product', 'Dice', 'Oven', 'Dinosaur',\n        'Ratchet (Device)', 'Couch', 'Cricket ball', 'Winter melon', 'Spatula',\n        'Whiteboard', 'Pencil sharpener', 'Door', 'Hat', 'Shower', 'Eraser',\n        'Fedora', 'Guacamole', 'Dagger', 'Scarf', 'Dolphin', 'Sombrero',\n        'Tin can', 'Mug', 'Tap', 'Harbor seal', 'Stretcher', 'Can opener',\n        'Goggles', 'Human body', 'Roller skates', 'Coffee cup',\n        'Cutting board', 'Blender', 'Plumbing fixture', 'Stop sign',\n        'Office supplies', 'Volleyball (Ball)', 'Vase', 'Slow cooker',\n        'Wardrobe', 'Coffee', 'Whisk', 'Paper towel', 
'Personal care', 'Food',\n        'Sun hat', 'Tree house', 'Flying disc', 'Skirt', 'Gas stove',\n        'Salt and pepper shakers', 'Mechanical fan', 'Face powder', 'Fax',\n        'Fruit', 'French fries', 'Nightstand', 'Barrel', 'Kite', 'Tart',\n        'Treadmill', 'Fox', 'Flag', 'French horn', 'Window blind',\n        'Human foot', 'Golf cart', 'Jacket', 'Egg (Food)', 'Street light',\n        'Guitar', 'Pillow', 'Human leg', 'Isopod', 'Grape', 'Human ear',\n        'Power plugs and sockets', 'Panda', 'Giraffe', 'Woman', 'Door handle',\n        'Rhinoceros', 'Bathtub', 'Goldfish', 'Houseplant', 'Goat',\n        'Baseball bat', 'Baseball glove', 'Mixing bowl',\n        'Marine invertebrates', 'Kitchen utensil', 'Light switch', 'House',\n        'Horse', 'Stationary bicycle', 'Hammer', 'Ceiling fan', 'Sofa bed',\n        'Adhesive tape', 'Harp', 'Sandal', 'Bicycle helmet', 'Saucer',\n        'Harpsichord', 'Human hair', 'Heater', 'Harmonica', 'Hamster',\n        'Curtain', 'Bed', 'Kettle', 'Fireplace', 'Scale', 'Drinking straw',\n        'Insect', 'Hair dryer', 'Kitchenware', 'Indoor rower', 'Invertebrate',\n        'Food processor', 'Bookcase', 'Refrigerator', 'Wood-burning stove',\n        'Punching bag', 'Common fig', 'Cocktail shaker', 'Jaguar (Animal)',\n        'Golf ball', 'Fashion accessory', 'Alarm clock', 'Filing cabinet',\n        'Artichoke', 'Table', 'Tableware', 'Kangaroo', 'Koala', 'Knife',\n        'Bottle', 'Bottle opener', 'Lynx', 'Lavender (Plant)', 'Lighthouse',\n        'Dumbbell', 'Human head', 'Bowl', 'Humidifier', 'Porch', 'Lizard',\n        'Billiard table', 'Mammal', 'Mouse', 'Motorcycle',\n        'Musical instrument', 'Swim cap', 'Frying pan', 'Snowplow',\n        'Bathroom cabinet', 'Missile', 'Bust', 'Man', 'Waffle iron', 'Milk',\n        'Ring binder', 'Plate', 'Mobile phone', 'Baked goods', 'Mushroom',\n        'Crutch', 'Pitcher (Container)', 'Mirror', 'Personal flotation device',\n        'Table tennis racket', 'Pencil case', 'Musical keyboard', 'Scoreboard',\n        'Briefcase', 'Kitchen knife', 'Nail (Construction)', 'Tennis ball',\n        'Plastic bag', 'Oboe', 'Chest of drawers', 'Ostrich', 'Piano', 'Girl',\n        'Plant', 'Potato', 'Hair spray', 'Sports equipment', 'Pasta',\n        'Penguin', 'Pumpkin', 'Pear', 'Infant bed', 'Polar bear', 'Mixer',\n        'Cupboard', 'Jacuzzi', 'Pizza', 'Digital clock', 'Pig', 'Reptile',\n        'Rifle', 'Lipstick', 'Skateboard', 'Raven', 'High heels', 'Red panda',\n        'Rose', 'Rabbit', 'Sculpture', 'Saxophone', 'Shotgun', 'Seafood',\n        'Submarine sandwich', 'Snowboard', 'Sword', 'Picture frame', 'Sushi',\n        'Loveseat', 'Ski', 'Squirrel', 'Tripod', 'Stethoscope', 'Submarine',\n        'Scorpion', 'Segway', 'Training bench', 'Snake', 'Coffee table',\n        'Skyscraper', 'Sheep', 'Television', 'Trombone', 'Tea', 'Tank', 'Taco',\n        'Telephone', 'Torch', 'Tiger', 'Strawberry', 'Trumpet', 'Tree',\n        'Tomato', 'Train', 'Tool', 'Picnic basket', 'Cooking spray',\n        'Trousers', 'Bowling equipment', 'Football helmet', 'Truck',\n        'Measuring cup', 'Coffeemaker', 'Violin', 'Vehicle', 'Handbag',\n        'Paper cutter', 'Wine', 'Weapon', 'Wheel', 'Worm', 'Wok', 'Whale',\n        'Zebra', 'Auto part', 'Jug', 'Pizza cutter', 'Cream', 'Monkey', 'Lion',\n        'Bread', 'Platter', 'Chicken', 'Eagle', 'Helicopter', 'Owl', 'Duck',\n        'Turtle', 'Hippopotamus', 'Crocodile', 'Toilet', 'Toilet paper',\n        'Squid', 'Clothing', 'Footwear', 'Lemon', 'Spider', 'Deer', 'Frog',\n       
 'Banana', 'Rocket', 'Wine glass', 'Countertop', 'Tablet computer',\n        'Waste container', 'Swimming pool', 'Dog', 'Book', 'Elephant', 'Shark',\n        'Candle', 'Leopard', 'Axe', 'Hand dryer', 'Soap dispenser',\n        'Porcupine', 'Flower', 'Canary', 'Cheetah', 'Palm tree', 'Hamburger',\n        'Maple', 'Building', 'Fish', 'Lobster', 'Garden Asparagus',\n        'Furniture', 'Hedgehog', 'Airplane', 'Spoon', 'Otter', 'Bull',\n        'Oyster', 'Horizontal bar', 'Convenience store', 'Bomb', 'Bench',\n        'Ice cream', 'Caterpillar', 'Butterfly', 'Parachute', 'Orange',\n        'Antelope', 'Beaker', 'Moths and butterflies', 'Window', 'Closet',\n        'Castle', 'Jellyfish', 'Goose', 'Mule', 'Swan', 'Peach', 'Coconut',\n        'Seat belt', 'Raccoon', 'Chisel', 'Fork', 'Lamp', 'Camera',\n        'Squash (Plant)', 'Racket', 'Human face', 'Human arm', 'Vegetable',\n        'Diaper', 'Unicycle', 'Falcon', 'Chime', 'Snail', 'Shellfish',\n        'Cabbage', 'Carrot', 'Mango', 'Jeans', 'Flowerpot', 'Pineapple',\n        'Drawer', 'Stool', 'Envelope', 'Cake', 'Dragonfly', 'Common sunflower',\n        'Microwave oven', 'Honeycomb', 'Marine mammal', 'Sea lion', 'Ladybug',\n        'Shelf', 'Watch', 'Candy', 'Salad', 'Parrot', 'Handgun', 'Sparrow',\n        'Van', 'Grinder', 'Spice rack', 'Light bulb', 'Corded phone',\n        'Sports uniform', 'Tennis racket', 'Wall clock', 'Serving tray',\n        'Kitchen & dining room table', 'Dog bed', 'Cake stand',\n        'Cat furniture', 'Bathroom accessory', 'Facial tissue holder',\n        'Pressure cooker', 'Kitchen appliance', 'Tire', 'Ruler',\n        'Luggage and bags', 'Microphone', 'Broccoli', 'Umbrella', 'Pastry',\n        'Grapefruit', 'Band-aid', 'Animal', 'Bell pepper', 'Turkey', 'Lily',\n        'Pomegranate', 'Doughnut', 'Glasses', 'Human nose', 'Pen', 'Ant',\n        'Car', 'Aircraft', 'Human hand', 'Skunk', 'Teddy bear', 'Watermelon',\n        'Cantaloupe', 'Dishwasher', 'Flute', 'Balance beam', 'Sandwich',\n        'Shrimp', 'Sewing machine', 'Binoculars', 'Rays and skates', 'Ipod',\n        'Accordion', 'Willow', 'Crab', 'Crown', 'Seahorse', 'Perfume',\n        'Alpaca', 'Taxi', 'Canoe', 'Remote control', 'Wheelchair',\n        'Rugby ball', 'Armadillo', 'Maracas', 'Helmet'\n    ]\n\n\ndataset_aliases = {\n    'voc': ['voc', 'pascal_voc', 'voc07', 'voc12'],\n    'imagenet_det': ['det', 'imagenet_det', 'ilsvrc_det'],\n    'imagenet_vid': ['vid', 'imagenet_vid', 'ilsvrc_vid'],\n    'coco': ['coco', 'mscoco', 'ms_coco'],\n    'wider_face': ['WIDERFaceDataset', 'wider_face', 'WIDERFace'],\n    'cityscapes': ['cityscapes'],\n    'oid_challenge': ['oid_challenge', 'openimages_challenge'],\n    'oid_v6': ['oid_v6', 'openimages_v6']\n}\n\n\ndef get_classes(dataset):\n    \"\"\"Get class names of a dataset.\"\"\"\n    alias2name = {}\n    for name, aliases in dataset_aliases.items():\n        for alias in aliases:\n            alias2name[alias] = name\n\n    if mmcv.is_str(dataset):\n        if dataset in alias2name:\n            labels = eval(alias2name[dataset] + '_classes()')\n        else:\n            raise ValueError(f'Unrecognized dataset: {dataset}')\n    else:\n        raise TypeError(f'dataset must a str, but got {type(dataset)}')\n    return labels\n"
  },
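The `class_names.py` module above maps dataset aliases to class-name lists through `get_classes()`. A minimal usage sketch (assuming `mmdet` and its `mmcv` dependency are installed; `get_classes` is re-exported from `mmdet.core.evaluation`):

```python
# Resolve dataset aliases to class-name lists via get_classes()
# (illustrative sketch; assumes mmdet/mmcv are installed).
from mmdet.core.evaluation import get_classes

voc_names = get_classes('pascal_voc')   # alias of 'voc' in dataset_aliases
coco_names = get_classes('mscoco')      # alias of 'coco'

print(len(coco_names))                  # 80 COCO class names
print(coco_names[:3])                   # ['person', 'bicycle', 'car']
```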
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/evaluation/eval_hooks.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport bisect\nimport os.path as osp\n\nimport mmcv\nimport torch.distributed as dist\nfrom mmcv.runner import DistEvalHook as BaseDistEvalHook\nfrom mmcv.runner import EvalHook as BaseEvalHook\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\n\ndef _calc_dynamic_intervals(start_interval, dynamic_interval_list):\n    assert mmcv.is_list_of(dynamic_interval_list, tuple)\n\n    dynamic_milestones = [0]\n    dynamic_milestones.extend(\n        [dynamic_interval[0] for dynamic_interval in dynamic_interval_list])\n    dynamic_intervals = [start_interval]\n    dynamic_intervals.extend(\n        [dynamic_interval[1] for dynamic_interval in dynamic_interval_list])\n    return dynamic_milestones, dynamic_intervals\n\n\nclass EvalHook(BaseEvalHook):\n\n    def __init__(self, *args, dynamic_intervals=None, **kwargs):\n        super(EvalHook, self).__init__(*args, **kwargs)\n        self.latest_results = None\n\n        self.use_dynamic_intervals = dynamic_intervals is not None\n        if self.use_dynamic_intervals:\n            self.dynamic_milestones, self.dynamic_intervals = \\\n                _calc_dynamic_intervals(self.interval, dynamic_intervals)\n\n    def _decide_interval(self, runner):\n        if self.use_dynamic_intervals:\n            progress = runner.epoch if self.by_epoch else runner.iter\n            step = bisect.bisect(self.dynamic_milestones, (progress + 1))\n            # Dynamically modify the evaluation interval\n            self.interval = self.dynamic_intervals[step - 1]\n\n    def before_train_epoch(self, runner):\n        \"\"\"Evaluate the model only at the start of training by epoch.\"\"\"\n        self._decide_interval(runner)\n        super().before_train_epoch(runner)\n\n    def before_train_iter(self, runner):\n        self._decide_interval(runner)\n        super().before_train_iter(runner)\n\n    def _do_evaluate(self, runner):\n        \"\"\"perform evaluation and save ckpt.\"\"\"\n        if not self._should_evaluate(runner):\n            return\n\n        from mmdet.apis import single_gpu_test\n\n        # Changed results to self.results so that MMDetWandbHook can access\n        # the evaluation results and log them to wandb.\n        results = single_gpu_test(runner.model, self.dataloader, show=False)\n        self.latest_results = results\n        runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)\n        key_score = self.evaluate(runner, results)\n        # the key_score may be `None` so it needs to skip the action to save\n        # the best checkpoint\n        if self.save_best and key_score:\n            self._save_ckpt(runner, key_score)\n\n\n# Note: Considering that MMCV's EvalHook updated its interface in V1.3.16,\n# in order to avoid strong version dependency, we did not directly\n# inherit EvalHook but BaseDistEvalHook.\nclass DistEvalHook(BaseDistEvalHook):\n\n    def __init__(self, *args, dynamic_intervals=None, **kwargs):\n        super(DistEvalHook, self).__init__(*args, **kwargs)\n        self.latest_results = None\n\n        self.use_dynamic_intervals = dynamic_intervals is not None\n        if self.use_dynamic_intervals:\n            self.dynamic_milestones, self.dynamic_intervals = \\\n                _calc_dynamic_intervals(self.interval, dynamic_intervals)\n\n    def _decide_interval(self, runner):\n        if self.use_dynamic_intervals:\n            progress = runner.epoch if self.by_epoch else runner.iter\n            step = 
bisect.bisect(self.dynamic_milestones, (progress + 1))\n            # Dynamically modify the evaluation interval\n            self.interval = self.dynamic_intervals[step - 1]\n\n    def before_train_epoch(self, runner):\n        \"\"\"Evaluate the model only at the start of training by epoch.\"\"\"\n        self._decide_interval(runner)\n        super().before_train_epoch(runner)\n\n    def before_train_iter(self, runner):\n        self._decide_interval(runner)\n        super().before_train_iter(runner)\n\n    def _do_evaluate(self, runner):\n        \"\"\"perform evaluation and save ckpt.\"\"\"\n        # Synchronization of BatchNorm's buffer (running_mean\n        # and running_var) is not supported in the DDP of pytorch,\n        # which may cause the inconsistent performance of models in\n        # different ranks, so we broadcast BatchNorm's buffers\n        # of rank 0 to other ranks to avoid this.\n        if self.broadcast_bn_buffer:\n            model = runner.model\n            for name, module in model.named_modules():\n                if isinstance(module,\n                              _BatchNorm) and module.track_running_stats:\n                    dist.broadcast(module.running_var, 0)\n                    dist.broadcast(module.running_mean, 0)\n\n        if not self._should_evaluate(runner):\n            return\n\n        tmpdir = self.tmpdir\n        if tmpdir is None:\n            tmpdir = osp.join(runner.work_dir, '.eval_hook')\n\n        from mmdet.apis import multi_gpu_test\n\n        # Changed results to self.results so that MMDetWandbHook can access\n        # the evaluation results and log them to wandb.\n        results = multi_gpu_test(\n            runner.model,\n            self.dataloader,\n            tmpdir=tmpdir,\n            gpu_collect=self.gpu_collect)\n        self.latest_results = results\n        if runner.rank == 0:\n            print('\\n')\n            runner.log_buffer.output['eval_iter_num'] = len(self.dataloader)\n            key_score = self.evaluate(runner, results)\n\n            # the key_score may be `None` so it needs to skip\n            # the action to save the best checkpoint\n            if self.save_best and key_score:\n                self._save_ckpt(runner, key_score)\n"
  },
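The dynamic-interval logic in `eval_hooks.py` can be exercised without a full MMCV runner; the sketch below mirrors `_calc_dynamic_intervals()` and `_decide_interval()` using made-up milestone/interval values:

```python
# Standalone sketch of how EvalHook picks its evaluation interval
# from dynamic_intervals; the (milestone, interval) pairs are hypothetical.
import bisect

start_interval = 10
dynamic_interval_list = [(8, 5), (11, 1)]

milestones = [0] + [m for m, _ in dynamic_interval_list]              # [0, 8, 11]
intervals = [start_interval] + [i for _, i in dynamic_interval_list]  # [10, 5, 1]

for epoch in (0, 6, 7, 12):
    step = bisect.bisect(milestones, epoch + 1)
    print(f'epoch {epoch}: evaluate every {intervals[step - 1]} epochs')
    # -> 10, 10, 5, 1
```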
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/evaluation/mean_ap.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom multiprocessing import Pool\n\nimport mmcv\nimport numpy as np\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\nfrom .class_names import get_classes\n\n\ndef average_precision(recalls, precisions, mode='area'):\n    \"\"\"Calculate average precision (for single or multiple scales).\n\n    Args:\n        recalls (ndarray): shape (num_scales, num_dets) or (num_dets, )\n        precisions (ndarray): shape (num_scales, num_dets) or (num_dets, )\n        mode (str): 'area' or '11points', 'area' means calculating the area\n            under precision-recall curve, '11points' means calculating\n            the average precision of recalls at [0, 0.1, ..., 1]\n\n    Returns:\n        float or ndarray: calculated average precision\n    \"\"\"\n    no_scale = False\n    if recalls.ndim == 1:\n        no_scale = True\n        recalls = recalls[np.newaxis, :]\n        precisions = precisions[np.newaxis, :]\n    assert recalls.shape == precisions.shape and recalls.ndim == 2\n    num_scales = recalls.shape[0]\n    ap = np.zeros(num_scales, dtype=np.float32)\n    if mode == 'area':\n        zeros = np.zeros((num_scales, 1), dtype=recalls.dtype)\n        ones = np.ones((num_scales, 1), dtype=recalls.dtype)\n        mrec = np.hstack((zeros, recalls, ones))\n        mpre = np.hstack((zeros, precisions, zeros))\n        for i in range(mpre.shape[1] - 1, 0, -1):\n            mpre[:, i - 1] = np.maximum(mpre[:, i - 1], mpre[:, i])\n        for i in range(num_scales):\n            ind = np.where(mrec[i, 1:] != mrec[i, :-1])[0]\n            ap[i] = np.sum(\n                (mrec[i, ind + 1] - mrec[i, ind]) * mpre[i, ind + 1])\n    elif mode == '11points':\n        for i in range(num_scales):\n            for thr in np.arange(0, 1 + 1e-3, 0.1):\n                precs = precisions[i, recalls[i, :] >= thr]\n                prec = precs.max() if precs.size > 0 else 0\n                ap[i] += prec\n        ap /= 11\n    else:\n        raise ValueError(\n            'Unrecognized mode, only \"area\" and \"11points\" are supported')\n    if no_scale:\n        ap = ap[0]\n    return ap\n\n\ndef tpfp_imagenet(det_bboxes,\n                  gt_bboxes,\n                  gt_bboxes_ignore=None,\n                  default_iou_thr=0.5,\n                  area_ranges=None,\n                  use_legacy_coordinate=False,\n                  **kwargs):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): Detected bboxes of this image, of shape (m, 5).\n        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).\n        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,\n            of shape (k, 4). Default: None\n        default_iou_thr (float): IoU threshold to be considered as matched for\n            medium and large bboxes (small ones have special rules).\n            Default: 0.5.\n        area_ranges (list[tuple] | None): Range of bbox areas to be evaluated,\n            in the format [(min1, max1), (min2, max2), ...]. Default: None.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n\n    Returns:\n        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. 
The shape of\n        each array is (num_scales, m).\n    \"\"\"\n\n    if not use_legacy_coordinate:\n        extra_length = 0.\n    else:\n        extra_length = 1.\n\n    # an indicator of ignored gts\n    gt_ignore_inds = np.concatenate(\n        (np.zeros(gt_bboxes.shape[0], dtype=np.bool),\n         np.ones(gt_bboxes_ignore.shape[0], dtype=np.bool)))\n    # stack gt_bboxes and gt_bboxes_ignore for convenience\n    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))\n\n    num_dets = det_bboxes.shape[0]\n    num_gts = gt_bboxes.shape[0]\n    if area_ranges is None:\n        area_ranges = [(None, None)]\n    num_scales = len(area_ranges)\n    # tp and fp are of shape (num_scales, num_gts), each row is tp or fp\n    # of a certain scale.\n    tp = np.zeros((num_scales, num_dets), dtype=np.float32)\n    fp = np.zeros((num_scales, num_dets), dtype=np.float32)\n    if gt_bboxes.shape[0] == 0:\n        if area_ranges == [(None, None)]:\n            fp[...] = 1\n        else:\n            det_areas = (\n                det_bboxes[:, 2] - det_bboxes[:, 0] + extra_length) * (\n                    det_bboxes[:, 3] - det_bboxes[:, 1] + extra_length)\n            for i, (min_area, max_area) in enumerate(area_ranges):\n                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1\n        return tp, fp\n    ious = bbox_overlaps(\n        det_bboxes, gt_bboxes - 1, use_legacy_coordinate=use_legacy_coordinate)\n    gt_w = gt_bboxes[:, 2] - gt_bboxes[:, 0] + extra_length\n    gt_h = gt_bboxes[:, 3] - gt_bboxes[:, 1] + extra_length\n    iou_thrs = np.minimum((gt_w * gt_h) / ((gt_w + 10.0) * (gt_h + 10.0)),\n                          default_iou_thr)\n    # sort all detections by scores in descending order\n    sort_inds = np.argsort(-det_bboxes[:, -1])\n    for k, (min_area, max_area) in enumerate(area_ranges):\n        gt_covered = np.zeros(num_gts, dtype=bool)\n        # if no area range is specified, gt_area_ignore is all False\n        if min_area is None:\n            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)\n        else:\n            gt_areas = gt_w * gt_h\n            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)\n        for i in sort_inds:\n            max_iou = -1\n            matched_gt = -1\n            # find best overlapped available gt\n            for j in range(num_gts):\n                # different from PASCAL VOC: allow finding other gts if the\n                # best overlapped ones are already matched by other det bboxes\n                if gt_covered[j]:\n                    continue\n                elif ious[i, j] >= iou_thrs[j] and ious[i, j] > max_iou:\n                    max_iou = ious[i, j]\n                    matched_gt = j\n            # there are 4 cases for a det bbox:\n            # 1. it matches a gt, tp = 1, fp = 0\n            # 2. it matches an ignored gt, tp = 0, fp = 0\n            # 3. it matches no gt and within area range, tp = 0, fp = 1\n            # 4. 
it matches no gt but is beyond area range, tp = 0, fp = 0\n            if matched_gt >= 0:\n                gt_covered[matched_gt] = 1\n                if not (gt_ignore_inds[matched_gt]\n                        or gt_area_ignore[matched_gt]):\n                    tp[k, i] = 1\n            elif min_area is None:\n                fp[k, i] = 1\n            else:\n                bbox = det_bboxes[i, :4]\n                area = (bbox[2] - bbox[0] + extra_length) * (\n                    bbox[3] - bbox[1] + extra_length)\n                if area >= min_area and area < max_area:\n                    fp[k, i] = 1\n    return tp, fp\n\n\ndef tpfp_default(det_bboxes,\n                 gt_bboxes,\n                 gt_bboxes_ignore=None,\n                 iou_thr=0.5,\n                 area_ranges=None,\n                 use_legacy_coordinate=False,\n                 **kwargs):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): Detected bboxes of this image, of shape (m, 5).\n        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).\n        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,\n            of shape (k, 4). Default: None\n        iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        area_ranges (list[tuple] | None): Range of bbox areas to be\n            evaluated, in the format [(min1, max1), (min2, max2), ...].\n            Default: None.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n\n    Returns:\n        tuple[np.ndarray]: (tp, fp) whose elements are 0 and 1. The shape of\n        each array is (num_scales, m).\n    \"\"\"\n\n    if not use_legacy_coordinate:\n        extra_length = 0.\n    else:\n        extra_length = 1.\n\n    # an indicator of ignored gts\n    gt_ignore_inds = np.concatenate(\n        (np.zeros(gt_bboxes.shape[0], dtype=np.bool),\n         np.ones(gt_bboxes_ignore.shape[0], dtype=np.bool)))\n    # stack gt_bboxes and gt_bboxes_ignore for convenience\n    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))\n\n    num_dets = det_bboxes.shape[0]\n    num_gts = gt_bboxes.shape[0]\n    if area_ranges is None:\n        area_ranges = [(None, None)]\n    num_scales = len(area_ranges)\n    # tp and fp are of shape (num_scales, num_gts), each row is tp or fp of\n    # a certain scale\n    tp = np.zeros((num_scales, num_dets), dtype=np.float32)\n    fp = np.zeros((num_scales, num_dets), dtype=np.float32)\n\n    # if there is no gt bboxes in this image, then all det bboxes\n    # within area range are false positives\n    if gt_bboxes.shape[0] == 0:\n        if area_ranges == [(None, None)]:\n            fp[...] 
= 1\n        else:\n            det_areas = (\n                det_bboxes[:, 2] - det_bboxes[:, 0] + extra_length) * (\n                    det_bboxes[:, 3] - det_bboxes[:, 1] + extra_length)\n            for i, (min_area, max_area) in enumerate(area_ranges):\n                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1\n        return tp, fp\n\n    ious = bbox_overlaps(\n        det_bboxes, gt_bboxes, use_legacy_coordinate=use_legacy_coordinate)\n    # for each det, the max iou with all gts\n    ious_max = ious.max(axis=1)\n    # for each det, which gt overlaps most with it\n    ious_argmax = ious.argmax(axis=1)\n    # sort all dets in descending order by scores\n    sort_inds = np.argsort(-det_bboxes[:, -1])\n    for k, (min_area, max_area) in enumerate(area_ranges):\n        gt_covered = np.zeros(num_gts, dtype=bool)\n        # if no area range is specified, gt_area_ignore is all False\n        if min_area is None:\n            gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)\n        else:\n            gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0] + extra_length) * (\n                gt_bboxes[:, 3] - gt_bboxes[:, 1] + extra_length)\n            gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)\n        for i in sort_inds:\n            if ious_max[i] >= iou_thr:\n                matched_gt = ious_argmax[i]\n                if not (gt_ignore_inds[matched_gt]\n                        or gt_area_ignore[matched_gt]):\n                    if not gt_covered[matched_gt]:\n                        gt_covered[matched_gt] = True\n                        tp[k, i] = 1\n                    else:\n                        fp[k, i] = 1\n                # otherwise ignore this detected bbox, tp = 0, fp = 0\n            elif min_area is None:\n                fp[k, i] = 1\n            else:\n                bbox = det_bboxes[i, :4]\n                area = (bbox[2] - bbox[0] + extra_length) * (\n                    bbox[3] - bbox[1] + extra_length)\n                if area >= min_area and area < max_area:\n                    fp[k, i] = 1\n    return tp, fp\n\n\ndef tpfp_openimages(det_bboxes,\n                    gt_bboxes,\n                    gt_bboxes_ignore=None,\n                    iou_thr=0.5,\n                    area_ranges=None,\n                    use_legacy_coordinate=False,\n                    gt_bboxes_group_of=None,\n                    use_group_of=True,\n                    ioa_thr=0.5,\n                    **kwargs):\n    \"\"\"Check if detected bboxes are true positive or false positive.\n\n    Args:\n        det_bbox (ndarray): Detected bboxes of this image, of shape (m, 5).\n        gt_bboxes (ndarray): GT bboxes of this image, of shape (n, 4).\n        gt_bboxes_ignore (ndarray): Ignored gt bboxes of this image,\n            of shape (k, 4). Default: None\n        iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        area_ranges (list[tuple] | None): Range of bbox areas to be\n            evaluated, in the format [(min1, max1), (min2, max2), ...].\n            Default: None.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n        gt_bboxes_group_of (ndarray): GT group_of of this image, of shape\n            (k, 1). 
Default: None\n        use_group_of (bool): Whether to use group of when calculate TP and FP,\n            which only used in OpenImages evaluation. Default: True.\n        ioa_thr (float | None): IoA threshold to be considered as matched,\n            which only used in OpenImages evaluation. Default: 0.5.\n\n    Returns:\n        tuple[np.ndarray]: Returns a tuple (tp, fp, det_bboxes), where\n        (tp, fp) whose elements are 0 and 1. The shape of each array is\n        (num_scales, m). (det_bboxes) whose will filter those are not\n        matched by group of gts when processing Open Images evaluation.\n        The shape is (num_scales, m).\n    \"\"\"\n\n    if not use_legacy_coordinate:\n        extra_length = 0.\n    else:\n        extra_length = 1.\n\n    # an indicator of ignored gts\n    gt_ignore_inds = np.concatenate(\n        (np.zeros(gt_bboxes.shape[0], dtype=np.bool),\n         np.ones(gt_bboxes_ignore.shape[0], dtype=np.bool)))\n    # stack gt_bboxes and gt_bboxes_ignore for convenience\n    gt_bboxes = np.vstack((gt_bboxes, gt_bboxes_ignore))\n\n    num_dets = det_bboxes.shape[0]\n    num_gts = gt_bboxes.shape[0]\n    if area_ranges is None:\n        area_ranges = [(None, None)]\n    num_scales = len(area_ranges)\n    # tp and fp are of shape (num_scales, num_gts), each row is tp or fp of\n    # a certain scale\n    tp = np.zeros((num_scales, num_dets), dtype=np.float32)\n    fp = np.zeros((num_scales, num_dets), dtype=np.float32)\n\n    # if there is no gt bboxes in this image, then all det bboxes\n    # within area range are false positives\n    if gt_bboxes.shape[0] == 0:\n        if area_ranges == [(None, None)]:\n            fp[...] = 1\n        else:\n            det_areas = (\n                det_bboxes[:, 2] - det_bboxes[:, 0] + extra_length) * (\n                    det_bboxes[:, 3] - det_bboxes[:, 1] + extra_length)\n            for i, (min_area, max_area) in enumerate(area_ranges):\n                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1\n        return tp, fp, det_bboxes\n\n    if gt_bboxes_group_of is not None and use_group_of:\n        # if handle group-of boxes, divided gt boxes into two parts:\n        # non-group-of and group-of.Then calculate ious and ioas through\n        # non-group-of group-of gts respectively. 
This only used in\n        # OpenImages evaluation.\n        assert gt_bboxes_group_of.shape[0] == gt_bboxes.shape[0]\n        non_group_gt_bboxes = gt_bboxes[~gt_bboxes_group_of]\n        group_gt_bboxes = gt_bboxes[gt_bboxes_group_of]\n        num_gts_group = group_gt_bboxes.shape[0]\n        ious = bbox_overlaps(det_bboxes, non_group_gt_bboxes)\n        ioas = bbox_overlaps(det_bboxes, group_gt_bboxes, mode='iof')\n    else:\n        # if not consider group-of boxes, only calculate ious through gt boxes\n        ious = bbox_overlaps(\n            det_bboxes, gt_bboxes, use_legacy_coordinate=use_legacy_coordinate)\n        ioas = None\n\n    if ious.shape[1] > 0:\n        # for each det, the max iou with all gts\n        ious_max = ious.max(axis=1)\n        # for each det, which gt overlaps most with it\n        ious_argmax = ious.argmax(axis=1)\n        # sort all dets in descending order by scores\n        sort_inds = np.argsort(-det_bboxes[:, -1])\n        for k, (min_area, max_area) in enumerate(area_ranges):\n            gt_covered = np.zeros(num_gts, dtype=bool)\n            # if no area range is specified, gt_area_ignore is all False\n            if min_area is None:\n                gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)\n            else:\n                gt_areas = (\n                    gt_bboxes[:, 2] - gt_bboxes[:, 0] + extra_length) * (\n                        gt_bboxes[:, 3] - gt_bboxes[:, 1] + extra_length)\n                gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)\n            for i in sort_inds:\n                if ious_max[i] >= iou_thr:\n                    matched_gt = ious_argmax[i]\n                    if not (gt_ignore_inds[matched_gt]\n                            or gt_area_ignore[matched_gt]):\n                        if not gt_covered[matched_gt]:\n                            gt_covered[matched_gt] = True\n                            tp[k, i] = 1\n                        else:\n                            fp[k, i] = 1\n                    # otherwise ignore this detected bbox, tp = 0, fp = 0\n                elif min_area is None:\n                    fp[k, i] = 1\n                else:\n                    bbox = det_bboxes[i, :4]\n                    area = (bbox[2] - bbox[0] + extra_length) * (\n                        bbox[3] - bbox[1] + extra_length)\n                    if area >= min_area and area < max_area:\n                        fp[k, i] = 1\n    else:\n        # if there is no no-group-of gt bboxes in this image,\n        # then all det bboxes within area range are false positives.\n        # Only used in OpenImages evaluation.\n        if area_ranges == [(None, None)]:\n            fp[...] = 1\n        else:\n            det_areas = (\n                det_bboxes[:, 2] - det_bboxes[:, 0] + extra_length) * (\n                    det_bboxes[:, 3] - det_bboxes[:, 1] + extra_length)\n            for i, (min_area, max_area) in enumerate(area_ranges):\n                fp[i, (det_areas >= min_area) & (det_areas < max_area)] = 1\n\n    if ioas is None or ioas.shape[1] <= 0:\n        return tp, fp, det_bboxes\n    else:\n        # The evaluation of group-of TP and FP are done in two stages:\n        # 1. All detections are first matched to non group-of boxes; true\n        #    positives are determined.\n        # 2. 
Detections that are determined as false positives are matched\n        #    against group-of boxes and calculated group-of TP and FP.\n        # Only used in OpenImages evaluation.\n        det_bboxes_group = np.zeros(\n            (num_scales, ioas.shape[1], det_bboxes.shape[1]), dtype=float)\n        match_group_of = np.zeros((num_scales, num_dets), dtype=bool)\n        tp_group = np.zeros((num_scales, num_gts_group), dtype=np.float32)\n        ioas_max = ioas.max(axis=1)\n        # for each det, which gt overlaps most with it\n        ioas_argmax = ioas.argmax(axis=1)\n        # sort all dets in descending order by scores\n        sort_inds = np.argsort(-det_bboxes[:, -1])\n        for k, (min_area, max_area) in enumerate(area_ranges):\n            box_is_covered = tp[k]\n            # if no area range is specified, gt_area_ignore is all False\n            if min_area is None:\n                gt_area_ignore = np.zeros_like(gt_ignore_inds, dtype=bool)\n            else:\n                gt_areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (\n                    gt_bboxes[:, 3] - gt_bboxes[:, 1])\n                gt_area_ignore = (gt_areas < min_area) | (gt_areas >= max_area)\n            for i in sort_inds:\n                matched_gt = ioas_argmax[i]\n                if not box_is_covered[i]:\n                    if ioas_max[i] >= ioa_thr:\n                        if not (gt_ignore_inds[matched_gt]\n                                or gt_area_ignore[matched_gt]):\n                            if not tp_group[k, matched_gt]:\n                                tp_group[k, matched_gt] = 1\n                                match_group_of[k, i] = True\n                            else:\n                                match_group_of[k, i] = True\n\n                            if det_bboxes_group[k, matched_gt, -1] < \\\n                                    det_bboxes[i, -1]:\n                                det_bboxes_group[k, matched_gt] = \\\n                                    det_bboxes[i]\n\n        fp_group = (tp_group <= 0).astype(float)\n        tps = []\n        fps = []\n        # concatenate tp, fp, and det-boxes which not matched group of\n        # gt boxes and tp_group, fp_group, and det_bboxes_group which\n        # matched group of boxes respectively.\n        for i in range(num_scales):\n            tps.append(\n                np.concatenate((tp[i][~match_group_of[i]], tp_group[i])))\n            fps.append(\n                np.concatenate((fp[i][~match_group_of[i]], fp_group[i])))\n            det_bboxes = np.concatenate(\n                (det_bboxes[~match_group_of[i]], det_bboxes_group[i]))\n\n        tp = np.vstack(tps)\n        fp = np.vstack(fps)\n        return tp, fp, det_bboxes\n\n\ndef get_cls_results(det_results, annotations, class_id):\n    \"\"\"Get det results and gt information of a certain class.\n\n    Args:\n        det_results (list[list]): Same as `eval_map()`.\n        annotations (list[dict]): Same as `eval_map()`.\n        class_id (int): ID of a specific class.\n\n    Returns:\n        tuple[list[np.ndarray]]: detected bboxes, gt bboxes, ignored gt bboxes\n    \"\"\"\n    cls_dets = [img_res[class_id] for img_res in det_results]\n    cls_gts = []\n    cls_gts_ignore = []\n    for ann in annotations:\n        gt_inds = ann['labels'] == class_id\n        cls_gts.append(ann['bboxes'][gt_inds, :])\n\n        if ann.get('labels_ignore', None) is not None:\n            ignore_inds = ann['labels_ignore'] == class_id\n            
cls_gts_ignore.append(ann['bboxes_ignore'][ignore_inds, :])\n        else:\n            cls_gts_ignore.append(np.empty((0, 4), dtype=np.float32))\n\n    return cls_dets, cls_gts, cls_gts_ignore\n\n\ndef get_cls_group_ofs(annotations, class_id):\n    \"\"\"Get `gt_group_of` of a certain class, which is used in Open Images.\n\n    Args:\n        annotations (list[dict]): Same as `eval_map()`.\n        class_id (int): ID of a specific class.\n\n    Returns:\n        list[np.ndarray]: `gt_group_of` of a certain class.\n    \"\"\"\n    gt_group_ofs = []\n    for ann in annotations:\n        gt_inds = ann['labels'] == class_id\n        if ann.get('gt_is_group_ofs', None) is not None:\n            gt_group_ofs.append(ann['gt_is_group_ofs'][gt_inds])\n        else:\n            gt_group_ofs.append(np.empty((0, 1), dtype=np.bool))\n\n    return gt_group_ofs\n\n\ndef eval_map(det_results,\n             annotations,\n             scale_ranges=None,\n             iou_thr=0.5,\n             ioa_thr=None,\n             dataset=None,\n             logger=None,\n             tpfp_fn=None,\n             nproc=4,\n             use_legacy_coordinate=False,\n             use_group_of=False):\n    \"\"\"Evaluate mAP of a dataset.\n\n    Args:\n        det_results (list[list]): [[cls1_det, cls2_det, ...], ...].\n            The outer list indicates images, and the inner list indicates\n            per-class detected bboxes.\n        annotations (list[dict]): Ground truth annotations where each item of\n            the list indicates an image. Keys of annotations are:\n\n            - `bboxes`: numpy array of shape (n, 4)\n            - `labels`: numpy array of shape (n, )\n            - `bboxes_ignore` (optional): numpy array of shape (k, 4)\n            - `labels_ignore` (optional): numpy array of shape (k, )\n        scale_ranges (list[tuple] | None): Range of scales to be evaluated,\n            in the format [(min1, max1), (min2, max2), ...]. A range of\n            (32, 64) means the area range between (32**2, 64**2).\n            Default: None.\n        iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        ioa_thr (float | None): IoA threshold to be considered as matched,\n            which only used in OpenImages evaluation. Default: None.\n        dataset (list[str] | str | None): Dataset name or dataset classes,\n            there are minor differences in metrics for different datasets, e.g.\n            \"voc07\", \"imagenet_det\", etc. Default: None.\n        logger (logging.Logger | str | None): The way to print the mAP\n            summary. See `mmcv.utils.print_log()` for details. Default: None.\n        tpfp_fn (callable | None): The function used to determine true/\n            false positives. If None, :func:`tpfp_default` is used as default\n            unless dataset is 'det' or 'vid' (:func:`tpfp_imagenet` in this\n            case). If it is given as a function, then this function is used\n            to evaluate tp & fp. Default None.\n        nproc (int): Processes used for computing TP and FP.\n            Default: 4.\n        use_legacy_coordinate (bool): Whether to use coordinate system in\n            mmdet v1.x. which means width, height should be\n            calculated as 'x2 - x1 + 1` and 'y2 - y1 + 1' respectively.\n            Default: False.\n        use_group_of (bool): Whether to use group of when calculate TP and FP,\n            which only used in OpenImages evaluation. 
Default: False.\n\n    Returns:\n        tuple: (mAP, [dict, dict, ...])\n    \"\"\"\n    assert len(det_results) == len(annotations)\n    if not use_legacy_coordinate:\n        extra_length = 0.\n    else:\n        extra_length = 1.\n\n    num_imgs = len(det_results)\n    num_scales = len(scale_ranges) if scale_ranges is not None else 1\n    num_classes = len(det_results[0])  # positive class num\n    area_ranges = ([(rg[0]**2, rg[1]**2) for rg in scale_ranges]\n                   if scale_ranges is not None else None)\n\n    # There is no need to use multi processes to process\n    # when num_imgs = 1 .\n    if num_imgs > 1:\n        assert nproc > 0, 'nproc must be at least one.'\n        nproc = min(nproc, num_imgs)\n        pool = Pool(nproc)\n\n    eval_results = []\n    for i in range(num_classes):\n        # get gt and det bboxes of this class\n        cls_dets, cls_gts, cls_gts_ignore = get_cls_results(\n            det_results, annotations, i)\n        # choose proper function according to datasets to compute tp and fp\n        if tpfp_fn is None:\n            if dataset in ['det', 'vid']:\n                tpfp_fn = tpfp_imagenet\n            elif dataset in ['oid_challenge', 'oid_v6'] \\\n                    or use_group_of is True:\n                tpfp_fn = tpfp_openimages\n            else:\n                tpfp_fn = tpfp_default\n        if not callable(tpfp_fn):\n            raise ValueError(\n                f'tpfp_fn has to be a function or None, but got {tpfp_fn}')\n\n        if num_imgs > 1:\n            # compute tp and fp for each image with multiple processes\n            args = []\n            if use_group_of:\n                # used in Open Images Dataset evaluation\n                gt_group_ofs = get_cls_group_ofs(annotations, i)\n                args.append(gt_group_ofs)\n                args.append([use_group_of for _ in range(num_imgs)])\n            if ioa_thr is not None:\n                args.append([ioa_thr for _ in range(num_imgs)])\n\n            tpfp = pool.starmap(\n                tpfp_fn,\n                zip(cls_dets, cls_gts, cls_gts_ignore,\n                    [iou_thr for _ in range(num_imgs)],\n                    [area_ranges for _ in range(num_imgs)],\n                    [use_legacy_coordinate for _ in range(num_imgs)], *args))\n        else:\n            tpfp = tpfp_fn(\n                cls_dets[0],\n                cls_gts[0],\n                cls_gts_ignore[0],\n                iou_thr,\n                area_ranges,\n                use_legacy_coordinate,\n                gt_bboxes_group_of=(get_cls_group_ofs(annotations, i)[0]\n                                    if use_group_of else None),\n                use_group_of=use_group_of,\n                ioa_thr=ioa_thr)\n            tpfp = [tpfp]\n\n        if use_group_of:\n            tp, fp, cls_dets = tuple(zip(*tpfp))\n        else:\n            tp, fp = tuple(zip(*tpfp))\n        # calculate gt number of each scale\n        # ignored gts or gts beyond the specific scale are not counted\n        num_gts = np.zeros(num_scales, dtype=int)\n        for j, bbox in enumerate(cls_gts):\n            if area_ranges is None:\n                num_gts[0] += bbox.shape[0]\n            else:\n                gt_areas = (bbox[:, 2] - bbox[:, 0] + extra_length) * (\n                    bbox[:, 3] - bbox[:, 1] + extra_length)\n                for k, (min_area, max_area) in enumerate(area_ranges):\n                    num_gts[k] += np.sum((gt_areas >= min_area)\n                                         
& (gt_areas < max_area))\n        # sort all det bboxes by score, also sort tp and fp\n        cls_dets = np.vstack(cls_dets)\n        num_dets = cls_dets.shape[0]\n        sort_inds = np.argsort(-cls_dets[:, -1])\n        tp = np.hstack(tp)[:, sort_inds]\n        fp = np.hstack(fp)[:, sort_inds]\n        # calculate recall and precision with tp and fp\n        tp = np.cumsum(tp, axis=1)\n        fp = np.cumsum(fp, axis=1)\n        eps = np.finfo(np.float32).eps\n        recalls = tp / np.maximum(num_gts[:, np.newaxis], eps)\n        precisions = tp / np.maximum((tp + fp), eps)\n        # calculate AP\n        if scale_ranges is None:\n            recalls = recalls[0, :]\n            precisions = precisions[0, :]\n            num_gts = num_gts.item()\n        mode = 'area' if dataset != 'voc07' else '11points'\n        ap = average_precision(recalls, precisions, mode)\n        eval_results.append({\n            'num_gts': num_gts,\n            'num_dets': num_dets,\n            'recall': recalls,\n            'precision': precisions,\n            'ap': ap\n        })\n\n    if num_imgs > 1:\n        pool.close()\n\n    if scale_ranges is not None:\n        # shape (num_classes, num_scales)\n        all_ap = np.vstack([cls_result['ap'] for cls_result in eval_results])\n        all_num_gts = np.vstack(\n            [cls_result['num_gts'] for cls_result in eval_results])\n        mean_ap = []\n        for i in range(num_scales):\n            if np.any(all_num_gts[:, i] > 0):\n                mean_ap.append(all_ap[all_num_gts[:, i] > 0, i].mean())\n            else:\n                mean_ap.append(0.0)\n    else:\n        aps = []\n        for cls_result in eval_results:\n            if cls_result['num_gts'] > 0:\n                aps.append(cls_result['ap'])\n        mean_ap = np.array(aps).mean().item() if aps else 0.0\n\n    print_map_summary(\n        mean_ap, eval_results, dataset, area_ranges, logger=logger)\n\n    return mean_ap, eval_results\n\n\ndef print_map_summary(mean_ap,\n                      results,\n                      dataset=None,\n                      scale_ranges=None,\n                      logger=None):\n    \"\"\"Print mAP and results of each class.\n\n    A table will be printed to show the gts/dets/recall/AP of each class and\n    the mAP.\n\n    Args:\n        mean_ap (float): Calculated from `eval_map()`.\n        results (list[dict]): Calculated from `eval_map()`.\n        dataset (list[str] | str | None): Dataset name or dataset classes.\n        scale_ranges (list[tuple] | None): Range of scales to be evaluated.\n        logger (logging.Logger | str | None): The way to print the mAP\n            summary. See `mmcv.utils.print_log()` for details. 
Default: None.\n    \"\"\"\n\n    if logger == 'silent':\n        return\n\n    if isinstance(results[0]['ap'], np.ndarray):\n        num_scales = len(results[0]['ap'])\n    else:\n        num_scales = 1\n\n    if scale_ranges is not None:\n        assert len(scale_ranges) == num_scales\n\n    num_classes = len(results)\n\n    recalls = np.zeros((num_scales, num_classes), dtype=np.float32)\n    aps = np.zeros((num_scales, num_classes), dtype=np.float32)\n    num_gts = np.zeros((num_scales, num_classes), dtype=int)\n    for i, cls_result in enumerate(results):\n        if cls_result['recall'].size > 0:\n            recalls[:, i] = np.array(cls_result['recall'], ndmin=2)[:, -1]\n        aps[:, i] = cls_result['ap']\n        num_gts[:, i] = cls_result['num_gts']\n\n    if dataset is None:\n        label_names = [str(i) for i in range(num_classes)]\n    elif mmcv.is_str(dataset):\n        label_names = get_classes(dataset)\n    else:\n        label_names = dataset\n\n    if not isinstance(mean_ap, list):\n        mean_ap = [mean_ap]\n\n    header = ['class', 'gts', 'dets', 'recall', 'ap']\n    for i in range(num_scales):\n        if scale_ranges is not None:\n            print_log(f'Scale range {scale_ranges[i]}', logger=logger)\n        table_data = [header]\n        for j in range(num_classes):\n            row_data = [\n                label_names[j], num_gts[i, j], results[j]['num_dets'],\n                f'{recalls[i, j]:.3f}', f'{aps[i, j]:.3f}'\n            ]\n            table_data.append(row_data)\n        table_data.append(['mAP', '', '', '', f'{mean_ap[i]:.3f}'])\n        table = AsciiTable(table_data)\n        table.inner_footing_row_border = True\n        print_log('\\n' + table.table, logger=logger)\n"
  },
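`average_precision()` in `mean_ap.py` is self-contained and easy to sanity-check with a toy precision-recall curve (a sketch; the numbers are made up and assume `mmdet` is importable):

```python
# Toy check of average_precision() from mean_ap.py.
import numpy as np
from mmdet.core.evaluation.mean_ap import average_precision

recalls = np.array([0.2, 0.4, 0.6, 0.8, 1.0], dtype=np.float32)
precisions = np.array([1.0, 0.8, 0.75, 0.66, 0.6], dtype=np.float32)

print(average_precision(recalls, precisions, mode='area'))      # area under the PR curve
print(average_precision(recalls, precisions, mode='11points'))  # VOC07-style 11-point AP
```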
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/evaluation/panoptic_utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# A custom value to distinguish instance ID and category ID; need to\n# be greater than the number of categories.\n# For a pixel in the panoptic result map:\n#   pan_id = ins_id * INSTANCE_OFFSET + cat_id\nINSTANCE_OFFSET = 1000\n"
  },
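`INSTANCE_OFFSET` packs a category ID and an instance ID into a single panoptic ID, as the comment above describes. A quick round-trip sketch (the `cat_id`/`ins_id` values are arbitrary):

```python
# Packing / unpacking a panoptic ID with INSTANCE_OFFSET.
from mmdet.core.evaluation.panoptic_utils import INSTANCE_OFFSET

cat_id, ins_id = 17, 3
pan_id = ins_id * INSTANCE_OFFSET + cat_id    # 3017

assert pan_id % INSTANCE_OFFSET == cat_id     # recover the category ID
assert pan_id // INSTANCE_OFFSET == ins_id    # recover the instance ID
```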
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/evaluation/recall.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom collections.abc import Sequence\n\nimport numpy as np\nfrom mmcv.utils import print_log\nfrom terminaltables import AsciiTable\n\nfrom .bbox_overlaps import bbox_overlaps\n\n\ndef _recalls(all_ious, proposal_nums, thrs):\n\n    img_num = all_ious.shape[0]\n    total_gt_num = sum([ious.shape[0] for ious in all_ious])\n\n    _ious = np.zeros((proposal_nums.size, total_gt_num), dtype=np.float32)\n    for k, proposal_num in enumerate(proposal_nums):\n        tmp_ious = np.zeros(0)\n        for i in range(img_num):\n            ious = all_ious[i][:, :proposal_num].copy()\n            gt_ious = np.zeros((ious.shape[0]))\n            if ious.size == 0:\n                tmp_ious = np.hstack((tmp_ious, gt_ious))\n                continue\n            for j in range(ious.shape[0]):\n                gt_max_overlaps = ious.argmax(axis=1)\n                max_ious = ious[np.arange(0, ious.shape[0]), gt_max_overlaps]\n                gt_idx = max_ious.argmax()\n                gt_ious[j] = max_ious[gt_idx]\n                box_idx = gt_max_overlaps[gt_idx]\n                ious[gt_idx, :] = -1\n                ious[:, box_idx] = -1\n            tmp_ious = np.hstack((tmp_ious, gt_ious))\n        _ious[k, :] = tmp_ious\n\n    _ious = np.fliplr(np.sort(_ious, axis=1))\n    recalls = np.zeros((proposal_nums.size, thrs.size))\n    for i, thr in enumerate(thrs):\n        recalls[:, i] = (_ious >= thr).sum(axis=1) / float(total_gt_num)\n\n    return recalls\n\n\ndef set_recall_param(proposal_nums, iou_thrs):\n    \"\"\"Check proposal_nums and iou_thrs and set correct format.\"\"\"\n    if isinstance(proposal_nums, Sequence):\n        _proposal_nums = np.array(proposal_nums)\n    elif isinstance(proposal_nums, int):\n        _proposal_nums = np.array([proposal_nums])\n    else:\n        _proposal_nums = proposal_nums\n\n    if iou_thrs is None:\n        _iou_thrs = np.array([0.5])\n    elif isinstance(iou_thrs, Sequence):\n        _iou_thrs = np.array(iou_thrs)\n    elif isinstance(iou_thrs, float):\n        _iou_thrs = np.array([iou_thrs])\n    else:\n        _iou_thrs = iou_thrs\n\n    return _proposal_nums, _iou_thrs\n\n\ndef eval_recalls(gts,\n                 proposals,\n                 proposal_nums=None,\n                 iou_thrs=0.5,\n                 logger=None,\n                 use_legacy_coordinate=False):\n    \"\"\"Calculate recalls.\n\n    Args:\n        gts (list[ndarray]): a list of arrays of shape (n, 4)\n        proposals (list[ndarray]): a list of arrays of shape (k, 4) or (k, 5)\n        proposal_nums (int | Sequence[int]): Top N proposals to be evaluated.\n        iou_thrs (float | Sequence[float]): IoU thresholds. Default: 0.5.\n        logger (logging.Logger | str | None): The way to print the recall\n            summary. See `mmcv.utils.print_log()` for details. Default: None.\n        use_legacy_coordinate (bool): Whether use coordinate system\n            in mmdet v1.x. \"1\" was added to both height and width\n            which means w, h should be\n            computed as 'x2 - x1 + 1` and 'y2 - y1 + 1'. 
Default: False.\n\n\n    Returns:\n        ndarray: recalls of different ious and proposal nums\n    \"\"\"\n\n    img_num = len(gts)\n    assert img_num == len(proposals)\n    proposal_nums, iou_thrs = set_recall_param(proposal_nums, iou_thrs)\n    all_ious = []\n    for i in range(img_num):\n        if proposals[i].ndim == 2 and proposals[i].shape[1] == 5:\n            scores = proposals[i][:, 4]\n            sort_idx = np.argsort(scores)[::-1]\n            img_proposal = proposals[i][sort_idx, :]\n        else:\n            img_proposal = proposals[i]\n        prop_num = min(img_proposal.shape[0], proposal_nums[-1])\n        if gts[i] is None or gts[i].shape[0] == 0:\n            ious = np.zeros((0, img_proposal.shape[0]), dtype=np.float32)\n        else:\n            ious = bbox_overlaps(\n                gts[i],\n                img_proposal[:prop_num, :4],\n                use_legacy_coordinate=use_legacy_coordinate)\n        all_ious.append(ious)\n    all_ious = np.array(all_ious)\n    recalls = _recalls(all_ious, proposal_nums, iou_thrs)\n\n    print_recall_summary(recalls, proposal_nums, iou_thrs, logger=logger)\n    return recalls\n\n\ndef print_recall_summary(recalls,\n                         proposal_nums,\n                         iou_thrs,\n                         row_idxs=None,\n                         col_idxs=None,\n                         logger=None):\n    \"\"\"Print recalls in a table.\n\n    Args:\n        recalls (ndarray): calculated from `bbox_recalls`\n        proposal_nums (ndarray or list): top N proposals\n        iou_thrs (ndarray or list): iou thresholds\n        row_idxs (ndarray): which rows(proposal nums) to print\n        col_idxs (ndarray): which cols(iou thresholds) to print\n        logger (logging.Logger | str | None): The way to print the recall\n            summary. See `mmcv.utils.print_log()` for details. 
Default: None.\n    \"\"\"\n    proposal_nums = np.array(proposal_nums, dtype=np.int32)\n    iou_thrs = np.array(iou_thrs)\n    if row_idxs is None:\n        row_idxs = np.arange(proposal_nums.size)\n    if col_idxs is None:\n        col_idxs = np.arange(iou_thrs.size)\n    row_header = [''] + iou_thrs[col_idxs].tolist()\n    table_data = [row_header]\n    for i, num in enumerate(proposal_nums[row_idxs]):\n        row = [f'{val:.3f}' for val in recalls[row_idxs[i], col_idxs].tolist()]\n        row.insert(0, num)\n        table_data.append(row)\n    table = AsciiTable(table_data)\n    print_log('\\n' + table.table, logger=logger)\n\n\ndef plot_num_recall(recalls, proposal_nums):\n    \"\"\"Plot Proposal_num-Recalls curve.\n\n    Args:\n        recalls(ndarray or list): shape (k,)\n        proposal_nums(ndarray or list): same shape as `recalls`\n    \"\"\"\n    if isinstance(proposal_nums, np.ndarray):\n        _proposal_nums = proposal_nums.tolist()\n    else:\n        _proposal_nums = proposal_nums\n    if isinstance(recalls, np.ndarray):\n        _recalls = recalls.tolist()\n    else:\n        _recalls = recalls\n\n    import matplotlib.pyplot as plt\n    f = plt.figure()\n    plt.plot([0] + _proposal_nums, [0] + _recalls)\n    plt.xlabel('Proposal num')\n    plt.ylabel('Recall')\n    plt.axis([0, proposal_nums.max(), 0, 1])\n    f.show()\n\n\ndef plot_iou_recall(recalls, iou_thrs):\n    \"\"\"Plot IoU-Recalls curve.\n\n    Args:\n        recalls(ndarray or list): shape (k,)\n        iou_thrs(ndarray or list): same shape as `recalls`\n    \"\"\"\n    if isinstance(iou_thrs, np.ndarray):\n        _iou_thrs = iou_thrs.tolist()\n    else:\n        _iou_thrs = iou_thrs\n    if isinstance(recalls, np.ndarray):\n        _recalls = recalls.tolist()\n    else:\n        _recalls = recalls\n\n    import matplotlib.pyplot as plt\n    f = plt.figure()\n    plt.plot(_iou_thrs + [1.0], _recalls + [0.])\n    plt.xlabel('IoU')\n    plt.ylabel('Recall')\n    plt.axis([iou_thrs.min(), 1, 0, 1])\n    f.show()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/export/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .onnx_helper import (add_dummy_nms_for_onnx, dynamic_clip_for_onnx,\n                          get_k_for_topk)\nfrom .pytorch2onnx import (build_model_from_cfg,\n                           generate_inputs_and_wrap_model,\n                           preprocess_example_input)\n\n__all__ = [\n    'build_model_from_cfg', 'generate_inputs_and_wrap_model',\n    'preprocess_example_input', 'get_k_for_topk', 'add_dummy_nms_for_onnx',\n    'dynamic_clip_for_onnx'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/export/model_wrappers.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nimport warnings\n\nimport numpy as np\nimport torch\n\nfrom mmdet.core import bbox2result\nfrom mmdet.models import BaseDetector\n\n\nclass DeployBaseDetector(BaseDetector):\n    \"\"\"DeployBaseDetector.\"\"\"\n\n    def __init__(self, class_names, device_id):\n        super(DeployBaseDetector, self).__init__()\n        self.CLASSES = class_names\n        self.device_id = device_id\n\n    def simple_test(self, img, img_metas, **kwargs):\n        raise NotImplementedError('This method is not implemented.')\n\n    def aug_test(self, imgs, img_metas, **kwargs):\n        raise NotImplementedError('This method is not implemented.')\n\n    def extract_feat(self, imgs):\n        raise NotImplementedError('This method is not implemented.')\n\n    def forward_train(self, imgs, img_metas, **kwargs):\n        raise NotImplementedError('This method is not implemented.')\n\n    def val_step(self, data, optimizer):\n        raise NotImplementedError('This method is not implemented.')\n\n    def train_step(self, data, optimizer):\n        raise NotImplementedError('This method is not implemented.')\n\n    def forward_test(self, *, img, img_metas, **kwargs):\n        raise NotImplementedError('This method is not implemented.')\n\n    def async_simple_test(self, img, img_metas, **kwargs):\n        raise NotImplementedError('This method is not implemented.')\n\n    def forward(self, img, img_metas, return_loss=True, **kwargs):\n        outputs = self.forward_test(img, img_metas, **kwargs)\n        batch_dets, batch_labels = outputs[:2]\n        batch_masks = outputs[2] if len(outputs) == 3 else None\n        batch_size = img[0].shape[0]\n        img_metas = img_metas[0]\n        results = []\n        rescale = kwargs.get('rescale', True)\n        for i in range(batch_size):\n            dets, labels = batch_dets[i], batch_labels[i]\n            if rescale:\n                scale_factor = img_metas[i]['scale_factor']\n\n                if isinstance(scale_factor, (list, tuple, np.ndarray)):\n                    assert len(scale_factor) == 4\n                    scale_factor = np.array(scale_factor)[None, :]  # [1,4]\n                dets[:, :4] /= scale_factor\n\n            if 'border' in img_metas[i]:\n                # offset pixel of the top-left corners between original image\n                # and padded/enlarged image, 'border' is used when exporting\n                # CornerNet and CentripetalNet to onnx\n                x_off = img_metas[i]['border'][2]\n                y_off = img_metas[i]['border'][0]\n                dets[:, [0, 2]] -= x_off\n                dets[:, [1, 3]] -= y_off\n                dets[:, :4] *= (dets[:, :4] > 0).astype(dets.dtype)\n\n            dets_results = bbox2result(dets, labels, len(self.CLASSES))\n\n            if batch_masks is not None:\n                masks = batch_masks[i]\n                img_h, img_w = img_metas[i]['img_shape'][:2]\n                ori_h, ori_w = img_metas[i]['ori_shape'][:2]\n                masks = masks[:, :img_h, :img_w]\n                if rescale:\n                    masks = masks.astype(np.float32)\n                    masks = torch.from_numpy(masks)\n                    masks = torch.nn.functional.interpolate(\n                        masks.unsqueeze(0), size=(ori_h, ori_w))\n                    masks = masks.squeeze(0).detach().numpy()\n                if masks.dtype != np.bool:\n                    masks = masks >= 0.5\n                
segms_results = [[] for _ in range(len(self.CLASSES))]\n                for j in range(len(dets)):\n                    segms_results[labels[j]].append(masks[j])\n                results.append((dets_results, segms_results))\n            else:\n                results.append(dets_results)\n        return results\n\n\nclass ONNXRuntimeDetector(DeployBaseDetector):\n    \"\"\"Wrapper for detector's inference with ONNXRuntime.\"\"\"\n\n    def __init__(self, onnx_file, class_names, device_id):\n        super(ONNXRuntimeDetector, self).__init__(class_names, device_id)\n        import onnxruntime as ort\n\n        # get the custom op path\n        ort_custom_op_path = ''\n        try:\n            from mmcv.ops import get_onnxruntime_op_path\n            ort_custom_op_path = get_onnxruntime_op_path()\n        except (ImportError, ModuleNotFoundError):\n            warnings.warn('If input model has custom op from mmcv, \\\n                you may have to build mmcv with ONNXRuntime from source.')\n        session_options = ort.SessionOptions()\n        # register custom op for onnxruntime\n        if osp.exists(ort_custom_op_path):\n            session_options.register_custom_ops_library(ort_custom_op_path)\n        sess = ort.InferenceSession(onnx_file, session_options)\n        providers = ['CPUExecutionProvider']\n        options = [{}]\n        is_cuda_available = ort.get_device() == 'GPU'\n        if is_cuda_available:\n            providers.insert(0, 'CUDAExecutionProvider')\n            options.insert(0, {'device_id': device_id})\n\n        sess.set_providers(providers, options)\n\n        self.sess = sess\n        self.io_binding = sess.io_binding()\n        self.output_names = [_.name for _ in sess.get_outputs()]\n        self.is_cuda_available = is_cuda_available\n\n    def forward_test(self, imgs, img_metas, **kwargs):\n        input_data = imgs[0]\n        # set io binding for inputs/outputs\n        device_type = 'cuda' if self.is_cuda_available else 'cpu'\n        if not self.is_cuda_available:\n            input_data = input_data.cpu()\n        self.io_binding.bind_input(\n            name='input',\n            device_type=device_type,\n            device_id=self.device_id,\n            element_type=np.float32,\n            shape=input_data.shape,\n            buffer_ptr=input_data.data_ptr())\n\n        for name in self.output_names:\n            self.io_binding.bind_output(name)\n        # run session to get outputs\n        self.sess.run_with_iobinding(self.io_binding)\n        ort_outputs = self.io_binding.copy_outputs_to_cpu()\n        return ort_outputs\n\n\nclass TensorRTDetector(DeployBaseDetector):\n    \"\"\"Wrapper for detector's inference with TensorRT.\"\"\"\n\n    def __init__(self, engine_file, class_names, device_id, output_names=None):\n        super(TensorRTDetector, self).__init__(class_names, device_id)\n        warnings.warn('`output_names` is deprecated and will be removed in '\n                      'future releases.')\n        from mmcv.tensorrt import TRTWraper, load_tensorrt_plugin\n        try:\n            load_tensorrt_plugin()\n        except (ImportError, ModuleNotFoundError):\n            warnings.warn('If input model has custom op from mmcv, \\\n                you may have to build mmcv with TensorRT from source.')\n\n        output_names = ['dets', 'labels']\n        model = TRTWraper(engine_file, ['input'], output_names)\n        with_masks = False\n        # if TensorRT has totally 4 inputs/outputs, then\n        # the detector should have 
`mask` output.\n        if len(model.engine) == 4:\n            model.output_names = output_names + ['masks']\n            with_masks = True\n        self.model = model\n        self.with_masks = with_masks\n\n    def forward_test(self, imgs, img_metas, **kwargs):\n        input_data = imgs[0].contiguous()\n        with torch.cuda.device(self.device_id), torch.no_grad():\n            outputs = self.model({'input': input_data})\n            outputs = [outputs[name] for name in self.model.output_names]\n        outputs = [out.detach().cpu().numpy() for out in outputs]\n        return outputs\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/export/onnx_helper.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\n\nimport torch\n\n\ndef dynamic_clip_for_onnx(x1, y1, x2, y2, max_shape):\n    \"\"\"Clip boxes dynamically for onnx.\n\n    Since torch.clamp cannot have dynamic `min` and `max`, we scale the\n      boxes by 1/max_shape and clamp in the range [0, 1].\n\n    Args:\n        x1 (Tensor): The x1 for bounding boxes.\n        y1 (Tensor): The y1 for bounding boxes.\n        x2 (Tensor): The x2 for bounding boxes.\n        y2 (Tensor): The y2 for bounding boxes.\n        max_shape (Tensor or torch.Size): The (H,W) of original image.\n    Returns:\n        tuple(Tensor): The clipped x1, y1, x2, y2.\n    \"\"\"\n    assert isinstance(\n        max_shape,\n        torch.Tensor), '`max_shape` should be tensor of (h,w) for onnx'\n\n    # scale by 1/max_shape\n    x1 = x1 / max_shape[1]\n    y1 = y1 / max_shape[0]\n    x2 = x2 / max_shape[1]\n    y2 = y2 / max_shape[0]\n\n    # clamp [0, 1]\n    x1 = torch.clamp(x1, 0, 1)\n    y1 = torch.clamp(y1, 0, 1)\n    x2 = torch.clamp(x2, 0, 1)\n    y2 = torch.clamp(y2, 0, 1)\n\n    # scale back\n    x1 = x1 * max_shape[1]\n    y1 = y1 * max_shape[0]\n    x2 = x2 * max_shape[1]\n    y2 = y2 * max_shape[0]\n    return x1, y1, x2, y2\n\n\ndef get_k_for_topk(k, size):\n    \"\"\"Get k of TopK for onnx exporting.\n\n    The K of TopK in TensorRT should not be a Tensor, while in ONNX Runtime\n      it could be a Tensor.Due to dynamic shape feature, we have to decide\n      whether to do TopK and what K it should be while exporting to ONNX.\n    If returned K is less than zero, it means we do not have to do\n      TopK operation.\n\n    Args:\n        k (int or Tensor): The set k value for nms from config file.\n        size (Tensor or torch.Size): The number of elements of \\\n            TopK's input tensor\n    Returns:\n        tuple: (int or Tensor): The final K for TopK.\n    \"\"\"\n    ret_k = -1\n    if k <= 0 or size <= 0:\n        return ret_k\n    if torch.onnx.is_in_onnx_export():\n        is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'\n        if is_trt_backend:\n            # TensorRT does not support dynamic K with TopK op\n            if 0 < k < size:\n                ret_k = k\n        else:\n            # Always keep topk op for dynamic input in onnx for ONNX Runtime\n            ret_k = torch.where(k < size, k, size)\n    elif k < size:\n        ret_k = k\n    else:\n        # ret_k is -1\n        pass\n    return ret_k\n\n\ndef add_dummy_nms_for_onnx(boxes,\n                           scores,\n                           max_output_boxes_per_class=1000,\n                           iou_threshold=0.5,\n                           score_threshold=0.05,\n                           pre_top_k=-1,\n                           after_top_k=-1,\n                           labels=None):\n    \"\"\"Create a dummy onnx::NonMaxSuppression op while exporting to ONNX.\n\n    This function helps exporting to onnx with batch and multiclass NMS op.\n    It only supports class-agnostic detection results. That is, the scores\n    is of shape (N, num_bboxes, num_classes) and the boxes is of shape\n    (N, num_boxes, 4).\n\n    Args:\n        boxes (Tensor): The bounding boxes of shape [N, num_boxes, 4]\n        scores (Tensor): The detection scores of shape\n            [N, num_boxes, num_classes]\n        max_output_boxes_per_class (int): Maximum number of output\n            boxes per class of nms. Defaults to 1000.\n        iou_threshold (float): IOU threshold of nms. 
Defaults to 0.5.\n        score_threshold (float): score threshold of nms.\n            Defaults to 0.05.\n        pre_top_k (int): Number of top K boxes to keep before nms.\n            Defaults to -1.\n        after_top_k (int): Number of top K boxes to keep after nms.\n            Defaults to -1.\n        labels (Tensor, optional): If not None, explicit labels would be used.\n            Otherwise, labels would be automatically generated using\n            num_classes. Defaults to None.\n\n    Returns:\n        tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]\n            and class labels of shape [N, num_det].\n    \"\"\"\n    max_output_boxes_per_class = torch.LongTensor([max_output_boxes_per_class])\n    iou_threshold = torch.tensor([iou_threshold], dtype=torch.float32)\n    score_threshold = torch.tensor([score_threshold], dtype=torch.float32)\n    batch_size = scores.shape[0]\n    num_class = scores.shape[2]\n\n    nms_pre = torch.tensor(pre_top_k, device=scores.device, dtype=torch.long)\n    nms_pre = get_k_for_topk(nms_pre, boxes.shape[1])\n\n    if nms_pre > 0:\n        max_scores, _ = scores.max(-1)\n        _, topk_inds = max_scores.topk(nms_pre)\n        batch_inds = torch.arange(batch_size).view(\n            -1, 1).expand_as(topk_inds).long()\n        # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501\n        transformed_inds = boxes.shape[1] * batch_inds + topk_inds\n        boxes = boxes.reshape(-1, 4)[transformed_inds, :].reshape(\n            batch_size, -1, 4)\n        scores = scores.reshape(-1, num_class)[transformed_inds, :].reshape(\n            batch_size, -1, num_class)\n        if labels is not None:\n            labels = labels.reshape(-1, 1)[transformed_inds].reshape(\n                batch_size, -1)\n\n    scores = scores.permute(0, 2, 1)\n    num_box = boxes.shape[1]\n    # turn off tracing to create a dummy output of nms\n    state = torch._C._get_tracing_state()\n    # dummy indices of nms's output\n    num_fake_det = 2\n    batch_inds = torch.randint(batch_size, (num_fake_det, 1))\n    cls_inds = torch.randint(num_class, (num_fake_det, 1))\n    box_inds = torch.randint(num_box, (num_fake_det, 1))\n    indices = torch.cat([batch_inds, cls_inds, box_inds], dim=1)\n    output = indices\n    setattr(DummyONNXNMSop, 'output', output)\n\n    # open tracing\n    torch._C._set_tracing_state(state)\n    selected_indices = DummyONNXNMSop.apply(boxes, scores,\n                                            max_output_boxes_per_class,\n                                            iou_threshold, score_threshold)\n\n    batch_inds, cls_inds = selected_indices[:, 0], selected_indices[:, 1]\n    box_inds = selected_indices[:, 2]\n    if labels is None:\n        labels = torch.arange(num_class, dtype=torch.long).to(scores.device)\n        labels = labels.view(1, num_class, 1).expand_as(scores)\n    scores = scores.reshape(-1, 1)\n    boxes = boxes.reshape(batch_size, -1).repeat(1, num_class).reshape(-1, 4)\n    pos_inds = (num_class * batch_inds + cls_inds) * num_box + box_inds\n    mask = scores.new_zeros(scores.shape)\n    # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501\n    # PyTorch style code: mask[batch_inds, box_inds] += 1\n    mask[pos_inds, :] += 1\n    scores = scores * mask\n    boxes = boxes * mask\n\n    scores = scores.reshape(batch_size, -1)\n    boxes = boxes.reshape(batch_size, -1, 4)\n    labels = labels.reshape(batch_size, -1)\n\n    nms_after = torch.tensor(\n        
after_top_k, device=scores.device, dtype=torch.long)\n    nms_after = get_k_for_topk(nms_after, num_box * num_class)\n\n    if nms_after > 0:\n        _, topk_inds = scores.topk(nms_after)\n        batch_inds = torch.arange(batch_size).view(-1, 1).expand_as(topk_inds)\n        # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501\n        transformed_inds = scores.shape[1] * batch_inds + topk_inds\n        scores = scores.reshape(-1, 1)[transformed_inds, :].reshape(\n            batch_size, -1)\n        boxes = boxes.reshape(-1, 4)[transformed_inds, :].reshape(\n            batch_size, -1, 4)\n        labels = labels.reshape(-1, 1)[transformed_inds, :].reshape(\n            batch_size, -1)\n\n    scores = scores.unsqueeze(2)\n    dets = torch.cat([boxes, scores], dim=2)\n    return dets, labels\n\n\nclass DummyONNXNMSop(torch.autograd.Function):\n    \"\"\"DummyONNXNMSop.\n\n    This class is only for creating onnx::NonMaxSuppression.\n    \"\"\"\n\n    @staticmethod\n    def forward(ctx, boxes, scores, max_output_boxes_per_class, iou_threshold,\n                score_threshold):\n\n        return DummyONNXNMSop.output\n\n    @staticmethod\n    def symbolic(g, boxes, scores, max_output_boxes_per_class, iou_threshold,\n                 score_threshold):\n        return g.op(\n            'NonMaxSuppression',\n            boxes,\n            scores,\n            max_output_boxes_per_class,\n            iou_threshold,\n            score_threshold,\n            outputs=1)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/export/pytorch2onnx.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom functools import partial\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv.runner import load_checkpoint\n\n\ndef generate_inputs_and_wrap_model(config_path,\n                                   checkpoint_path,\n                                   input_config,\n                                   cfg_options=None):\n    \"\"\"Prepare sample input and wrap model for ONNX export.\n\n    The ONNX export API only accept args, and all inputs should be\n    torch.Tensor or corresponding types (such as tuple of tensor).\n    So we should call this function before exporting. This function will:\n\n    1. generate corresponding inputs which are used to execute the model.\n    2. Wrap the model's forward function.\n\n    For example, the MMDet models' forward function has a parameter\n    ``return_loss:bool``. As we want to set it as False while export API\n    supports neither bool type or kwargs. So we have to replace the forward\n    method like ``model.forward = partial(model.forward, return_loss=False)``.\n\n    Args:\n        config_path (str): the OpenMMLab config for the model we want to\n            export to ONNX\n        checkpoint_path (str): Path to the corresponding checkpoint\n        input_config (dict): the exactly data in this dict depends on the\n            framework. For MMSeg, we can just declare the input shape,\n            and generate the dummy data accordingly. However, for MMDet,\n            we may pass the real img path, or the NMS will return None\n            as there is no legal bbox.\n\n    Returns:\n        tuple: (model, tensor_data) wrapped model which can be called by\n            ``model(*tensor_data)`` and a list of inputs which are used to\n            execute the model while exporting.\n    \"\"\"\n\n    model = build_model_from_cfg(\n        config_path, checkpoint_path, cfg_options=cfg_options)\n    one_img, one_meta = preprocess_example_input(input_config)\n    tensor_data = [one_img]\n    model.forward = partial(\n        model.forward, img_metas=[[one_meta]], return_loss=False)\n\n    # pytorch has some bug in pytorch1.3, we have to fix it\n    # by replacing these existing op\n    opset_version = 11\n    # put the import within the function thus it will not cause import error\n    # when not using this function\n    try:\n        from mmcv.onnx.symbolic import register_extra_symbolics\n    except ModuleNotFoundError:\n        raise NotImplementedError('please update mmcv to version>=v1.0.4')\n    register_extra_symbolics(opset_version)\n\n    return model, tensor_data\n\n\ndef build_model_from_cfg(config_path, checkpoint_path, cfg_options=None):\n    \"\"\"Build a model from config and load the given checkpoint.\n\n    Args:\n        config_path (str): the OpenMMLab config for the model we want to\n            export to ONNX\n        checkpoint_path (str): Path to the corresponding checkpoint\n\n    Returns:\n        torch.nn.Module: the built model\n    \"\"\"\n    from mmdet.models import build_detector\n\n    cfg = mmcv.Config.fromfile(config_path)\n    if cfg_options is not None:\n        cfg.merge_from_dict(cfg_options)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n\n    # build the model\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    checkpoint = load_checkpoint(model, 
checkpoint_path, map_location='cpu')\n    if 'CLASSES' in checkpoint.get('meta', {}):\n        model.CLASSES = checkpoint['meta']['CLASSES']\n    else:\n        from mmdet.datasets import DATASETS\n        dataset = DATASETS.get(cfg.data.test['type'])\n        assert (dataset is not None)\n        model.CLASSES = dataset.CLASSES\n    model.cpu().eval()\n    return model\n\n\ndef preprocess_example_input(input_config):\n    \"\"\"Prepare an example input image for ``generate_inputs_and_wrap_model``.\n\n    Args:\n        input_config (dict): customized config describing the example input.\n\n    Returns:\n        tuple: (one_img, one_meta), tensor of the example input image and \\\n            meta information for the example input image.\n\n    Examples:\n        >>> from mmdet.core.export import preprocess_example_input\n        >>> input_config = {\n        >>>         'input_shape': (1,3,224,224),\n        >>>         'input_path': 'demo/demo.jpg',\n        >>>         'normalize_cfg': {\n        >>>             'mean': (123.675, 116.28, 103.53),\n        >>>             'std': (58.395, 57.12, 57.375)\n        >>>             }\n        >>>         }\n        >>> one_img, one_meta = preprocess_example_input(input_config)\n        >>> print(one_img.shape)\n        torch.Size([1, 3, 224, 224])\n        >>> print(one_meta)\n        {'img_shape': (224, 224, 3),\n        'ori_shape': (224, 224, 3),\n        'pad_shape': (224, 224, 3),\n        'filename': '<demo>.png',\n        'scale_factor': 1.0,\n        'flip': False}\n    \"\"\"\n    input_path = input_config['input_path']\n    input_shape = input_config['input_shape']\n    one_img = mmcv.imread(input_path)\n    one_img = mmcv.imresize(one_img, input_shape[2:][::-1])\n    show_img = one_img.copy()\n    if 'normalize_cfg' in input_config.keys():\n        normalize_cfg = input_config['normalize_cfg']\n        mean = np.array(normalize_cfg['mean'], dtype=np.float32)\n        std = np.array(normalize_cfg['std'], dtype=np.float32)\n        to_rgb = normalize_cfg.get('to_rgb', True)\n        one_img = mmcv.imnormalize(one_img, mean, std, to_rgb=to_rgb)\n    one_img = one_img.transpose(2, 0, 1)\n    one_img = torch.from_numpy(one_img).unsqueeze(0).float().requires_grad_(\n        True)\n    (_, C, H, W) = input_shape\n    one_meta = {\n        'img_shape': (H, W, C),\n        'ori_shape': (H, W, C),\n        'pad_shape': (H, W, C),\n        'filename': '<demo>.png',\n        'scale_factor': np.ones(4, dtype=np.float32),\n        'flip': False,\n        'show_img': show_img,\n        'flip_direction': None\n    }\n\n    return one_img, one_meta\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .checkloss_hook import CheckInvalidLossHook\nfrom .ema import ExpMomentumEMAHook, LinearMomentumEMAHook\nfrom .memory_profiler_hook import MemoryProfilerHook\nfrom .set_epoch_info_hook import SetEpochInfoHook\nfrom .sync_norm_hook import SyncNormHook\nfrom .sync_random_size_hook import SyncRandomSizeHook\nfrom .wandblogger_hook import MMDetWandbHook\nfrom .yolox_lrupdater_hook import YOLOXLrUpdaterHook\nfrom .yolox_mode_switch_hook import YOLOXModeSwitchHook\n\n__all__ = [\n    'SyncRandomSizeHook', 'YOLOXModeSwitchHook', 'SyncNormHook',\n    'ExpMomentumEMAHook', 'LinearMomentumEMAHook', 'YOLOXLrUpdaterHook',\n    'CheckInvalidLossHook', 'SetEpochInfoHook', 'MemoryProfilerHook',\n    'MMDetWandbHook'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/checkloss_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner.hooks import HOOKS, Hook\n\n\n@HOOKS.register_module()\nclass CheckInvalidLossHook(Hook):\n    \"\"\"Check invalid loss hook.\n\n    This hook will regularly check whether the loss is valid\n    during training.\n\n    Args:\n        interval (int): Checking interval (every k iterations).\n            Default: 50.\n    \"\"\"\n\n    def __init__(self, interval=50):\n        self.interval = interval\n\n    def after_train_iter(self, runner):\n        if self.every_n_iters(runner, self.interval):\n            assert torch.isfinite(runner.outputs['loss']), \\\n                runner.logger.info('loss become infinite or NaN!')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/ema.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nfrom mmcv.parallel import is_module_wrapper\nfrom mmcv.runner.hooks import HOOKS, Hook\n\n\nclass BaseEMAHook(Hook):\n    \"\"\"Exponential Moving Average Hook.\n\n    Use Exponential Moving Average on all parameters of model in training\n    process. All parameters have a ema backup, which update by the formula\n    as below. EMAHook takes priority over EvalHook and CheckpointHook. Note,\n    the original model parameters are actually saved in ema field after train.\n\n    Args:\n        momentum (float): The momentum used for updating ema parameter.\n            Ema's parameter are updated with the formula:\n           `ema_param = (1-momentum) * ema_param + momentum * cur_param`.\n            Defaults to 0.0002.\n        skip_buffers (bool): Whether to skip the model buffers, such as\n            batchnorm running stats (running_mean, running_var), it does not\n            perform the ema operation. Default to False.\n        interval (int): Update ema parameter every interval iteration.\n            Defaults to 1.\n        resume_from (str, optional): The checkpoint path. Defaults to None.\n        momentum_fun (func, optional): The function to change momentum\n            during early iteration (also warmup) to help early training.\n            It uses `momentum` as a constant. Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 momentum=0.0002,\n                 interval=1,\n                 skip_buffers=False,\n                 resume_from=None,\n                 momentum_fun=None):\n        assert 0 < momentum < 1\n        self.momentum = momentum\n        self.skip_buffers = skip_buffers\n        self.interval = interval\n        self.checkpoint = resume_from\n        self.momentum_fun = momentum_fun\n\n    def before_run(self, runner):\n        \"\"\"To resume model with it's ema parameters more friendly.\n\n        Register ema parameter as ``named_buffer`` to model.\n        \"\"\"\n        model = runner.model\n        if is_module_wrapper(model):\n            model = model.module\n        self.param_ema_buffer = {}\n        if self.skip_buffers:\n            self.model_parameters = dict(model.named_parameters())\n        else:\n            self.model_parameters = model.state_dict()\n        for name, value in self.model_parameters.items():\n            # \".\" is not allowed in module's buffer name\n            buffer_name = f\"ema_{name.replace('.', '_')}\"\n            self.param_ema_buffer[name] = buffer_name\n            model.register_buffer(buffer_name, value.data.clone())\n        self.model_buffers = dict(model.named_buffers())\n        if self.checkpoint is not None:\n            runner.resume(self.checkpoint)\n\n    def get_momentum(self, runner):\n        return self.momentum_fun(runner.iter) if self.momentum_fun else \\\n                        self.momentum\n\n    def after_train_iter(self, runner):\n        \"\"\"Update ema parameter every self.interval iterations.\"\"\"\n        if (runner.iter + 1) % self.interval != 0:\n            return\n        momentum = self.get_momentum(runner)\n        for name, parameter in self.model_parameters.items():\n            # exclude num_tracking\n            if parameter.dtype.is_floating_point:\n                buffer_name = self.param_ema_buffer[name]\n                buffer_parameter = self.model_buffers[buffer_name]\n                buffer_parameter.mul_(1 - momentum).add_(\n                    parameter.data, 
alpha=momentum)\n\n    def after_train_epoch(self, runner):\n        \"\"\"We load parameter values from ema backup to model before the\n        EvalHook.\"\"\"\n        self._swap_ema_parameters()\n\n    def before_train_epoch(self, runner):\n        \"\"\"We recover model's parameter from ema backup after last epoch's\n        EvalHook.\"\"\"\n        self._swap_ema_parameters()\n\n    def _swap_ema_parameters(self):\n        \"\"\"Swap the parameter of model with parameter in ema_buffer.\"\"\"\n        for name, value in self.model_parameters.items():\n            temp = value.data.clone()\n            ema_buffer = self.model_buffers[self.param_ema_buffer[name]]\n            value.data.copy_(ema_buffer.data)\n            ema_buffer.data.copy_(temp)\n\n\n@HOOKS.register_module()\nclass ExpMomentumEMAHook(BaseEMAHook):\n    \"\"\"EMAHook using exponential momentum strategy.\n\n    Args:\n        total_iter (int): The total number of iterations of EMA momentum.\n           Defaults to 2000.\n    \"\"\"\n\n    def __init__(self, total_iter=2000, **kwargs):\n        super(ExpMomentumEMAHook, self).__init__(**kwargs)\n        self.momentum_fun = lambda x: (1 - self.momentum) * math.exp(-(\n            1 + x) / total_iter) + self.momentum\n\n\n@HOOKS.register_module()\nclass LinearMomentumEMAHook(BaseEMAHook):\n    \"\"\"EMAHook using linear momentum strategy.\n\n    Args:\n        warm_up (int): During first warm_up steps, we may use smaller decay\n            to update ema parameters more slowly. Defaults to 100.\n    \"\"\"\n\n    def __init__(self, warm_up=100, **kwargs):\n        super(LinearMomentumEMAHook, self).__init__(**kwargs)\n        self.momentum_fun = lambda x: min(self.momentum**self.interval,\n                                          (1 + x) / (warm_up + x))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/memory_profiler_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.runner.hooks import HOOKS, Hook\n\n\n@HOOKS.register_module()\nclass MemoryProfilerHook(Hook):\n    \"\"\"Memory profiler hook recording memory information including virtual\n    memory, swap memory, and the memory of the current process.\n\n    Args:\n        interval (int): Checking interval (every k iterations).\n            Default: 50.\n    \"\"\"\n\n    def __init__(self, interval=50):\n        try:\n            from psutil import swap_memory, virtual_memory\n            self._swap_memory = swap_memory\n            self._virtual_memory = virtual_memory\n        except ImportError:\n            raise ImportError('psutil is not installed, please install it by: '\n                              'pip install psutil')\n\n        try:\n            from memory_profiler import memory_usage\n            self._memory_usage = memory_usage\n        except ImportError:\n            raise ImportError(\n                'memory_profiler is not installed, please install it by: '\n                'pip install memory_profiler')\n\n        self.interval = interval\n\n    def after_iter(self, runner):\n        if self.every_n_iters(runner, self.interval):\n            # in Byte\n            virtual_memory = self._virtual_memory()\n            swap_memory = self._swap_memory()\n            # in MB\n            process_memory = self._memory_usage()[0]\n            factor = 1024 * 1024\n            runner.logger.info(\n                'Memory information '\n                'available_memory: '\n                f'{round(virtual_memory.available / factor)} MB, '\n                'used_memory: '\n                f'{round(virtual_memory.used / factor)} MB, '\n                f'memory_utilization: {virtual_memory.percent} %, '\n                'available_swap_memory: '\n                f'{round((swap_memory.total - swap_memory.used) / factor)}'\n                ' MB, '\n                f'used_swap_memory: {round(swap_memory.used / factor)} MB, '\n                f'swap_memory_utilization: {swap_memory.percent} %, '\n                'current_process_memory: '\n                f'{round(process_memory)} MB')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/set_epoch_info_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.parallel import is_module_wrapper\nfrom mmcv.runner import HOOKS, Hook\n\n\n@HOOKS.register_module()\nclass SetEpochInfoHook(Hook):\n    \"\"\"Set runner's epoch information to the model.\"\"\"\n\n    def before_train_epoch(self, runner):\n        epoch = runner.epoch\n        model = runner.model\n        if is_module_wrapper(model):\n            model = model.module\n        model.set_epoch(epoch)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/sync_norm_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom collections import OrderedDict\n\nfrom mmcv.runner import get_dist_info\nfrom mmcv.runner.hooks import HOOKS, Hook\nfrom torch import nn\n\nfrom ..utils.dist_utils import all_reduce_dict\n\n\ndef get_norm_states(module):\n    async_norm_states = OrderedDict()\n    for name, child in module.named_modules():\n        if isinstance(child, nn.modules.batchnorm._NormBase):\n            for k, v in child.state_dict().items():\n                async_norm_states['.'.join([name, k])] = v\n    return async_norm_states\n\n\n@HOOKS.register_module()\nclass SyncNormHook(Hook):\n    \"\"\"Synchronize Norm states after training epoch, currently used in YOLOX.\n\n    Args:\n        num_last_epochs (int): The number of latter epochs in the end of the\n            training to switch to synchronizing norm interval. Default: 15.\n        interval (int): Synchronizing norm interval. Default: 1.\n    \"\"\"\n\n    def __init__(self, num_last_epochs=15, interval=1):\n        self.interval = interval\n        self.num_last_epochs = num_last_epochs\n\n    def before_train_epoch(self, runner):\n        epoch = runner.epoch\n        if (epoch + 1) == runner.max_epochs - self.num_last_epochs:\n            # Synchronize norm every epoch.\n            self.interval = 1\n\n    def after_train_epoch(self, runner):\n        \"\"\"Synchronizing norm.\"\"\"\n        epoch = runner.epoch\n        module = runner.model\n        if (epoch + 1) % self.interval == 0:\n            _, world_size = get_dist_info()\n            if world_size == 1:\n                return\n            norm_states = get_norm_states(module)\n            if len(norm_states) == 0:\n                return\n            norm_states = all_reduce_dict(norm_states, op='mean')\n            module.load_state_dict(norm_states, strict=False)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/sync_random_size_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport random\nimport warnings\n\nimport torch\nfrom mmcv.runner import get_dist_info\nfrom mmcv.runner.hooks import HOOKS, Hook\nfrom torch import distributed as dist\n\n\n@HOOKS.register_module()\nclass SyncRandomSizeHook(Hook):\n    \"\"\"Change and synchronize the random image size across ranks.\n    SyncRandomSizeHook is deprecated, please use Resize pipeline to achieve\n    similar functions. Such as `dict(type='Resize', img_scale=[(448, 448),\n    (832, 832)], multiscale_mode='range', keep_ratio=True)`.\n\n    Note: Due to the multi-process dataloader, its behavior is different\n    from YOLOX's official implementation, the official is to change the\n    size every fixed iteration interval and what we achieved is a fixed\n    epoch interval.\n\n    Args:\n        ratio_range (tuple[int]): Random ratio range. It will be multiplied\n            by 32, and then change the dataset output image size.\n            Default: (14, 26).\n        img_scale (tuple[int]): Size of input image. Default: (640, 640).\n        interval (int): The epoch interval of change image size. Default: 1.\n        device (torch.device | str): device for returned tensors.\n            Default: 'cuda'.\n    \"\"\"\n\n    def __init__(self,\n                 ratio_range=(14, 26),\n                 img_scale=(640, 640),\n                 interval=1,\n                 device='cuda'):\n        warnings.warn('DeprecationWarning: SyncRandomSizeHook is deprecated. '\n                      'Please use Resize pipeline to achieve similar '\n                      'functions. Due to the multi-process dataloader, '\n                      'its behavior is different from YOLOX\\'s official '\n                      'implementation, the official is to change the size '\n                      'every fixed iteration interval and what we achieved '\n                      'is a fixed epoch interval.')\n        self.rank, world_size = get_dist_info()\n        self.is_distributed = world_size > 1\n        self.ratio_range = ratio_range\n        self.img_scale = img_scale\n        self.interval = interval\n        self.device = device\n\n    def after_train_epoch(self, runner):\n        \"\"\"Change the dataset output image size.\"\"\"\n        if self.ratio_range is not None and (runner.epoch +\n                                             1) % self.interval == 0:\n            # Due to DDP and DP get the device behavior inconsistent,\n            # so we did not get the device from runner.model.\n            tensor = torch.LongTensor(2).to(self.device)\n\n            if self.rank == 0:\n                size_factor = self.img_scale[1] * 1. / self.img_scale[0]\n                size = random.randint(*self.ratio_range)\n                size = (int(32 * size), 32 * int(size * size_factor))\n                tensor[0] = size[0]\n                tensor[1] = size[1]\n\n            if self.is_distributed:\n                dist.barrier()\n                dist.broadcast(tensor, 0)\n\n            runner.data_loader.dataset.update_dynamic_scale(\n                (tensor[0].item(), tensor[1].item()))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/wandblogger_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport importlib\nimport os.path as osp\nimport sys\nimport warnings\n\nimport mmcv\nimport numpy as np\nimport pycocotools.mask as mask_util\nfrom mmcv.runner import HOOKS\nfrom mmcv.runner.dist_utils import master_only\nfrom mmcv.runner.hooks.checkpoint import CheckpointHook\nfrom mmcv.runner.hooks.logger.wandb import WandbLoggerHook\nfrom mmcv.utils import digit_version\n\nfrom mmdet.core import DistEvalHook, EvalHook\nfrom mmdet.core.mask.structures import polygon_to_bitmap\n\n\n@HOOKS.register_module()\nclass MMDetWandbHook(WandbLoggerHook):\n    \"\"\"Enhanced Wandb logger hook for MMDetection.\n\n    Comparing with the :cls:`mmcv.runner.WandbLoggerHook`, this hook can not\n    only automatically log all the metrics but also log the following extra\n    information - saves model checkpoints as W&B Artifact, and\n    logs model prediction as interactive W&B Tables.\n\n    - Metrics: The MMDetWandbHook will automatically log training\n        and validation metrics along with system metrics (CPU/GPU).\n\n    - Checkpointing: If `log_checkpoint` is True, the checkpoint saved at\n        every checkpoint interval will be saved as W&B Artifacts.\n        This depends on the : class:`mmcv.runner.CheckpointHook` whose priority\n        is higher than this hook. Please refer to\n        https://docs.wandb.ai/guides/artifacts/model-versioning\n        to learn more about model versioning with W&B Artifacts.\n\n    - Checkpoint Metadata: If evaluation results are available for a given\n        checkpoint artifact, it will have a metadata associated with it.\n        The metadata contains the evaluation metrics computed on validation\n        data with that checkpoint along with the current epoch. It depends\n        on `EvalHook` whose priority is more than MMDetWandbHook.\n\n    - Evaluation: At every evaluation interval, the `MMDetWandbHook` logs the\n        model prediction as interactive W&B Tables. The number of samples\n        logged is given by `num_eval_images`. Currently, the `MMDetWandbHook`\n        logs the predicted bounding boxes along with the ground truth at every\n        evaluation interval. This depends on the `EvalHook` whose priority is\n        more than `MMDetWandbHook`. Also note that the data is just logged once\n        and subsequent evaluation tables uses reference to the logged data\n        to save memory usage. Please refer to\n        https://docs.wandb.ai/guides/data-vis to learn more about W&B Tables.\n\n    For more details check out W&B's MMDetection docs:\n    https://docs.wandb.ai/guides/integrations/mmdetection\n\n    ```\n    Example:\n        log_config = dict(\n            ...\n            hooks=[\n                ...,\n                dict(type='MMDetWandbHook',\n                     init_kwargs={\n                         'entity': \"YOUR_ENTITY\",\n                         'project': \"YOUR_PROJECT_NAME\"\n                     },\n                     interval=50,\n                     log_checkpoint=True,\n                     log_checkpoint_metadata=True,\n                     num_eval_images=100,\n                     bbox_score_thr=0.3)\n            ])\n    ```\n\n    Args:\n        init_kwargs (dict): A dict passed to wandb.init to initialize\n            a W&B run. Please refer to https://docs.wandb.ai/ref/python/init\n            for possible key-value pairs.\n        interval (int): Logging interval (every k iterations). 
Defaults to 50.\n        log_checkpoint (bool): Save the checkpoint at every checkpoint interval\n            as W&B Artifacts. Use this for model versioning where each version\n            is a checkpoint. Defaults to False.\n        log_checkpoint_metadata (bool): Log the evaluation metrics computed\n            on the validation data with the checkpoint, along with current\n            epoch as a metadata to that checkpoint.\n            Defaults to True.\n        num_eval_images (int): The number of validation images to be logged.\n            If zero, the evaluation won't be logged. Defaults to 100.\n        bbox_score_thr (float): Threshold for bounding box scores.\n            Defaults to 0.3.\n    \"\"\"\n\n    def __init__(self,\n                 init_kwargs=None,\n                 interval=50,\n                 log_checkpoint=False,\n                 log_checkpoint_metadata=False,\n                 num_eval_images=100,\n                 bbox_score_thr=0.3,\n                 **kwargs):\n        super(MMDetWandbHook, self).__init__(init_kwargs, interval, **kwargs)\n\n        self.log_checkpoint = log_checkpoint\n        self.log_checkpoint_metadata = (\n            log_checkpoint and log_checkpoint_metadata)\n        self.num_eval_images = num_eval_images\n        self.bbox_score_thr = bbox_score_thr\n        self.log_evaluation = (num_eval_images > 0)\n        self.ckpt_hook: CheckpointHook = None\n        self.eval_hook: EvalHook = None\n\n    def import_wandb(self):\n        try:\n            import wandb\n            from wandb import init  # noqa\n\n            # Fix ResourceWarning when calling wandb.log in wandb v0.12.10.\n            # https://github.com/wandb/client/issues/2837\n            if digit_version(wandb.__version__) < digit_version('0.12.10'):\n                warnings.warn(\n                    f'The current wandb {wandb.__version__} is '\n                    f'lower than v0.12.10 will cause ResourceWarning '\n                    f'when calling wandb.log, Please run '\n                    f'\"pip install --upgrade wandb\"')\n\n        except ImportError:\n            raise ImportError(\n                'Please run \"pip install \"wandb>=0.12.10\"\" to install wandb')\n        self.wandb = wandb\n\n    @master_only\n    def before_run(self, runner):\n        super(MMDetWandbHook, self).before_run(runner)\n\n        # Save and Log config.\n        if runner.meta is not None and runner.meta.get('exp_name',\n                                                       None) is not None:\n            src_cfg_path = osp.join(runner.work_dir,\n                                    runner.meta.get('exp_name', None))\n            if osp.exists(src_cfg_path):\n                self.wandb.save(src_cfg_path, base_path=runner.work_dir)\n                self._update_wandb_config(runner)\n        else:\n            runner.logger.warning('No meta information found in the runner. 
')\n\n        # Inspect CheckpointHook and EvalHook\n        for hook in runner.hooks:\n            if isinstance(hook, CheckpointHook):\n                self.ckpt_hook = hook\n            if isinstance(hook, (EvalHook, DistEvalHook)):\n                self.eval_hook = hook\n\n        # Check conditions to log checkpoint\n        if self.log_checkpoint:\n            if self.ckpt_hook is None:\n                self.log_checkpoint = False\n                self.log_checkpoint_metadata = False\n                runner.logger.warning(\n                    'To log checkpoint in MMDetWandbHook, `CheckpointHook` is'\n                    'required, please check hooks in the runner.')\n            else:\n                self.ckpt_interval = self.ckpt_hook.interval\n\n        # Check conditions to log evaluation\n        if self.log_evaluation or self.log_checkpoint_metadata:\n            if self.eval_hook is None:\n                self.log_evaluation = False\n                self.log_checkpoint_metadata = False\n                runner.logger.warning(\n                    'To log evaluation or checkpoint metadata in '\n                    'MMDetWandbHook, `EvalHook` or `DistEvalHook` in mmdet '\n                    'is required, please check whether the validation '\n                    'is enabled.')\n            else:\n                self.eval_interval = self.eval_hook.interval\n                self.val_dataset = self.eval_hook.dataloader.dataset\n                # Determine the number of samples to be logged.\n                if self.num_eval_images > len(self.val_dataset):\n                    self.num_eval_images = len(self.val_dataset)\n                    runner.logger.warning(\n                        f'The num_eval_images ({self.num_eval_images}) is '\n                        'greater than the total number of validation samples '\n                        f'({len(self.val_dataset)}). 
The complete validation '\n                        'dataset will be logged.')\n\n        # Check conditions to log checkpoint metadata\n        if self.log_checkpoint_metadata:\n            assert self.ckpt_interval % self.eval_interval == 0, \\\n                'To log checkpoint metadata in MMDetWandbHook, the interval ' \\\n                f'of checkpoint saving ({self.ckpt_interval}) should be ' \\\n                'divisible by the interval of evaluation ' \\\n                f'({self.eval_interval}).'\n\n        # Initialize evaluation table\n        if self.log_evaluation:\n            # Initialize data table\n            self._init_data_table()\n            # Add data to the data table\n            self._add_ground_truth(runner)\n            # Log ground truth data\n            self._log_data_table()\n\n    @master_only\n    def after_train_epoch(self, runner):\n        super(MMDetWandbHook, self).after_train_epoch(runner)\n\n        if not self.by_epoch:\n            return\n\n        # Log checkpoint and metadata.\n        if (self.log_checkpoint\n                and self.every_n_epochs(runner, self.ckpt_interval)\n                or (self.ckpt_hook.save_last and self.is_last_epoch(runner))):\n            if self.log_checkpoint_metadata and self.eval_hook:\n                metadata = {\n                    'epoch': runner.epoch + 1,\n                    **self._get_eval_results()\n                }\n            else:\n                metadata = None\n            aliases = [f'epoch_{runner.epoch + 1}', 'latest']\n            model_path = osp.join(self.ckpt_hook.out_dir,\n                                  f'epoch_{runner.epoch + 1}.pth')\n            self._log_ckpt_as_artifact(model_path, aliases, metadata)\n\n        # Save prediction table\n        if self.log_evaluation and self.eval_hook._should_evaluate(runner):\n            results = self.eval_hook.latest_results\n            # Initialize evaluation table\n            self._init_pred_table()\n            # Log predictions\n            self._log_predictions(results)\n            # Log the table\n            self._log_eval_table(runner.epoch + 1)\n\n    @master_only\n    def after_train_iter(self, runner):\n        if self.get_mode(runner) == 'train':\n            # An ugly patch. 
The iter-based eval hook will call the\n            # `after_train_iter` method of all logger hooks before evaluation.\n            # Use this trick to skip that call.\n            # Don't call super method at first, it will clear the log_buffer\n            return super(MMDetWandbHook, self).after_train_iter(runner)\n        else:\n            super(MMDetWandbHook, self).after_train_iter(runner)\n\n        if self.by_epoch:\n            return\n\n        # Save checkpoint and metadata\n        if (self.log_checkpoint\n                and self.every_n_iters(runner, self.ckpt_interval)\n                or (self.ckpt_hook.save_last and self.is_last_iter(runner))):\n            if self.log_checkpoint_metadata and self.eval_hook:\n                metadata = {\n                    'iter': runner.iter + 1,\n                    **self._get_eval_results()\n                }\n            else:\n                metadata = None\n            aliases = [f'iter_{runner.iter + 1}', 'latest']\n            model_path = osp.join(self.ckpt_hook.out_dir,\n                                  f'iter_{runner.iter + 1}.pth')\n            self._log_ckpt_as_artifact(model_path, aliases, metadata)\n\n        # Save prediction table\n        if self.log_evaluation and self.eval_hook._should_evaluate(runner):\n            results = self.eval_hook.latest_results\n            # Initialize evaluation table\n            self._init_pred_table()\n            # Log predictions\n            self._log_predictions(results)\n            # Log the table\n            self._log_eval_table(runner.iter + 1)\n\n    @master_only\n    def after_run(self, runner):\n        self.wandb.finish()\n\n    def _update_wandb_config(self, runner):\n        \"\"\"Update wandb config.\"\"\"\n        # Import the config file.\n        sys.path.append(runner.work_dir)\n        config_filename = runner.meta['exp_name'][:-3]\n        configs = importlib.import_module(config_filename)\n        # Prepare a nested dict of config variables.\n        config_keys = [key for key in dir(configs) if not key.startswith('__')]\n        config_dict = {key: getattr(configs, key) for key in config_keys}\n        # Update the W&B config.\n        self.wandb.config.update(config_dict)\n\n    def _log_ckpt_as_artifact(self, model_path, aliases, metadata=None):\n        \"\"\"Log model checkpoint as  W&B Artifact.\n\n        Args:\n            model_path (str): Path of the checkpoint to log.\n            aliases (list): List of the aliases associated with this artifact.\n            metadata (dict, optional): Metadata associated with this artifact.\n        \"\"\"\n        model_artifact = self.wandb.Artifact(\n            f'run_{self.wandb.run.id}_model', type='model', metadata=metadata)\n        model_artifact.add_file(model_path)\n        self.wandb.log_artifact(model_artifact, aliases=aliases)\n\n    def _get_eval_results(self):\n        \"\"\"Get model evaluation results.\"\"\"\n        results = self.eval_hook.latest_results\n        eval_results = self.val_dataset.evaluate(\n            results, logger='silent', **self.eval_hook.eval_kwargs)\n        return eval_results\n\n    def _init_data_table(self):\n        \"\"\"Initialize the W&B Tables for validation data.\"\"\"\n        columns = ['image_name', 'image']\n        self.data_table = self.wandb.Table(columns=columns)\n\n    def _init_pred_table(self):\n        \"\"\"Initialize the W&B Tables for model evaluation.\"\"\"\n        columns = ['image_name', 'ground_truth', 'prediction']\n        self.eval_table = 
self.wandb.Table(columns=columns)\n\n    def _add_ground_truth(self, runner):\n        # Get image loading pipeline\n        from mmdet.datasets.pipelines import LoadImageFromFile\n        img_loader = None\n        for t in self.val_dataset.pipeline.transforms:\n            if isinstance(t, LoadImageFromFile):\n                img_loader = t\n\n        if img_loader is None:\n            self.log_evaluation = False\n            runner.logger.warning(\n                'LoadImageFromFile is required to add images '\n                'to W&B Tables.')\n            return\n\n        # Select the images to be logged.\n        self.eval_image_indexs = np.arange(len(self.val_dataset))\n        # Set seed so that same validation set is logged each time.\n        np.random.seed(42)\n        np.random.shuffle(self.eval_image_indexs)\n        self.eval_image_indexs = self.eval_image_indexs[:self.num_eval_images]\n\n        CLASSES = self.val_dataset.CLASSES\n        self.class_id_to_label = {\n            id + 1: name\n            for id, name in enumerate(CLASSES)\n        }\n        self.class_set = self.wandb.Classes([{\n            'id': id,\n            'name': name\n        } for id, name in self.class_id_to_label.items()])\n\n        img_prefix = self.val_dataset.img_prefix\n\n        for idx in self.eval_image_indexs:\n            img_info = self.val_dataset.data_infos[idx]\n            image_name = img_info.get('filename', f'img_{idx}')\n            img_height, img_width = img_info['height'], img_info['width']\n\n            img_meta = img_loader(\n                dict(img_info=img_info, img_prefix=img_prefix))\n\n            # Get image and convert from BGR to RGB\n            image = mmcv.bgr2rgb(img_meta['img'])\n\n            data_ann = self.val_dataset.get_ann_info(idx)\n            bboxes = data_ann['bboxes']\n            labels = data_ann['labels']\n            masks = data_ann.get('masks', None)\n\n            # Get dict of bounding boxes to be logged.\n            assert len(bboxes) == len(labels)\n            wandb_boxes = self._get_wandb_bboxes(bboxes, labels)\n\n            # Get dict of masks to be logged.\n            if masks is not None:\n                wandb_masks = self._get_wandb_masks(\n                    masks,\n                    labels,\n                    is_poly_mask=True,\n                    height=img_height,\n                    width=img_width)\n            else:\n                wandb_masks = None\n            # TODO: Panoramic segmentation visualization.\n\n            # Log a row to the data table.\n            self.data_table.add_data(\n                image_name,\n                self.wandb.Image(\n                    image,\n                    boxes=wandb_boxes,\n                    masks=wandb_masks,\n                    classes=self.class_set))\n\n    def _log_predictions(self, results):\n        table_idxs = self.data_table_ref.get_index()\n        assert len(table_idxs) == len(self.eval_image_indexs)\n\n        for ndx, eval_image_index in enumerate(self.eval_image_indexs):\n            # Get the result\n            result = results[eval_image_index]\n            if isinstance(result, tuple):\n                bbox_result, segm_result = result\n                if isinstance(segm_result, tuple):\n                    segm_result = segm_result[0]  # ms rcnn\n            else:\n                bbox_result, segm_result = result, None\n            assert len(bbox_result) == len(self.class_id_to_label)\n\n            # Get labels\n            bboxes = 
np.vstack(bbox_result)\n            labels = [\n                np.full(bbox.shape[0], i, dtype=np.int32)\n                for i, bbox in enumerate(bbox_result)\n            ]\n            labels = np.concatenate(labels)\n\n            # Get segmentation mask if available.\n            segms = None\n            if segm_result is not None and len(labels) > 0:\n                segms = mmcv.concat_list(segm_result)\n                segms = mask_util.decode(segms)\n                segms = segms.transpose(2, 0, 1)\n                assert len(segms) == len(labels)\n            # TODO: Panoramic segmentation visualization.\n\n            # Remove bounding boxes and masks with score lower than threshold.\n            if self.bbox_score_thr > 0:\n                assert bboxes is not None and bboxes.shape[1] == 5\n                scores = bboxes[:, -1]\n                inds = scores > self.bbox_score_thr\n                bboxes = bboxes[inds, :]\n                labels = labels[inds]\n                if segms is not None:\n                    segms = segms[inds, ...]\n\n            # Get dict of bounding boxes to be logged.\n            wandb_boxes = self._get_wandb_bboxes(bboxes, labels, log_gt=False)\n            # Get dict of masks to be logged.\n            if segms is not None:\n                wandb_masks = self._get_wandb_masks(segms, labels)\n            else:\n                wandb_masks = None\n\n            # Log a row to the eval table.\n            self.eval_table.add_data(\n                self.data_table_ref.data[ndx][0],\n                self.data_table_ref.data[ndx][1],\n                self.wandb.Image(\n                    self.data_table_ref.data[ndx][1],\n                    boxes=wandb_boxes,\n                    masks=wandb_masks,\n                    classes=self.class_set))\n\n    def _get_wandb_bboxes(self, bboxes, labels, log_gt=True):\n        \"\"\"Get list of structured dict for logging bounding boxes to W&B.\n\n        Args:\n            bboxes (list): List of bounding box coordinates in\n                        (minX, minY, maxX, maxY) format.\n            labels (int): List of label ids.\n            log_gt (bool): Whether to log ground truth or prediction boxes.\n\n        Returns:\n            Dictionary of bounding boxes to be logged.\n        \"\"\"\n        wandb_boxes = {}\n\n        box_data = []\n        for bbox, label in zip(bboxes, labels):\n            if not isinstance(label, int):\n                label = int(label)\n            label = label + 1\n\n            if len(bbox) == 5:\n                confidence = float(bbox[4])\n                class_name = self.class_id_to_label[label]\n                box_caption = f'{class_name} {confidence:.2f}'\n            else:\n                box_caption = str(self.class_id_to_label[label])\n\n            position = dict(\n                minX=int(bbox[0]),\n                minY=int(bbox[1]),\n                maxX=int(bbox[2]),\n                maxY=int(bbox[3]))\n\n            box_data.append({\n                'position': position,\n                'class_id': label,\n                'box_caption': box_caption,\n                'domain': 'pixel'\n            })\n\n        wandb_bbox_dict = {\n            'box_data': box_data,\n            'class_labels': self.class_id_to_label\n        }\n\n        if log_gt:\n            wandb_boxes['ground_truth'] = wandb_bbox_dict\n        else:\n            wandb_boxes['predictions'] = wandb_bbox_dict\n\n        return wandb_boxes\n\n    def _get_wandb_masks(self,\n           
              masks,\n                         labels,\n                         is_poly_mask=False,\n                         height=None,\n                         width=None):\n        \"\"\"Get list of structured dict for logging masks to W&B.\n\n        Args:\n            masks (list): List of masks.\n            labels (int): List of label ids.\n            is_poly_mask (bool): Whether the mask is polygonal or not.\n                This is true for CocoDataset.\n            height (int): Height of the image.\n            width (int): Width of the image.\n\n        Returns:\n            Dictionary of masks to be logged.\n        \"\"\"\n        mask_label_dict = dict()\n        for mask, label in zip(masks, labels):\n            label = label + 1\n            # Get bitmap mask from polygon.\n            if is_poly_mask:\n                if height is not None and width is not None:\n                    mask = polygon_to_bitmap(mask, height, width)\n            # Create composite masks for each class.\n            if label not in mask_label_dict.keys():\n                mask_label_dict[label] = mask\n            else:\n                mask_label_dict[label] = np.logical_or(mask_label_dict[label],\n                                                       mask)\n\n        wandb_masks = dict()\n        for key, value in mask_label_dict.items():\n            # Create mask for that class.\n            value = value.astype(np.uint8)\n            value[value > 0] = key\n\n            # Create dict of masks for logging.\n            class_name = self.class_id_to_label[key]\n            wandb_masks[class_name] = {\n                'mask_data': value,\n                'class_labels': self.class_id_to_label\n            }\n\n        return wandb_masks\n\n    def _log_data_table(self):\n        \"\"\"Log the W&B Tables for validation data as artifact and calls\n        `use_artifact` on it so that the evaluation table can use the reference\n        of already uploaded images.\n\n        This allows the data to be uploaded just once.\n        \"\"\"\n        data_artifact = self.wandb.Artifact('val', type='dataset')\n        data_artifact.add(self.data_table, 'val_data')\n\n        if not self.wandb.run.offline:\n            self.wandb.run.use_artifact(data_artifact)\n            data_artifact.wait()\n            self.data_table_ref = data_artifact.get('val_data')\n        else:\n            self.data_table_ref = self.data_table\n\n    def _log_eval_table(self, idx):\n        \"\"\"Log the W&B Tables for model evaluation.\n\n        The table will be logged multiple times creating new version. Use this\n        to compare models at different intervals interactively.\n        \"\"\"\n        pred_artifact = self.wandb.Artifact(\n            f'run_{self.wandb.run.id}_pred', type='evaluation')\n        pred_artifact.add(self.eval_table, 'eval_data')\n        if self.by_epoch:\n            aliases = ['latest', f'epoch_{idx}']\n        else:\n            aliases = ['latest', f'iter_{idx}']\n        self.wandb.run.log_artifact(pred_artifact, aliases=aliases)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/yolox_lrupdater_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.runner.hooks import HOOKS\nfrom mmcv.runner.hooks.lr_updater import (CosineAnnealingLrUpdaterHook,\n                                          annealing_cos)\n\n\n@HOOKS.register_module()\nclass YOLOXLrUpdaterHook(CosineAnnealingLrUpdaterHook):\n    \"\"\"YOLOX learning rate scheme.\n\n    There are two main differences between YOLOXLrUpdaterHook\n    and CosineAnnealingLrUpdaterHook.\n\n       1. When the current running epoch is greater than\n           `max_epoch-last_epoch`, a fixed learning rate will be used\n       2. The exp warmup scheme is different with LrUpdaterHook in MMCV\n\n    Args:\n        num_last_epochs (int): The number of epochs with a fixed learning rate\n           before the end of the training.\n    \"\"\"\n\n    def __init__(self, num_last_epochs, **kwargs):\n        self.num_last_epochs = num_last_epochs\n        super(YOLOXLrUpdaterHook, self).__init__(**kwargs)\n\n    def get_warmup_lr(self, cur_iters):\n\n        def _get_warmup_lr(cur_iters, regular_lr):\n            # exp warmup scheme\n            k = self.warmup_ratio * pow(\n                (cur_iters + 1) / float(self.warmup_iters), 2)\n            warmup_lr = [_lr * k for _lr in regular_lr]\n            return warmup_lr\n\n        if isinstance(self.base_lr, dict):\n            lr_groups = {}\n            for key, base_lr in self.base_lr.items():\n                lr_groups[key] = _get_warmup_lr(cur_iters, base_lr)\n            return lr_groups\n        else:\n            return _get_warmup_lr(cur_iters, self.base_lr)\n\n    def get_lr(self, runner, base_lr):\n        last_iter = len(runner.data_loader) * self.num_last_epochs\n\n        if self.by_epoch:\n            progress = runner.epoch\n            max_progress = runner.max_epochs\n        else:\n            progress = runner.iter\n            max_progress = runner.max_iters\n\n        progress += 1\n\n        if self.min_lr_ratio is not None:\n            target_lr = base_lr * self.min_lr_ratio\n        else:\n            target_lr = self.min_lr\n\n        if progress >= max_progress - last_iter:\n            # fixed learning rate\n            return target_lr\n        else:\n            return annealing_cos(\n                base_lr, target_lr, (progress - self.warmup_iters) /\n                (max_progress - self.warmup_iters - last_iter))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/hook/yolox_mode_switch_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.parallel import is_module_wrapper\nfrom mmcv.runner.hooks import HOOKS, Hook\n\n\n@HOOKS.register_module()\nclass YOLOXModeSwitchHook(Hook):\n    \"\"\"Switch the mode of YOLOX during training.\n\n    This hook turns off the mosaic and mixup data augmentation and switches\n    to use L1 loss in bbox_head.\n\n    Args:\n        num_last_epochs (int): The number of latter epochs in the end of the\n            training to close the data augmentation and switch to L1 loss.\n            Default: 15.\n       skip_type_keys (list[str], optional): Sequence of type string to be\n            skip pipeline. Default: ('Mosaic', 'RandomAffine', 'MixUp')\n    \"\"\"\n\n    def __init__(self,\n                 num_last_epochs=15,\n                 skip_type_keys=('Mosaic', 'RandomAffine', 'MixUp')):\n        self.num_last_epochs = num_last_epochs\n        self.skip_type_keys = skip_type_keys\n        self._restart_dataloader = False\n\n    def before_train_epoch(self, runner):\n        \"\"\"Close mosaic and mixup augmentation and switches to use L1 loss.\"\"\"\n        epoch = runner.epoch\n        train_loader = runner.data_loader\n        model = runner.model\n        if is_module_wrapper(model):\n            model = model.module\n        if (epoch + 1) == runner.max_epochs - self.num_last_epochs:\n            runner.logger.info('No mosaic and mixup aug now!')\n            # The dataset pipeline cannot be updated when persistent_workers\n            # is True, so we need to force the dataloader's multi-process\n            # restart. This is a very hacky approach.\n            train_loader.dataset.update_skip_type_keys(self.skip_type_keys)\n            if hasattr(train_loader, 'persistent_workers'\n                       ) and train_loader.persistent_workers is True:\n                train_loader._DataLoader__initialized = False\n                train_loader._iterator = None\n                self._restart_dataloader = True\n            runner.logger.info('Add additional L1 loss now!')\n            model.bbox_head.use_l1 = True\n        else:\n            # Once the restart is complete, we need to restore\n            # the initialization flag.\n            if self._restart_dataloader:\n                train_loader._DataLoader__initialized = True\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/mask/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .mask_target import mask_target\nfrom .structures import BaseInstanceMasks, BitmapMasks, PolygonMasks\nfrom .utils import encode_mask_results, mask2bbox, split_combined_polys\n\n__all__ = [\n    'split_combined_polys', 'mask_target', 'BaseInstanceMasks', 'BitmapMasks',\n    'PolygonMasks', 'encode_mask_results', 'mask2bbox'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/mask/mask_target.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom torch.nn.modules.utils import _pair\n\n\ndef mask_target(pos_proposals_list, pos_assigned_gt_inds_list, gt_masks_list,\n                cfg):\n    \"\"\"Compute mask target for positive proposals in multiple images.\n\n    Args:\n        pos_proposals_list (list[Tensor]): Positive proposals in multiple\n            images.\n        pos_assigned_gt_inds_list (list[Tensor]): Assigned GT indices for each\n            positive proposals.\n        gt_masks_list (list[:obj:`BaseInstanceMasks`]): Ground truth masks of\n            each image.\n        cfg (dict): Config dict that specifies the mask size.\n\n    Returns:\n        list[Tensor]: Mask target of each image.\n\n    Example:\n        >>> import mmcv\n        >>> import mmdet\n        >>> from mmdet.core.mask import BitmapMasks\n        >>> from mmdet.core.mask.mask_target import *\n        >>> H, W = 17, 18\n        >>> cfg = mmcv.Config({'mask_size': (13, 14)})\n        >>> rng = np.random.RandomState(0)\n        >>> # Positive proposals (tl_x, tl_y, br_x, br_y) for each image\n        >>> pos_proposals_list = [\n        >>>     torch.Tensor([\n        >>>         [ 7.2425,  5.5929, 13.9414, 14.9541],\n        >>>         [ 7.3241,  3.6170, 16.3850, 15.3102],\n        >>>     ]),\n        >>>     torch.Tensor([\n        >>>         [ 4.8448, 6.4010, 7.0314, 9.7681],\n        >>>         [ 5.9790, 2.6989, 7.4416, 4.8580],\n        >>>         [ 0.0000, 0.0000, 0.1398, 9.8232],\n        >>>     ]),\n        >>> ]\n        >>> # Corresponding class index for each proposal for each image\n        >>> pos_assigned_gt_inds_list = [\n        >>>     torch.LongTensor([7, 0]),\n        >>>     torch.LongTensor([5, 4, 1]),\n        >>> ]\n        >>> # Ground truth mask for each true object for each image\n        >>> gt_masks_list = [\n        >>>     BitmapMasks(rng.rand(8, H, W), height=H, width=W),\n        >>>     BitmapMasks(rng.rand(6, H, W), height=H, width=W),\n        >>> ]\n        >>> mask_targets = mask_target(\n        >>>     pos_proposals_list, pos_assigned_gt_inds_list,\n        >>>     gt_masks_list, cfg)\n        >>> assert mask_targets.shape == (5,) + cfg['mask_size']\n    \"\"\"\n    cfg_list = [cfg for _ in range(len(pos_proposals_list))]\n    mask_targets = map(mask_target_single, pos_proposals_list,\n                       pos_assigned_gt_inds_list, gt_masks_list, cfg_list)\n    mask_targets = list(mask_targets)\n    if len(mask_targets) > 0:\n        mask_targets = torch.cat(mask_targets)\n    return mask_targets\n\n\ndef mask_target_single(pos_proposals, pos_assigned_gt_inds, gt_masks, cfg):\n    \"\"\"Compute mask target for each positive proposal in the image.\n\n    Args:\n        pos_proposals (Tensor): Positive proposals.\n        pos_assigned_gt_inds (Tensor): Assigned GT inds of positive proposals.\n        gt_masks (:obj:`BaseInstanceMasks`): GT masks in the format of Bitmap\n            or Polygon.\n        cfg (dict): Config dict that indicate the mask size.\n\n    Returns:\n        Tensor: Mask target of each positive proposals in the image.\n\n    Example:\n        >>> import mmcv\n        >>> import mmdet\n        >>> from mmdet.core.mask import BitmapMasks\n        >>> from mmdet.core.mask.mask_target import *  # NOQA\n        >>> H, W = 32, 32\n        >>> cfg = mmcv.Config({'mask_size': (7, 11)})\n        >>> rng = np.random.RandomState(0)\n        >>> # Masks for each ground truth box (relative to the 
image)\n        >>> gt_masks_data = rng.rand(3, H, W)\n        >>> gt_masks = BitmapMasks(gt_masks_data, height=H, width=W)\n        >>> # Predicted positive boxes in one image\n        >>> pos_proposals = torch.FloatTensor([\n        >>>     [ 16.2,   5.5, 19.9, 20.9],\n        >>>     [ 17.3,  13.6, 19.3, 19.3],\n        >>>     [ 14.8,  16.4, 17.0, 23.7],\n        >>>     [  0.0,   0.0, 16.0, 16.0],\n        >>>     [  4.0,   0.0, 20.0, 16.0],\n        >>> ])\n        >>> # For each predicted proposal, its assignment to a gt mask\n        >>> pos_assigned_gt_inds = torch.LongTensor([0, 1, 2, 1, 1])\n        >>> mask_targets = mask_target_single(\n        >>>     pos_proposals, pos_assigned_gt_inds, gt_masks, cfg)\n        >>> assert mask_targets.shape == (5,) + cfg['mask_size']\n    \"\"\"\n    device = pos_proposals.device\n    mask_size = _pair(cfg.mask_size)\n    binarize = not cfg.get('soft_mask_target', False)\n    num_pos = pos_proposals.size(0)\n    if num_pos > 0:\n        proposals_np = pos_proposals.cpu().numpy()\n        maxh, maxw = gt_masks.height, gt_masks.width\n        proposals_np[:, [0, 2]] = np.clip(proposals_np[:, [0, 2]], 0, maxw)\n        proposals_np[:, [1, 3]] = np.clip(proposals_np[:, [1, 3]], 0, maxh)\n        pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()\n\n        mask_targets = gt_masks.crop_and_resize(\n            proposals_np,\n            mask_size,\n            device=device,\n            inds=pos_assigned_gt_inds,\n            binarize=binarize).to_ndarray()\n\n        mask_targets = torch.from_numpy(mask_targets).float().to(device)\n    else:\n        mask_targets = pos_proposals.new_zeros((0, ) + mask_size)\n\n    return mask_targets\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/mask/structures.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nimport cv2\nimport mmcv\nimport numpy as np\nimport pycocotools.mask as maskUtils\nimport torch\nfrom mmcv.ops.roi_align import roi_align\n\n\nclass BaseInstanceMasks(metaclass=ABCMeta):\n    \"\"\"Base class for instance masks.\"\"\"\n\n    @abstractmethod\n    def rescale(self, scale, interpolation='nearest'):\n        \"\"\"Rescale masks as large as possible while keeping the aspect ratio.\n        For details can refer to `mmcv.imrescale`.\n\n        Args:\n            scale (tuple[int]): The maximum size (h, w) of rescaled mask.\n            interpolation (str): Same as :func:`mmcv.imrescale`.\n\n        Returns:\n            BaseInstanceMasks: The rescaled masks.\n        \"\"\"\n\n    @abstractmethod\n    def resize(self, out_shape, interpolation='nearest'):\n        \"\"\"Resize masks to the given out_shape.\n\n        Args:\n            out_shape: Target (h, w) of resized mask.\n            interpolation (str): See :func:`mmcv.imresize`.\n\n        Returns:\n            BaseInstanceMasks: The resized masks.\n        \"\"\"\n\n    @abstractmethod\n    def flip(self, flip_direction='horizontal'):\n        \"\"\"Flip masks alone the given direction.\n\n        Args:\n            flip_direction (str): Either 'horizontal' or 'vertical'.\n\n        Returns:\n            BaseInstanceMasks: The flipped masks.\n        \"\"\"\n\n    @abstractmethod\n    def pad(self, out_shape, pad_val):\n        \"\"\"Pad masks to the given size of (h, w).\n\n        Args:\n            out_shape (tuple[int]): Target (h, w) of padded mask.\n            pad_val (int): The padded value.\n\n        Returns:\n            BaseInstanceMasks: The padded masks.\n        \"\"\"\n\n    @abstractmethod\n    def crop(self, bbox):\n        \"\"\"Crop each mask by the given bbox.\n\n        Args:\n            bbox (ndarray): Bbox in format [x1, y1, x2, y2], shape (4, ).\n\n        Return:\n            BaseInstanceMasks: The cropped masks.\n        \"\"\"\n\n    @abstractmethod\n    def crop_and_resize(self,\n                        bboxes,\n                        out_shape,\n                        inds,\n                        device,\n                        interpolation='bilinear',\n                        binarize=True):\n        \"\"\"Crop and resize masks by the given bboxes.\n\n        This function is mainly used in mask targets computation.\n        It firstly align mask to bboxes by assigned_inds, then crop mask by the\n        assigned bbox and resize to the size of (mask_h, mask_w)\n\n        Args:\n            bboxes (Tensor): Bboxes in format [x1, y1, x2, y2], shape (N, 4)\n            out_shape (tuple[int]): Target (h, w) of resized mask\n            inds (ndarray): Indexes to assign masks to each bbox,\n                shape (N,) and values should be between [0, num_masks - 1].\n            device (str): Device of bboxes\n            interpolation (str): See `mmcv.imresize`\n            binarize (bool): if True fractional values are rounded to 0 or 1\n                after the resize operation. if False and unsupported an error\n                will be raised. 
Defaults to True.\n\n        Return:\n            BaseInstanceMasks: the cropped and resized masks.\n        \"\"\"\n\n    @abstractmethod\n    def expand(self, expanded_h, expanded_w, top, left):\n        \"\"\"see :class:`Expand`.\"\"\"\n\n    @property\n    @abstractmethod\n    def areas(self):\n        \"\"\"ndarray: areas of each instance.\"\"\"\n\n    @abstractmethod\n    def to_ndarray(self):\n        \"\"\"Convert masks to the format of ndarray.\n\n        Return:\n            ndarray: Converted masks in the format of ndarray.\n        \"\"\"\n\n    @abstractmethod\n    def to_tensor(self, dtype, device):\n        \"\"\"Convert masks to the format of Tensor.\n\n        Args:\n            dtype (str): Dtype of converted mask.\n            device (torch.device): Device of converted masks.\n\n        Returns:\n            Tensor: Converted masks in the format of Tensor.\n        \"\"\"\n\n    @abstractmethod\n    def translate(self,\n                  out_shape,\n                  offset,\n                  direction='horizontal',\n                  fill_val=0,\n                  interpolation='bilinear'):\n        \"\"\"Translate the masks.\n\n        Args:\n            out_shape (tuple[int]): Shape for output mask, format (h, w).\n            offset (int | float): The offset for translate.\n            direction (str): The translate direction, either \"horizontal\"\n                or \"vertical\".\n            fill_val (int | float): Border value. Default 0.\n            interpolation (str): Same as :func:`mmcv.imtranslate`.\n\n        Returns:\n            Translated masks.\n        \"\"\"\n\n    def shear(self,\n              out_shape,\n              magnitude,\n              direction='horizontal',\n              border_value=0,\n              interpolation='bilinear'):\n        \"\"\"Shear the masks.\n\n        Args:\n            out_shape (tuple[int]): Shape for output mask, format (h, w).\n            magnitude (int | float): The magnitude used for shear.\n            direction (str): The shear direction, either \"horizontal\"\n                or \"vertical\".\n            border_value (int | tuple[int]): Value used in case of a\n                constant border. Default 0.\n            interpolation (str): Same as in :func:`mmcv.imshear`.\n\n        Returns:\n            ndarray: Sheared masks.\n        \"\"\"\n\n    @abstractmethod\n    def rotate(self, out_shape, angle, center=None, scale=1.0, fill_val=0):\n        \"\"\"Rotate the masks.\n\n        Args:\n            out_shape (tuple[int]): Shape for output mask, format (h, w).\n            angle (int | float): Rotation angle in degrees. Positive values\n                mean counter-clockwise rotation.\n            center (tuple[float], optional): Center point (w, h) of the\n                rotation in source image. If not specified, the center of\n                the image will be used.\n            scale (int | float): Isotropic scale factor.\n            fill_val (int | float): Border value. 
Default 0 for masks.\n\n        Returns:\n            Rotated masks.\n        \"\"\"\n\n\nclass BitmapMasks(BaseInstanceMasks):\n    \"\"\"This class represents masks in the form of bitmaps.\n\n    Args:\n        masks (ndarray): ndarray of masks in shape (N, H, W), where N is\n            the number of objects.\n        height (int): height of masks\n        width (int): width of masks\n\n    Example:\n        >>> from mmdet.core.mask.structures import *  # NOQA\n        >>> num_masks, H, W = 3, 32, 32\n        >>> rng = np.random.RandomState(0)\n        >>> masks = (rng.rand(num_masks, H, W) > 0.1).astype(np.int)\n        >>> self = BitmapMasks(masks, height=H, width=W)\n\n        >>> # demo crop_and_resize\n        >>> num_boxes = 5\n        >>> bboxes = np.array([[0, 0, 30, 10.0]] * num_boxes)\n        >>> out_shape = (14, 14)\n        >>> inds = torch.randint(0, len(self), size=(num_boxes,))\n        >>> device = 'cpu'\n        >>> interpolation = 'bilinear'\n        >>> new = self.crop_and_resize(\n        ...     bboxes, out_shape, inds, device, interpolation)\n        >>> assert len(new) == num_boxes\n        >>> assert new.height, new.width == out_shape\n    \"\"\"\n\n    def __init__(self, masks, height, width):\n        self.height = height\n        self.width = width\n        if len(masks) == 0:\n            self.masks = np.empty((0, self.height, self.width), dtype=np.uint8)\n        else:\n            assert isinstance(masks, (list, np.ndarray))\n            if isinstance(masks, list):\n                assert isinstance(masks[0], np.ndarray)\n                assert masks[0].ndim == 2  # (H, W)\n            else:\n                assert masks.ndim == 3  # (N, H, W)\n\n            self.masks = np.stack(masks).reshape(-1, height, width)\n            assert self.masks.shape[1] == self.height\n            assert self.masks.shape[2] == self.width\n\n    def __getitem__(self, index):\n        \"\"\"Index the BitmapMask.\n\n        Args:\n            index (int | ndarray): Indices in the format of integer or ndarray.\n\n        Returns:\n            :obj:`BitmapMasks`: Indexed bitmap masks.\n        \"\"\"\n        masks = self.masks[index].reshape(-1, self.height, self.width)\n        return BitmapMasks(masks, self.height, self.width)\n\n    def __iter__(self):\n        return iter(self.masks)\n\n    def __repr__(self):\n        s = self.__class__.__name__ + '('\n        s += f'num_masks={len(self.masks)}, '\n        s += f'height={self.height}, '\n        s += f'width={self.width})'\n        return s\n\n    def __len__(self):\n        \"\"\"Number of masks.\"\"\"\n        return len(self.masks)\n\n    def rescale(self, scale, interpolation='nearest'):\n        \"\"\"See :func:`BaseInstanceMasks.rescale`.\"\"\"\n        if len(self.masks) == 0:\n            new_w, new_h = mmcv.rescale_size((self.width, self.height), scale)\n            rescaled_masks = np.empty((0, new_h, new_w), dtype=np.uint8)\n        else:\n            rescaled_masks = np.stack([\n                mmcv.imrescale(mask, scale, interpolation=interpolation)\n                for mask in self.masks\n            ])\n        height, width = rescaled_masks.shape[1:]\n        return BitmapMasks(rescaled_masks, height, width)\n\n    def resize(self, out_shape, interpolation='nearest'):\n        \"\"\"See :func:`BaseInstanceMasks.resize`.\"\"\"\n        if len(self.masks) == 0:\n            resized_masks = np.empty((0, *out_shape), dtype=np.uint8)\n        else:\n            resized_masks = np.stack([\n                
mmcv.imresize(\n                    mask, out_shape[::-1], interpolation=interpolation)\n                for mask in self.masks\n            ])\n        return BitmapMasks(resized_masks, *out_shape)\n\n    def flip(self, flip_direction='horizontal'):\n        \"\"\"See :func:`BaseInstanceMasks.flip`.\"\"\"\n        assert flip_direction in ('horizontal', 'vertical', 'diagonal')\n\n        if len(self.masks) == 0:\n            flipped_masks = self.masks\n        else:\n            flipped_masks = np.stack([\n                mmcv.imflip(mask, direction=flip_direction)\n                for mask in self.masks\n            ])\n        return BitmapMasks(flipped_masks, self.height, self.width)\n\n    def pad(self, out_shape, pad_val=0):\n        \"\"\"See :func:`BaseInstanceMasks.pad`.\"\"\"\n        if len(self.masks) == 0:\n            padded_masks = np.empty((0, *out_shape), dtype=np.uint8)\n        else:\n            padded_masks = np.stack([\n                mmcv.impad(mask, shape=out_shape, pad_val=pad_val)\n                for mask in self.masks\n            ])\n        return BitmapMasks(padded_masks, *out_shape)\n\n    def crop(self, bbox):\n        \"\"\"See :func:`BaseInstanceMasks.crop`.\"\"\"\n        assert isinstance(bbox, np.ndarray)\n        assert bbox.ndim == 1\n\n        # clip the boundary\n        bbox = bbox.copy()\n        bbox[0::2] = np.clip(bbox[0::2], 0, self.width)\n        bbox[1::2] = np.clip(bbox[1::2], 0, self.height)\n        x1, y1, x2, y2 = bbox\n        w = np.maximum(x2 - x1, 1)\n        h = np.maximum(y2 - y1, 1)\n\n        if len(self.masks) == 0:\n            cropped_masks = np.empty((0, h, w), dtype=np.uint8)\n        else:\n            cropped_masks = self.masks[:, y1:y1 + h, x1:x1 + w]\n        return BitmapMasks(cropped_masks, h, w)\n\n    def crop_and_resize(self,\n                        bboxes,\n                        out_shape,\n                        inds,\n                        device='cpu',\n                        interpolation='bilinear',\n                        binarize=True):\n        \"\"\"See :func:`BaseInstanceMasks.crop_and_resize`.\"\"\"\n        if len(self.masks) == 0:\n            empty_masks = np.empty((0, *out_shape), dtype=np.uint8)\n            return BitmapMasks(empty_masks, *out_shape)\n\n        # convert bboxes to tensor\n        if isinstance(bboxes, np.ndarray):\n            bboxes = torch.from_numpy(bboxes).to(device=device)\n        if isinstance(inds, np.ndarray):\n            inds = torch.from_numpy(inds).to(device=device)\n\n        num_bbox = bboxes.shape[0]\n        fake_inds = torch.arange(\n            num_bbox, device=device).to(dtype=bboxes.dtype)[:, None]\n        rois = torch.cat([fake_inds, bboxes], dim=1)  # Nx5\n        rois = rois.to(device=device)\n        if num_bbox > 0:\n            gt_masks_th = torch.from_numpy(self.masks).to(device).index_select(\n                0, inds).to(dtype=rois.dtype)\n            targets = roi_align(gt_masks_th[:, None, :, :], rois, out_shape,\n                                1.0, 0, 'avg', True).squeeze(1)\n            if binarize:\n                resized_masks = (targets >= 0.5).cpu().numpy()\n            else:\n                resized_masks = targets.cpu().numpy()\n        else:\n            resized_masks = []\n        return BitmapMasks(resized_masks, *out_shape)\n\n    def expand(self, expanded_h, expanded_w, top, left):\n        \"\"\"See :func:`BaseInstanceMasks.expand`.\"\"\"\n        if len(self.masks) == 0:\n            expanded_mask = np.empty((0, 
expanded_h, expanded_w),\n                                     dtype=np.uint8)\n        else:\n            expanded_mask = np.zeros((len(self), expanded_h, expanded_w),\n                                     dtype=np.uint8)\n            expanded_mask[:, top:top + self.height,\n                          left:left + self.width] = self.masks\n        return BitmapMasks(expanded_mask, expanded_h, expanded_w)\n\n    def translate(self,\n                  out_shape,\n                  offset,\n                  direction='horizontal',\n                  fill_val=0,\n                  interpolation='bilinear'):\n        \"\"\"Translate the BitmapMasks.\n\n        Args:\n            out_shape (tuple[int]): Shape for output mask, format (h, w).\n            offset (int | float): The offset for translate.\n            direction (str): The translate direction, either \"horizontal\"\n                or \"vertical\".\n            fill_val (int | float): Border value. Default 0 for masks.\n            interpolation (str): Same as :func:`mmcv.imtranslate`.\n\n        Returns:\n            BitmapMasks: Translated BitmapMasks.\n\n        Example:\n            >>> from mmdet.core.mask.structures import BitmapMasks\n            >>> self = BitmapMasks.random(dtype=np.uint8)\n            >>> out_shape = (32, 32)\n            >>> offset = 4\n            >>> direction = 'horizontal'\n            >>> fill_val = 0\n            >>> interpolation = 'bilinear'\n            >>> # Note, There seem to be issues when:\n            >>> # * out_shape is different than self's shape\n            >>> # * the mask dtype is not supported by cv2.AffineWarp\n            >>> new = self.translate(out_shape, offset, direction, fill_val,\n            >>>                      interpolation)\n            >>> assert len(new) == len(self)\n            >>> assert new.height, new.width == out_shape\n        \"\"\"\n        if len(self.masks) == 0:\n            translated_masks = np.empty((0, *out_shape), dtype=np.uint8)\n        else:\n            translated_masks = mmcv.imtranslate(\n                self.masks.transpose((1, 2, 0)),\n                offset,\n                direction,\n                border_value=fill_val,\n                interpolation=interpolation)\n            if translated_masks.ndim == 2:\n                translated_masks = translated_masks[:, :, None]\n            translated_masks = translated_masks.transpose(\n                (2, 0, 1)).astype(self.masks.dtype)\n        return BitmapMasks(translated_masks, *out_shape)\n\n    def shear(self,\n              out_shape,\n              magnitude,\n              direction='horizontal',\n              border_value=0,\n              interpolation='bilinear'):\n        \"\"\"Shear the BitmapMasks.\n\n        Args:\n            out_shape (tuple[int]): Shape for output mask, format (h, w).\n            magnitude (int | float): The magnitude used for shear.\n            direction (str): The shear direction, either \"horizontal\"\n                or \"vertical\".\n            border_value (int | tuple[int]): Value used in case of a\n                constant border.\n            interpolation (str): Same as in :func:`mmcv.imshear`.\n\n        Returns:\n            BitmapMasks: The sheared masks.\n        \"\"\"\n        if len(self.masks) == 0:\n            sheared_masks = np.empty((0, *out_shape), dtype=np.uint8)\n        else:\n            sheared_masks = mmcv.imshear(\n                self.masks.transpose((1, 2, 0)),\n                magnitude,\n                direction,\n   
             border_value=border_value,\n                interpolation=interpolation)\n            if sheared_masks.ndim == 2:\n                sheared_masks = sheared_masks[:, :, None]\n            sheared_masks = sheared_masks.transpose(\n                (2, 0, 1)).astype(self.masks.dtype)\n        return BitmapMasks(sheared_masks, *out_shape)\n\n    def rotate(self, out_shape, angle, center=None, scale=1.0, fill_val=0):\n        \"\"\"Rotate the BitmapMasks.\n\n        Args:\n            out_shape (tuple[int]): Shape for output mask, format (h, w).\n            angle (int | float): Rotation angle in degrees. Positive values\n                mean counter-clockwise rotation.\n            center (tuple[float], optional): Center point (w, h) of the\n                rotation in source image. If not specified, the center of\n                the image will be used.\n            scale (int | float): Isotropic scale factor.\n            fill_val (int | float): Border value. Default 0 for masks.\n\n        Returns:\n            BitmapMasks: Rotated BitmapMasks.\n        \"\"\"\n        if len(self.masks) == 0:\n            rotated_masks = np.empty((0, *out_shape), dtype=self.masks.dtype)\n        else:\n            rotated_masks = mmcv.imrotate(\n                self.masks.transpose((1, 2, 0)),\n                angle,\n                center=center,\n                scale=scale,\n                border_value=fill_val)\n            if rotated_masks.ndim == 2:\n                # case when only one mask, (h, w)\n                rotated_masks = rotated_masks[:, :, None]  # (h, w, 1)\n            rotated_masks = rotated_masks.transpose(\n                (2, 0, 1)).astype(self.masks.dtype)\n        return BitmapMasks(rotated_masks, *out_shape)\n\n    @property\n    def areas(self):\n        \"\"\"See :py:attr:`BaseInstanceMasks.areas`.\"\"\"\n        return self.masks.sum((1, 2))\n\n    def to_ndarray(self):\n        \"\"\"See :func:`BaseInstanceMasks.to_ndarray`.\"\"\"\n        return self.masks\n\n    def to_tensor(self, dtype, device):\n        \"\"\"See :func:`BaseInstanceMasks.to_tensor`.\"\"\"\n        return torch.tensor(self.masks, dtype=dtype, device=device)\n\n    @classmethod\n    def random(cls,\n               num_masks=3,\n               height=32,\n               width=32,\n               dtype=np.uint8,\n               rng=None):\n        \"\"\"Generate random bitmap masks for demo / testing purposes.\n\n        Example:\n            >>> from mmdet.core.mask.structures import BitmapMasks\n            >>> self = BitmapMasks.random()\n            >>> print('self = {}'.format(self))\n            self = BitmapMasks(num_masks=3, height=32, width=32)\n        \"\"\"\n        from mmdet.utils.util_random import ensure_rng\n        rng = ensure_rng(rng)\n        masks = (rng.rand(num_masks, height, width) > 0.1).astype(dtype)\n        self = cls(masks, height=height, width=width)\n        return self\n\n    def get_bboxes(self):\n        num_masks = len(self)\n        boxes = np.zeros((num_masks, 4), dtype=np.float32)\n        x_any = self.masks.any(axis=1)\n        y_any = self.masks.any(axis=2)\n        for idx in range(num_masks):\n            x = np.where(x_any[idx, :])[0]\n            y = np.where(y_any[idx, :])[0]\n            if len(x) > 0 and len(y) > 0:\n                # use +1 for x_max and y_max so that the right and bottom\n                # boundary of instance masks are fully included by the box\n                boxes[idx, :] = np.array([x[0], y[0], x[-1] + 1, y[-1] + 1],\n       
                                  dtype=np.float32)\n        return boxes\n\n\nclass PolygonMasks(BaseInstanceMasks):\n    \"\"\"This class represents masks in the form of polygons.\n\n    Polygons is a list of three levels. The first level of the list\n    corresponds to objects, the second level to the polys that compose the\n    object, the third level to the poly coordinates\n\n    Args:\n        masks (list[list[ndarray]]): The first level of the list\n            corresponds to objects, the second level to the polys that\n            compose the object, the third level to the poly coordinates\n        height (int): height of masks\n        width (int): width of masks\n\n    Example:\n        >>> from mmdet.core.mask.structures import *  # NOQA\n        >>> masks = [\n        >>>     [ np.array([0, 0, 10, 0, 10, 10., 0, 10, 0, 0]) ]\n        >>> ]\n        >>> height, width = 16, 16\n        >>> self = PolygonMasks(masks, height, width)\n\n        >>> # demo translate\n        >>> new = self.translate((16, 16), 4., direction='horizontal')\n        >>> assert np.all(new.masks[0][0][1::2] == masks[0][0][1::2])\n        >>> assert np.all(new.masks[0][0][0::2] == masks[0][0][0::2] + 4)\n\n        >>> # demo crop_and_resize\n        >>> num_boxes = 3\n        >>> bboxes = np.array([[0, 0, 30, 10.0]] * num_boxes)\n        >>> out_shape = (16, 16)\n        >>> inds = torch.randint(0, len(self), size=(num_boxes,))\n        >>> device = 'cpu'\n        >>> interpolation = 'bilinear'\n        >>> new = self.crop_and_resize(\n        ...     bboxes, out_shape, inds, device, interpolation)\n        >>> assert len(new) == num_boxes\n        >>> assert new.height, new.width == out_shape\n    \"\"\"\n\n    def __init__(self, masks, height, width):\n        assert isinstance(masks, list)\n        if len(masks) > 0:\n            assert isinstance(masks[0], list)\n            assert isinstance(masks[0][0], np.ndarray)\n\n        self.height = height\n        self.width = width\n        self.masks = masks\n\n    def __getitem__(self, index):\n        \"\"\"Index the polygon masks.\n\n        Args:\n            index (ndarray | List): The indices.\n\n        Returns:\n            :obj:`PolygonMasks`: The indexed polygon masks.\n        \"\"\"\n        if isinstance(index, np.ndarray):\n            index = index.tolist()\n        if isinstance(index, list):\n            masks = [self.masks[i] for i in index]\n        else:\n            try:\n                masks = self.masks[index]\n            except Exception:\n                raise ValueError(\n                    f'Unsupported input of type {type(index)} for indexing!')\n        if len(masks) and isinstance(masks[0], np.ndarray):\n            masks = [masks]  # ensure a list of three levels\n        return PolygonMasks(masks, self.height, self.width)\n\n    def __iter__(self):\n        return iter(self.masks)\n\n    def __repr__(self):\n        s = self.__class__.__name__ + '('\n        s += f'num_masks={len(self.masks)}, '\n        s += f'height={self.height}, '\n        s += f'width={self.width})'\n        return s\n\n    def __len__(self):\n        \"\"\"Number of masks.\"\"\"\n        return len(self.masks)\n\n    def rescale(self, scale, interpolation=None):\n        \"\"\"see :func:`BaseInstanceMasks.rescale`\"\"\"\n        new_w, new_h = mmcv.rescale_size((self.width, self.height), scale)\n        if len(self.masks) == 0:\n            rescaled_masks = PolygonMasks([], new_h, new_w)\n        else:\n            rescaled_masks = 
self.resize((new_h, new_w))\n        return rescaled_masks\n\n    def resize(self, out_shape, interpolation=None):\n        \"\"\"see :func:`BaseInstanceMasks.resize`\"\"\"\n        if len(self.masks) == 0:\n            resized_masks = PolygonMasks([], *out_shape)\n        else:\n            h_scale = out_shape[0] / self.height\n            w_scale = out_shape[1] / self.width\n            resized_masks = []\n            for poly_per_obj in self.masks:\n                resized_poly = []\n                for p in poly_per_obj:\n                    p = p.copy()\n                    p[0::2] = p[0::2] * w_scale\n                    p[1::2] = p[1::2] * h_scale\n                    resized_poly.append(p)\n                resized_masks.append(resized_poly)\n            resized_masks = PolygonMasks(resized_masks, *out_shape)\n        return resized_masks\n\n    def flip(self, flip_direction='horizontal'):\n        \"\"\"see :func:`BaseInstanceMasks.flip`\"\"\"\n        assert flip_direction in ('horizontal', 'vertical', 'diagonal')\n        if len(self.masks) == 0:\n            flipped_masks = PolygonMasks([], self.height, self.width)\n        else:\n            flipped_masks = []\n            for poly_per_obj in self.masks:\n                flipped_poly_per_obj = []\n                for p in poly_per_obj:\n                    p = p.copy()\n                    if flip_direction == 'horizontal':\n                        p[0::2] = self.width - p[0::2]\n                    elif flip_direction == 'vertical':\n                        p[1::2] = self.height - p[1::2]\n                    else:\n                        p[0::2] = self.width - p[0::2]\n                        p[1::2] = self.height - p[1::2]\n                    flipped_poly_per_obj.append(p)\n                flipped_masks.append(flipped_poly_per_obj)\n            flipped_masks = PolygonMasks(flipped_masks, self.height,\n                                         self.width)\n        return flipped_masks\n\n    def crop(self, bbox):\n        \"\"\"see :func:`BaseInstanceMasks.crop`\"\"\"\n        assert isinstance(bbox, np.ndarray)\n        assert bbox.ndim == 1\n\n        # clip the boundary\n        bbox = bbox.copy()\n        bbox[0::2] = np.clip(bbox[0::2], 0, self.width)\n        bbox[1::2] = np.clip(bbox[1::2], 0, self.height)\n        x1, y1, x2, y2 = bbox\n        w = np.maximum(x2 - x1, 1)\n        h = np.maximum(y2 - y1, 1)\n\n        if len(self.masks) == 0:\n            cropped_masks = PolygonMasks([], h, w)\n        else:\n            cropped_masks = []\n            for poly_per_obj in self.masks:\n                cropped_poly_per_obj = []\n                for p in poly_per_obj:\n                    # pycocotools will clip the boundary\n                    p = p.copy()\n                    p[0::2] = p[0::2] - bbox[0]\n                    p[1::2] = p[1::2] - bbox[1]\n                    cropped_poly_per_obj.append(p)\n                cropped_masks.append(cropped_poly_per_obj)\n            cropped_masks = PolygonMasks(cropped_masks, h, w)\n        return cropped_masks\n\n    def pad(self, out_shape, pad_val=0):\n        \"\"\"padding has no effect on polygons`\"\"\"\n        return PolygonMasks(self.masks, *out_shape)\n\n    def expand(self, *args, **kwargs):\n        \"\"\"TODO: Add expand for polygon\"\"\"\n        raise NotImplementedError\n\n    def crop_and_resize(self,\n                        bboxes,\n                        out_shape,\n                        inds,\n                        device='cpu',\n                      
  interpolation='bilinear',\n                        binarize=True):\n        \"\"\"see :func:`BaseInstanceMasks.crop_and_resize`\"\"\"\n        out_h, out_w = out_shape\n        if len(self.masks) == 0:\n            return PolygonMasks([], out_h, out_w)\n\n        if not binarize:\n            raise ValueError('Polygons are always binary, '\n                             'setting binarize=False is unsupported')\n\n        resized_masks = []\n        for i in range(len(bboxes)):\n            mask = self.masks[inds[i]]\n            bbox = bboxes[i, :]\n            x1, y1, x2, y2 = bbox\n            w = np.maximum(x2 - x1, 1)\n            h = np.maximum(y2 - y1, 1)\n            h_scale = out_h / max(h, 0.1)  # avoid too large scale\n            w_scale = out_w / max(w, 0.1)\n\n            resized_mask = []\n            for p in mask:\n                p = p.copy()\n                # crop\n                # pycocotools will clip the boundary\n                p[0::2] = p[0::2] - bbox[0]\n                p[1::2] = p[1::2] - bbox[1]\n\n                # resize\n                p[0::2] = p[0::2] * w_scale\n                p[1::2] = p[1::2] * h_scale\n                resized_mask.append(p)\n            resized_masks.append(resized_mask)\n        return PolygonMasks(resized_masks, *out_shape)\n\n    def translate(self,\n                  out_shape,\n                  offset,\n                  direction='horizontal',\n                  fill_val=None,\n                  interpolation=None):\n        \"\"\"Translate the PolygonMasks.\n\n        Example:\n            >>> self = PolygonMasks.random(dtype=np.int)\n            >>> out_shape = (self.height, self.width)\n            >>> new = self.translate(out_shape, 4., direction='horizontal')\n            >>> assert np.all(new.masks[0][0][1::2] == self.masks[0][0][1::2])\n            >>> assert np.all(new.masks[0][0][0::2] == self.masks[0][0][0::2] + 4)  # noqa: E501\n        \"\"\"\n        assert fill_val is None or fill_val == 0, 'Here fill_val is not '\\\n            f'used, and defaultly should be None or 0. 
got {fill_val}.'\n        if len(self.masks) == 0:\n            translated_masks = PolygonMasks([], *out_shape)\n        else:\n            translated_masks = []\n            for poly_per_obj in self.masks:\n                translated_poly_per_obj = []\n                for p in poly_per_obj:\n                    p = p.copy()\n                    if direction == 'horizontal':\n                        p[0::2] = np.clip(p[0::2] + offset, 0, out_shape[1])\n                    elif direction == 'vertical':\n                        p[1::2] = np.clip(p[1::2] + offset, 0, out_shape[0])\n                    translated_poly_per_obj.append(p)\n                translated_masks.append(translated_poly_per_obj)\n            translated_masks = PolygonMasks(translated_masks, *out_shape)\n        return translated_masks\n\n    def shear(self,\n              out_shape,\n              magnitude,\n              direction='horizontal',\n              border_value=0,\n              interpolation='bilinear'):\n        \"\"\"See :func:`BaseInstanceMasks.shear`.\"\"\"\n        if len(self.masks) == 0:\n            sheared_masks = PolygonMasks([], *out_shape)\n        else:\n            sheared_masks = []\n            if direction == 'horizontal':\n                shear_matrix = np.stack([[1, magnitude],\n                                         [0, 1]]).astype(np.float32)\n            elif direction == 'vertical':\n                shear_matrix = np.stack([[1, 0], [magnitude,\n                                                  1]]).astype(np.float32)\n            for poly_per_obj in self.masks:\n                sheared_poly = []\n                for p in poly_per_obj:\n                    p = np.stack([p[0::2], p[1::2]], axis=0)  # [2, n]\n                    new_coords = np.matmul(shear_matrix, p)  # [2, n]\n                    new_coords[0, :] = np.clip(new_coords[0, :], 0,\n                                               out_shape[1])\n                    new_coords[1, :] = np.clip(new_coords[1, :], 0,\n                                               out_shape[0])\n                    sheared_poly.append(\n                        new_coords.transpose((1, 0)).reshape(-1))\n                sheared_masks.append(sheared_poly)\n            sheared_masks = PolygonMasks(sheared_masks, *out_shape)\n        return sheared_masks\n\n    def rotate(self, out_shape, angle, center=None, scale=1.0, fill_val=0):\n        \"\"\"See :func:`BaseInstanceMasks.rotate`.\"\"\"\n        if len(self.masks) == 0:\n            rotated_masks = PolygonMasks([], *out_shape)\n        else:\n            rotated_masks = []\n            rotate_matrix = cv2.getRotationMatrix2D(center, -angle, scale)\n            for poly_per_obj in self.masks:\n                rotated_poly = []\n                for p in poly_per_obj:\n                    p = p.copy()\n                    coords = np.stack([p[0::2], p[1::2]], axis=1)  # [n, 2]\n                    # pad 1 to convert from format [x, y] to homogeneous\n                    # coordinates format [x, y, 1]\n                    coords = np.concatenate(\n                        (coords, np.ones((coords.shape[0], 1), coords.dtype)),\n                        axis=1)  # [n, 3]\n                    rotated_coords = np.matmul(\n                        rotate_matrix[None, :, :],\n                        coords[:, :, None])[..., 0]  # [n, 2, 1] -> [n, 2]\n                    rotated_coords[:, 0] = np.clip(rotated_coords[:, 0], 0,\n                                                   out_shape[1])\n                    
rotated_coords[:, 1] = np.clip(rotated_coords[:, 1], 0,\n                                                   out_shape[0])\n                    rotated_poly.append(rotated_coords.reshape(-1))\n                rotated_masks.append(rotated_poly)\n            rotated_masks = PolygonMasks(rotated_masks, *out_shape)\n        return rotated_masks\n\n    def to_bitmap(self):\n        \"\"\"convert polygon masks to bitmap masks.\"\"\"\n        bitmap_masks = self.to_ndarray()\n        return BitmapMasks(bitmap_masks, self.height, self.width)\n\n    @property\n    def areas(self):\n        \"\"\"Compute areas of masks.\n\n        This func is modified from `detectron2\n        <https://github.com/facebookresearch/detectron2/blob/ffff8acc35ea88ad1cb1806ab0f00b4c1c5dbfd9/detectron2/structures/masks.py#L387>`_.\n        The function only works with Polygons using the shoelace formula.\n\n        Return:\n            ndarray: areas of each instance\n        \"\"\"  # noqa: W501\n        area = []\n        for polygons_per_obj in self.masks:\n            area_per_obj = 0\n            for p in polygons_per_obj:\n                area_per_obj += self._polygon_area(p[0::2], p[1::2])\n            area.append(area_per_obj)\n        return np.asarray(area)\n\n    def _polygon_area(self, x, y):\n        \"\"\"Compute the area of a component of a polygon.\n\n        Using the shoelace formula:\n        https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates\n\n        Args:\n            x (ndarray): x coordinates of the component\n            y (ndarray): y coordinates of the component\n\n        Return:\n            float: the are of the component\n        \"\"\"  # noqa: 501\n        return 0.5 * np.abs(\n            np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))\n\n    def to_ndarray(self):\n        \"\"\"Convert masks to the format of ndarray.\"\"\"\n        if len(self.masks) == 0:\n            return np.empty((0, self.height, self.width), dtype=np.uint8)\n        bitmap_masks = []\n        for poly_per_obj in self.masks:\n            bitmap_masks.append(\n                polygon_to_bitmap(poly_per_obj, self.height, self.width))\n        return np.stack(bitmap_masks)\n\n    def to_tensor(self, dtype, device):\n        \"\"\"See :func:`BaseInstanceMasks.to_tensor`.\"\"\"\n        if len(self.masks) == 0:\n            return torch.empty((0, self.height, self.width),\n                               dtype=dtype,\n                               device=device)\n        ndarray_masks = self.to_ndarray()\n        return torch.tensor(ndarray_masks, dtype=dtype, device=device)\n\n    @classmethod\n    def random(cls,\n               num_masks=3,\n               height=32,\n               width=32,\n               n_verts=5,\n               dtype=np.float32,\n               rng=None):\n        \"\"\"Generate random polygon masks for demo / testing purposes.\n\n        Adapted from [1]_\n\n        References:\n            .. 
[1] https://gitlab.kitware.com/computer-vision/kwimage/-/blob/928cae35ca8/kwimage/structs/polygon.py#L379  # noqa: E501\n\n        Example:\n            >>> from mmdet.core.mask.structures import PolygonMasks\n            >>> self = PolygonMasks.random()\n            >>> print('self = {}'.format(self))\n        \"\"\"\n        from mmdet.utils.util_random import ensure_rng\n        rng = ensure_rng(rng)\n\n        def _gen_polygon(n, irregularity, spikeyness):\n            \"\"\"Creates the polygon by sampling points on a circle around the\n            centre.  Random noise is added by varying the angular spacing\n            between sequential points, and by varying the radial distance of\n            each point from the centre.\n\n            Based on original code by Mike Ounsworth\n\n            Args:\n                n (int): number of vertices\n                irregularity (float): [0,1] indicating how much variance there\n                    is in the angular spacing of vertices. [0,1] will map to\n                    [0, 2pi/numberOfVerts]\n                spikeyness (float): [0,1] indicating how much variance there is\n                    in each vertex from the circle of radius aveRadius. [0,1]\n                    will map to [0, aveRadius]\n\n            Returns:\n                a list of vertices, in CCW order.\n            \"\"\"\n            from scipy.stats import truncnorm\n\n            # Generate around the unit circle\n            cx, cy = (0.0, 0.0)\n            radius = 1\n\n            tau = np.pi * 2\n\n            irregularity = np.clip(irregularity, 0, 1) * 2 * np.pi / n\n            spikeyness = np.clip(spikeyness, 1e-9, 1)\n\n            # generate n angle steps\n            lower = (tau / n) - irregularity\n            upper = (tau / n) + irregularity\n            angle_steps = rng.uniform(lower, upper, n)\n\n            # normalize the steps so that point 0 and point n+1 are the same\n            k = angle_steps.sum() / (2 * np.pi)\n            angles = (angle_steps / k).cumsum() + rng.uniform(0, tau)\n\n            # Convert high and low values to be wrt the standard normal range\n            # https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.truncnorm.html\n            low = 0\n            high = 2 * radius\n            mean = radius\n            std = spikeyness\n            a = (low - mean) / std\n            b = (high - mean) / std\n            tnorm = truncnorm(a=a, b=b, loc=mean, scale=std)\n\n            # now generate the points\n            radii = tnorm.rvs(n, random_state=rng)\n            x_pts = cx + radii * np.cos(angles)\n            y_pts = cy + radii * np.sin(angles)\n\n            points = np.hstack([x_pts[:, None], y_pts[:, None]])\n\n            # Scale to 0-1 space\n            points = points - points.min(axis=0)\n            points = points / points.max(axis=0)\n\n            # Randomly place within 0-1 space\n            points = points * (rng.rand() * .8 + .2)\n            min_pt = points.min(axis=0)\n            max_pt = points.max(axis=0)\n\n            high = (1 - max_pt)\n            low = (0 - min_pt)\n            offset = (rng.rand(2) * (high - low)) + low\n            points = points + offset\n            return points\n\n        def _order_vertices(verts):\n            \"\"\"\n            References:\n                https://stackoverflow.com/questions/1709283/how-can-i-sort-a-coordinate-list-for-a-rectangle-counterclockwise\n            \"\"\"\n            mlat = verts.T[0].sum() / len(verts)\n            
mlng = verts.T[1].sum() / len(verts)\n\n            tau = np.pi * 2\n            angle = (np.arctan2(mlat - verts.T[0], verts.T[1] - mlng) +\n                     tau) % tau\n            sortx = angle.argsort()\n            verts = verts.take(sortx, axis=0)\n            return verts\n\n        # Generate a random exterior for each requested mask\n        masks = []\n        for _ in range(num_masks):\n            exterior = _order_vertices(_gen_polygon(n_verts, 0.9, 0.9))\n            exterior = (exterior * [(width, height)]).astype(dtype)\n            masks.append([exterior.ravel()])\n\n        self = cls(masks, height, width)\n        return self\n\n    def get_bboxes(self):\n        num_masks = len(self)\n        boxes = np.zeros((num_masks, 4), dtype=np.float32)\n        for idx, poly_per_obj in enumerate(self.masks):\n            # simply use a number that is big enough for comparison with\n            # coordinates\n            xy_min = np.array([self.width * 2, self.height * 2],\n                              dtype=np.float32)\n            xy_max = np.zeros(2, dtype=np.float32)\n            for p in poly_per_obj:\n                xy = np.array(p).reshape(-1, 2).astype(np.float32)\n                xy_min = np.minimum(xy_min, np.min(xy, axis=0))\n                xy_max = np.maximum(xy_max, np.max(xy, axis=0))\n            boxes[idx, :2] = xy_min\n            boxes[idx, 2:] = xy_max\n\n        return boxes\n\n\ndef polygon_to_bitmap(polygons, height, width):\n    \"\"\"Convert masks from the form of polygons to bitmaps.\n\n    Args:\n        polygons (list[ndarray]): masks in polygon representation\n        height (int): mask height\n        width (int): mask width\n\n    Return:\n        ndarray: the converted masks in bitmap representation\n    \"\"\"\n    rles = maskUtils.frPyObjects(polygons, height, width)\n    rle = maskUtils.merge(rles)\n    bitmap_mask = maskUtils.decode(rle).astype(bool)\n    return bitmap_mask\n\n\ndef bitmap_to_polygon(bitmap):\n    \"\"\"Convert masks from the form of bitmaps to polygons.\n\n    Args:\n        bitmap (ndarray): masks in bitmap representation.\n\n    Return:\n        list[ndarray]: the converted mask in polygon representation.\n        bool: whether the mask has holes.\n    \"\"\"\n    bitmap = np.ascontiguousarray(bitmap).astype(np.uint8)\n    # cv2.RETR_CCOMP: retrieves all of the contours and organizes them\n    #   into a two-level hierarchy. At the top level, there are external\n    #   boundaries of the components. At the second level, there are\n    #   boundaries of the holes. If there is another contour inside a hole\n    #   of a connected component, it is still put at the top level.\n    # cv2.CHAIN_APPROX_NONE: stores absolutely all the contour points.\n    outs = cv2.findContours(bitmap, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE)\n    contours = outs[-2]\n    hierarchy = outs[-1]\n    if hierarchy is None:\n        return [], False\n    # hierarchy[i]: 4 elements, for the indexes of next, previous,\n    # parent, or nested contours. If there is no corresponding contour,\n    # it will be -1.\n    with_hole = (hierarchy.reshape(-1, 4)[:, 3] >= 0).any()\n    contours = [c.reshape(-1, 2) for c in contours]\n    return contours, with_hole\n"
  },
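  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/mask_structures_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Exercises PolygonMasks.get_bboxes() and bitmap_to_polygon() from\n# mmdet/core/mask/structures.py; assumes mmdet, numpy and OpenCV are\n# installed as pinned by this project.\nimport numpy as np\n\nfrom mmdet.core.mask.structures import PolygonMasks, bitmap_to_polygon\n\n\ndef main():\n    # One mask made of a single triangle, flattened as [x0, y0, x1, y1, ...].\n    triangle = np.array([10.0, 10.0, 60.0, 12.0, 30.0, 50.0], dtype=np.float32)\n    masks = PolygonMasks([[triangle]], 64, 64)\n\n    # get_bboxes() returns one (x_min, y_min, x_max, y_max) row per mask.\n    print('bboxes:', masks.get_bboxes())\n\n    # bitmap_to_polygon() extracts contours from a binary mask and reports\n    # whether the mask contains holes.\n    bitmap = np.zeros((64, 64), dtype=np.uint8)\n    bitmap[20:40, 20:40] = 1\n    contours, with_hole = bitmap_to_polygon(bitmap)\n    print('num contours:', len(contours), 'with hole:', with_hole)\n\n\nif __name__ == '__main__':\n    main()\n"
  },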
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/mask/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport pycocotools.mask as mask_util\nimport torch\n\n\ndef split_combined_polys(polys, poly_lens, polys_per_mask):\n    \"\"\"Split the combined 1-D polys into masks.\n\n    A mask is represented as a list of polys, and a poly is represented as\n    a 1-D array. In dataset, all masks are concatenated into a single 1-D\n    tensor. Here we need to split the tensor into original representations.\n\n    Args:\n        polys (list): a list (length = image num) of 1-D tensors\n        poly_lens (list): a list (length = image num) of poly length\n        polys_per_mask (list): a list (length = image num) of poly number\n            of each mask\n\n    Returns:\n        list: a list (length = image num) of list (length = mask num) of \\\n            list (length = poly num) of numpy array.\n    \"\"\"\n    mask_polys_list = []\n    for img_id in range(len(polys)):\n        polys_single = polys[img_id]\n        polys_lens_single = poly_lens[img_id].tolist()\n        polys_per_mask_single = polys_per_mask[img_id].tolist()\n\n        split_polys = mmcv.slice_list(polys_single, polys_lens_single)\n        mask_polys = mmcv.slice_list(split_polys, polys_per_mask_single)\n        mask_polys_list.append(mask_polys)\n    return mask_polys_list\n\n\n# TODO: move this function to more proper place\ndef encode_mask_results(mask_results):\n    \"\"\"Encode bitmap mask to RLE code.\n\n    Args:\n        mask_results (list | tuple[list]): bitmap mask results.\n            In mask scoring rcnn, mask_results is a tuple of (segm_results,\n            segm_cls_score).\n\n    Returns:\n        list | tuple: RLE encoded mask.\n    \"\"\"\n    if isinstance(mask_results, tuple):  # mask scoring\n        cls_segms, cls_mask_scores = mask_results\n    else:\n        cls_segms = mask_results\n    num_classes = len(cls_segms)\n    encoded_mask_results = [[] for _ in range(num_classes)]\n    for i in range(len(cls_segms)):\n        for cls_segm in cls_segms[i]:\n            encoded_mask_results[i].append(\n                mask_util.encode(\n                    np.array(\n                        cls_segm[:, :, np.newaxis], order='F',\n                        dtype='uint8'))[0])  # encoded with RLE\n    if isinstance(mask_results, tuple):\n        return encoded_mask_results, cls_mask_scores\n    else:\n        return encoded_mask_results\n\n\ndef mask2bbox(masks):\n    \"\"\"Obtain tight bounding boxes of binary masks.\n\n    Args:\n        masks (Tensor): Binary mask of shape (n, h, w).\n\n    Returns:\n        Tensor: Bboxe with shape (n, 4) of \\\n            positive region in binary mask.\n    \"\"\"\n    N = masks.shape[0]\n    bboxes = masks.new_zeros((N, 4), dtype=torch.float32)\n    x_any = torch.any(masks, dim=1)\n    y_any = torch.any(masks, dim=2)\n    for i in range(N):\n        x = torch.where(x_any[i, :])[0]\n        y = torch.where(y_any[i, :])[0]\n        if len(x) > 0 and len(y) > 0:\n            bboxes[i, :] = bboxes.new_tensor(\n                [x[0], y[0], x[-1] + 1, y[-1] + 1])\n\n    return bboxes\n"
  },
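  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/mask_utils_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Demonstrates mask2bbox() from mmdet/core/mask/utils.py, which derives a\n# tight (x1, y1, x2, y2) box from each binary mask; assumes torch and mmdet\n# are installed.\nimport torch\n\nfrom mmdet.core.mask.utils import mask2bbox\n\n\ndef main():\n    masks = torch.zeros((2, 8, 8), dtype=torch.bool)\n    masks[0, 2:5, 3:7] = True   # first instance: rows 2-4, cols 3-6\n    masks[1, 6:8, 0:2] = True   # second instance: bottom-left corner\n    print(mask2bbox(masks))\n    # Expected rows (as floats): [3, 2, 7, 5] and [0, 6, 2, 8].\n\n\nif __name__ == '__main__':\n    main()\n"
  },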
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/optimizers/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .builder import OPTIMIZER_BUILDERS, build_optimizer\nfrom .layer_decay_optimizer_constructor import \\\n    LearningRateDecayOptimizerConstructor\n\n__all__ = [\n    'LearningRateDecayOptimizerConstructor', 'OPTIMIZER_BUILDERS',\n    'build_optimizer'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/optimizers/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nfrom mmcv.runner.optimizer import OPTIMIZER_BUILDERS as MMCV_OPTIMIZER_BUILDERS\nfrom mmcv.utils import Registry, build_from_cfg\n\nOPTIMIZER_BUILDERS = Registry(\n    'optimizer builder', parent=MMCV_OPTIMIZER_BUILDERS)\n\n\ndef build_optimizer_constructor(cfg):\n    constructor_type = cfg.get('type')\n    if constructor_type in OPTIMIZER_BUILDERS:\n        return build_from_cfg(cfg, OPTIMIZER_BUILDERS)\n    elif constructor_type in MMCV_OPTIMIZER_BUILDERS:\n        return build_from_cfg(cfg, MMCV_OPTIMIZER_BUILDERS)\n    else:\n        raise KeyError(f'{constructor_type} is not registered '\n                       'in the optimizer builder registry.')\n\n\ndef build_optimizer(model, cfg):\n    optimizer_cfg = copy.deepcopy(cfg)\n    constructor_type = optimizer_cfg.pop('constructor',\n                                         'DefaultOptimizerConstructor')\n    paramwise_cfg = optimizer_cfg.pop('paramwise_cfg', None)\n    optim_constructor = build_optimizer_constructor(\n        dict(\n            type=constructor_type,\n            optimizer_cfg=optimizer_cfg,\n            paramwise_cfg=paramwise_cfg))\n    optimizer = optim_constructor(model)\n    return optimizer\n"
  },
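  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/optimizer_builder_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Shows how build_optimizer() from mmdet/core/optimizers/builder.py turns a\n# config dict into a torch optimizer via the registered constructor; assumes\n# mmcv and torch are installed.\nimport torch.nn as nn\n\nfrom mmdet.core.optimizers import build_optimizer\n\n\ndef main():\n    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.ReLU(), nn.Conv2d(8, 2, 1))\n    cfg = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=1e-4)\n    optimizer = build_optimizer(model, cfg)\n    print(type(optimizer).__name__, optimizer.defaults['lr'])\n\n\nif __name__ == '__main__':\n    main()\n"
  },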
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/optimizers/layer_decay_optimizer_constructor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport json\n\nfrom mmcv.runner import DefaultOptimizerConstructor, get_dist_info\n\nfrom mmdet.utils import get_root_logger\nfrom .builder import OPTIMIZER_BUILDERS\n\n\ndef get_layer_id_for_convnext(var_name, max_layer_id):\n    \"\"\"Get the layer id to set the different learning rates in ``layer_wise``\n    decay_type.\n\n    Args:\n        var_name (str): The key of the model.\n        max_layer_id (int): Maximum layer id.\n\n    Returns:\n        int: The id number corresponding to different learning rate in\n        ``LearningRateDecayOptimizerConstructor``.\n    \"\"\"\n\n    if var_name in ('backbone.cls_token', 'backbone.mask_token',\n                    'backbone.pos_embed'):\n        return 0\n    elif var_name.startswith('backbone.downsample_layers'):\n        stage_id = int(var_name.split('.')[2])\n        if stage_id == 0:\n            layer_id = 0\n        elif stage_id == 1:\n            layer_id = 2\n        elif stage_id == 2:\n            layer_id = 3\n        elif stage_id == 3:\n            layer_id = max_layer_id\n        return layer_id\n    elif var_name.startswith('backbone.stages'):\n        stage_id = int(var_name.split('.')[2])\n        block_id = int(var_name.split('.')[3])\n        if stage_id == 0:\n            layer_id = 1\n        elif stage_id == 1:\n            layer_id = 2\n        elif stage_id == 2:\n            layer_id = 3 + block_id // 3\n        elif stage_id == 3:\n            layer_id = max_layer_id\n        return layer_id\n    else:\n        return max_layer_id + 1\n\n\ndef get_stage_id_for_convnext(var_name, max_stage_id):\n    \"\"\"Get the stage id to set the different learning rates in ``stage_wise``\n    decay_type.\n\n    Args:\n        var_name (str): The key of the model.\n        max_stage_id (int): Maximum stage id.\n\n    Returns:\n        int: The id number corresponding to different learning rate in\n        ``LearningRateDecayOptimizerConstructor``.\n    \"\"\"\n\n    if var_name in ('backbone.cls_token', 'backbone.mask_token',\n                    'backbone.pos_embed'):\n        return 0\n    elif var_name.startswith('backbone.downsample_layers'):\n        return 0\n    elif var_name.startswith('backbone.stages'):\n        stage_id = int(var_name.split('.')[2])\n        return stage_id + 1\n    else:\n        return max_stage_id - 1\n\n\n@OPTIMIZER_BUILDERS.register_module()\nclass LearningRateDecayOptimizerConstructor(DefaultOptimizerConstructor):\n    # Different learning rates are set for different layers of backbone.\n    # Note: Currently, this optimizer constructor is built for ConvNeXt.\n\n    def add_params(self, params, module, **kwargs):\n        \"\"\"Add all parameters of module to the params list.\n\n        The parameters of the given module will be added to the list of param\n        groups, with specific rules defined by paramwise_cfg.\n\n        Args:\n            params (list[dict]): A list of param groups, it will be modified\n                in place.\n            module (nn.Module): The module to be added.\n        \"\"\"\n        logger = get_root_logger()\n\n        parameter_groups = {}\n        logger.info(f'self.paramwise_cfg is {self.paramwise_cfg}')\n        num_layers = self.paramwise_cfg.get('num_layers') + 2\n        decay_rate = self.paramwise_cfg.get('decay_rate')\n        decay_type = self.paramwise_cfg.get('decay_type', 'layer_wise')\n        logger.info('Build LearningRateDecayOptimizerConstructor  '\n                    
f'{decay_type} {decay_rate} - {num_layers}')\n        weight_decay = self.base_wd\n        for name, param in module.named_parameters():\n            if not param.requires_grad:\n                continue  # frozen weights\n            if len(param.shape) == 1 or name.endswith('.bias') or name in (\n                    'pos_embed', 'cls_token'):\n                group_name = 'no_decay'\n                this_weight_decay = 0.\n            else:\n                group_name = 'decay'\n                this_weight_decay = weight_decay\n            if 'layer_wise' in decay_type:\n                if 'ConvNeXt' in module.backbone.__class__.__name__:\n                    layer_id = get_layer_id_for_convnext(\n                        name, self.paramwise_cfg.get('num_layers'))\n                    logger.info(f'set param {name} as id {layer_id}')\n                else:\n                    raise NotImplementedError()\n            elif decay_type == 'stage_wise':\n                if 'ConvNeXt' in module.backbone.__class__.__name__:\n                    layer_id = get_stage_id_for_convnext(name, num_layers)\n                    logger.info(f'set param {name} as id {layer_id}')\n                else:\n                    raise NotImplementedError()\n            group_name = f'layer_{layer_id}_{group_name}'\n\n            if group_name not in parameter_groups:\n                scale = decay_rate**(num_layers - layer_id - 1)\n\n                parameter_groups[group_name] = {\n                    'weight_decay': this_weight_decay,\n                    'params': [],\n                    'param_names': [],\n                    'lr_scale': scale,\n                    'group_name': group_name,\n                    'lr': scale * self.base_lr,\n                }\n\n            parameter_groups[group_name]['params'].append(param)\n            parameter_groups[group_name]['param_names'].append(name)\n        rank, _ = get_dist_info()\n        if rank == 0:\n            to_display = {}\n            for key in parameter_groups:\n                to_display[key] = {\n                    'param_names': parameter_groups[key]['param_names'],\n                    'lr_scale': parameter_groups[key]['lr_scale'],\n                    'lr': parameter_groups[key]['lr'],\n                    'weight_decay': parameter_groups[key]['weight_decay'],\n                }\n            logger.info(f'Param groups = {json.dumps(to_display, indent=2)}')\n        params.extend(parameter_groups.values())\n"
  },
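  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/layer_decay_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Illustrates how get_layer_id_for_convnext() from\n# mmdet/core/optimizers/layer_decay_optimizer_constructor.py maps ConvNeXt\n# parameter names to layer ids for layer-wise learning-rate decay, plus the\n# rough shape of a config that selects the constructor. Assumes mmdet is\n# installed; the parameter names and values below are illustrative only.\nfrom mmdet.core.optimizers.layer_decay_optimizer_constructor import get_layer_id_for_convnext\n\n\ndef main():\n    max_layer_id = 12\n    for name in ('backbone.downsample_layers.0.0.weight',\n                 'backbone.stages.2.4.dwconv.weight',\n                 'neck.lateral_convs.0.conv.weight'):\n        print(name, '->', get_layer_id_for_convnext(name, max_layer_id))\n\n    # A config of roughly this shape routes optimizer building through\n    # LearningRateDecayOptimizerConstructor (values are placeholders).\n    optimizer = dict(\n        constructor='LearningRateDecayOptimizerConstructor',\n        type='AdamW',\n        lr=0.0001,\n        weight_decay=0.05,\n        paramwise_cfg=dict(decay_rate=0.7, decay_type='layer_wise', num_layers=12))\n    print(optimizer)\n\n\nif __name__ == '__main__':\n    main()\n"
  },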
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/post_processing/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .bbox_nms import fast_nms, multiclass_nms\nfrom .matrix_nms import mask_matrix_nms\nfrom .merge_augs import (merge_aug_bboxes, merge_aug_masks,\n                         merge_aug_proposals, merge_aug_scores)\n\n__all__ = [\n    'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',\n    'merge_aug_scores', 'merge_aug_masks', 'mask_matrix_nms', 'fast_nms'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/post_processing/bbox_nms.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops.nms import batched_nms\n\nfrom mmdet.core.bbox.iou_calculators import bbox_overlaps\n\n\ndef multiclass_nms(multi_bboxes,\n                   multi_scores,\n                   score_thr,\n                   nms_cfg,\n                   max_num=-1,\n                   score_factors=None,\n                   return_inds=False):\n    \"\"\"NMS for multi-class bboxes.\n\n    Args:\n        multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)\n        multi_scores (Tensor): shape (n, #class), where the last column\n            contains scores of the background class, but this will be ignored.\n        score_thr (float): bbox threshold, bboxes with scores lower than it\n            will not be considered.\n        nms_cfg (dict): a dict that contains the arguments of nms operations\n        max_num (int, optional): if there are more than max_num bboxes after\n            NMS, only top max_num will be kept. Default to -1.\n        score_factors (Tensor, optional): The factors multiplied to scores\n            before applying NMS. Default to None.\n        return_inds (bool, optional): Whether return the indices of kept\n            bboxes. Default to False.\n\n    Returns:\n        tuple: (dets, labels, indices (optional)), tensors of shape (k, 5),\n            (k), and (k). Dets are boxes with scores. Labels are 0-based.\n    \"\"\"\n    num_classes = multi_scores.size(1) - 1\n    # exclude background category\n    if multi_bboxes.shape[1] > 4:\n        bboxes = multi_bboxes.view(multi_scores.size(0), -1, 4)\n    else:\n        bboxes = multi_bboxes[:, None].expand(\n            multi_scores.size(0), num_classes, 4)\n\n    scores = multi_scores[:, :-1]\n\n    labels = torch.arange(num_classes, dtype=torch.long, device=scores.device)\n    labels = labels.view(1, -1).expand_as(scores)\n\n    bboxes = bboxes.reshape(-1, 4)\n    scores = scores.reshape(-1)\n    labels = labels.reshape(-1)\n\n    if not torch.onnx.is_in_onnx_export():\n        # NonZero not supported  in TensorRT\n        # remove low scoring boxes\n        valid_mask = scores > score_thr\n    # multiply score_factor after threshold to preserve more bboxes, improve\n    # mAP by 1% for YOLOv3\n    if score_factors is not None:\n        # expand the shape to match original shape of score\n        score_factors = score_factors.view(-1, 1).expand(\n            multi_scores.size(0), num_classes)\n        score_factors = score_factors.reshape(-1)\n        scores = scores * score_factors\n\n    if not torch.onnx.is_in_onnx_export():\n        # NonZero not supported  in TensorRT\n        inds = valid_mask.nonzero(as_tuple=False).squeeze(1)\n        bboxes, scores, labels = bboxes[inds], scores[inds], labels[inds]\n    else:\n        # TensorRT NMS plugin has invalid output filled with -1\n        # add dummy data to make detection output correct.\n        bboxes = torch.cat([bboxes, bboxes.new_zeros(1, 4)], dim=0)\n        scores = torch.cat([scores, scores.new_zeros(1)], dim=0)\n        labels = torch.cat([labels, labels.new_zeros(1)], dim=0)\n\n    if bboxes.numel() == 0:\n        if torch.onnx.is_in_onnx_export():\n            raise RuntimeError('[ONNX Error] Can not record NMS '\n                               'as it has not been executed this time')\n        dets = torch.cat([bboxes, scores[:, None]], -1)\n        if return_inds:\n            return dets, labels, inds\n        else:\n            return dets, labels\n\n    dets, keep = 
batched_nms(bboxes, scores, labels, nms_cfg)\n\n    if max_num > 0:\n        dets = dets[:max_num]\n        keep = keep[:max_num]\n\n    if return_inds:\n        return dets, labels[keep], inds[keep]\n    else:\n        return dets, labels[keep]\n\n\ndef fast_nms(multi_bboxes,\n             multi_scores,\n             multi_coeffs,\n             score_thr,\n             iou_thr,\n             top_k,\n             max_num=-1):\n    \"\"\"Fast NMS in `YOLACT <https://arxiv.org/abs/1904.02689>`_.\n\n    Fast NMS allows already-removed detections to suppress other detections so\n    that every instance can be decided to be kept or discarded in parallel,\n    which is not possible in traditional NMS. This relaxation allows us to\n    implement Fast NMS entirely in standard GPU-accelerated matrix operations.\n\n    Args:\n        multi_bboxes (Tensor): shape (n, #class*4) or (n, 4)\n        multi_scores (Tensor): shape (n, #class+1), where the last column\n            contains scores of the background class, but this will be ignored.\n        multi_coeffs (Tensor): shape (n, #class*coeffs_dim).\n        score_thr (float): bbox threshold, bboxes with scores lower than it\n            will not be considered.\n        iou_thr (float): IoU threshold to be considered as conflicted.\n        top_k (int): if there are more than top_k bboxes before NMS,\n            only top top_k will be kept.\n        max_num (int): if there are more than max_num bboxes after NMS,\n            only top max_num will be kept. If -1, keep all the bboxes.\n            Default: -1.\n\n    Returns:\n        tuple: (dets, labels, coefficients), tensors of shape (k, 5), (k, 1),\n            and (k, coeffs_dim). Dets are boxes with scores.\n            Labels are 0-based.\n    \"\"\"\n\n    scores = multi_scores[:, :-1].t()  # [#class, n]\n    scores, idx = scores.sort(1, descending=True)\n\n    idx = idx[:, :top_k].contiguous()\n    scores = scores[:, :top_k]  # [#class, topk]\n    num_classes, num_dets = idx.size()\n    boxes = multi_bboxes[idx.view(-1), :].view(num_classes, num_dets, 4)\n    coeffs = multi_coeffs[idx.view(-1), :].view(num_classes, num_dets, -1)\n\n    iou = bbox_overlaps(boxes, boxes)  # [#class, topk, topk]\n    iou.triu_(diagonal=1)\n    iou_max, _ = iou.max(dim=1)\n\n    # Now just filter out the ones higher than the threshold\n    keep = iou_max <= iou_thr\n\n    # Second thresholding introduces 0.2 mAP gain at negligible time cost\n    keep *= scores > score_thr\n\n    # Assign each kept detection to its corresponding class\n    classes = torch.arange(\n        num_classes, device=boxes.device)[:, None].expand_as(keep)\n    classes = classes[keep]\n\n    boxes = boxes[keep]\n    coeffs = coeffs[keep]\n    scores = scores[keep]\n\n    # Only keep the top max_num highest scores across all classes\n    scores, idx = scores.sort(0, descending=True)\n    if max_num > 0:\n        idx = idx[:max_num]\n        scores = scores[:max_num]\n\n    classes = classes[idx]\n    boxes = boxes[idx]\n    coeffs = coeffs[idx]\n\n    cls_dets = torch.cat([boxes, scores[:, None]], dim=1)\n    return cls_dets, classes, coeffs\n"
  },
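  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/multiclass_nms_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Runs multiclass_nms() from mmdet/core/post_processing/bbox_nms.py on a few\n# hand-made boxes; assumes a full mmcv build (with the compiled nms op) and\n# torch are installed. The two overlapping class-0 boxes should collapse to one.\nimport torch\n\nfrom mmdet.core.post_processing import multiclass_nms\n\n\ndef main():\n    # Three candidate boxes shared by every class; the first two overlap heavily.\n    bboxes = torch.tensor([[10., 10., 50., 50.],\n                           [12., 12., 52., 52.],\n                           [60., 60., 90., 90.]])\n    # Scores for two classes plus a trailing background column that is ignored.\n    scores = torch.tensor([[0.9, 0.05, 0.05],\n                           [0.8, 0.10, 0.10],\n                           [0.1, 0.70, 0.20]])\n    dets, labels = multiclass_nms(\n        bboxes, scores, score_thr=0.3,\n        nms_cfg=dict(type='nms', iou_threshold=0.5), max_num=10)\n    print(dets)    # (k, 5): x1, y1, x2, y2, score\n    print(labels)  # 0-based class indices of the kept boxes\n\n\nif __name__ == '__main__':\n    main()\n"
  },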
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/post_processing/matrix_nms.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\n\ndef mask_matrix_nms(masks,\n                    labels,\n                    scores,\n                    filter_thr=-1,\n                    nms_pre=-1,\n                    max_num=-1,\n                    kernel='gaussian',\n                    sigma=2.0,\n                    mask_area=None):\n    \"\"\"Matrix NMS for multi-class masks.\n\n    Args:\n        masks (Tensor): Has shape (num_instances, h, w)\n        labels (Tensor): Labels of corresponding masks,\n            has shape (num_instances,).\n        scores (Tensor): Mask scores of corresponding masks,\n            has shape (num_instances).\n        filter_thr (float): Score threshold to filter the masks\n            after matrix nms. Default: -1, which means do not\n            use filter_thr.\n        nms_pre (int): The max number of instances to do the matrix nms.\n            Default: -1, which means do not use nms_pre.\n        max_num (int, optional): If there are more than max_num masks after\n            matrix, only top max_num will be kept. Default: -1, which means\n            do not use max_num.\n        kernel (str): 'linear' or 'gaussian'.\n        sigma (float): std in gaussian method.\n        mask_area (Tensor): The sum of seg_masks.\n\n    Returns:\n        tuple(Tensor): Processed mask results.\n\n            - scores (Tensor): Updated scores, has shape (n,).\n            - labels (Tensor): Remained labels, has shape (n,).\n            - masks (Tensor): Remained masks, has shape (n, w, h).\n            - keep_inds (Tensor): The indices number of\n                the remaining mask in the input mask, has shape (n,).\n    \"\"\"\n    assert len(labels) == len(masks) == len(scores)\n    if len(labels) == 0:\n        return scores.new_zeros(0), labels.new_zeros(0), masks.new_zeros(\n            0, *masks.shape[-2:]), labels.new_zeros(0)\n    if mask_area is None:\n        mask_area = masks.sum((1, 2)).float()\n    else:\n        assert len(masks) == len(mask_area)\n\n    # sort and keep top nms_pre\n    scores, sort_inds = torch.sort(scores, descending=True)\n\n    keep_inds = sort_inds\n    if nms_pre > 0 and len(sort_inds) > nms_pre:\n        sort_inds = sort_inds[:nms_pre]\n        keep_inds = keep_inds[:nms_pre]\n        scores = scores[:nms_pre]\n    masks = masks[sort_inds]\n    mask_area = mask_area[sort_inds]\n    labels = labels[sort_inds]\n\n    num_masks = len(labels)\n    flatten_masks = masks.reshape(num_masks, -1).float()\n    # inter.\n    inter_matrix = torch.mm(flatten_masks, flatten_masks.transpose(1, 0))\n    expanded_mask_area = mask_area.expand(num_masks, num_masks)\n    # Upper triangle iou matrix.\n    iou_matrix = (inter_matrix /\n                  (expanded_mask_area + expanded_mask_area.transpose(1, 0) -\n                   inter_matrix)).triu(diagonal=1)\n    # label_specific matrix.\n    expanded_labels = labels.expand(num_masks, num_masks)\n    # Upper triangle label matrix.\n    label_matrix = (expanded_labels == expanded_labels.transpose(\n        1, 0)).triu(diagonal=1)\n\n    # IoU compensation\n    compensate_iou, _ = (iou_matrix * label_matrix).max(0)\n    compensate_iou = compensate_iou.expand(num_masks,\n                                           num_masks).transpose(1, 0)\n\n    # IoU decay\n    decay_iou = iou_matrix * label_matrix\n\n    # Calculate the decay_coefficient\n    if kernel == 'gaussian':\n        decay_matrix = torch.exp(-1 * sigma * (decay_iou**2))\n        compensate_matrix = 
torch.exp(-1 * sigma * (compensate_iou**2))\n        decay_coefficient, _ = (decay_matrix / compensate_matrix).min(0)\n    elif kernel == 'linear':\n        decay_matrix = (1 - decay_iou) / (1 - compensate_iou)\n        decay_coefficient, _ = decay_matrix.min(0)\n    else:\n        raise NotImplementedError(\n            f'{kernel} kernel is not supported in matrix nms!')\n    # update the score.\n    scores = scores * decay_coefficient\n\n    if filter_thr > 0:\n        keep = scores >= filter_thr\n        keep_inds = keep_inds[keep]\n        if not keep.any():\n            return scores.new_zeros(0), labels.new_zeros(0), masks.new_zeros(\n                0, *masks.shape[-2:]), labels.new_zeros(0)\n        masks = masks[keep]\n        scores = scores[keep]\n        labels = labels[keep]\n\n    # sort and keep top max_num\n    scores, sort_inds = torch.sort(scores, descending=True)\n    keep_inds = keep_inds[sort_inds]\n    if max_num > 0 and len(sort_inds) > max_num:\n        sort_inds = sort_inds[:max_num]\n        keep_inds = keep_inds[:max_num]\n        scores = scores[:max_num]\n    masks = masks[sort_inds]\n    labels = labels[sort_inds]\n\n    return scores, labels, masks, keep_inds\n"
  },
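  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/mask_matrix_nms_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Exercises mask_matrix_nms() from mmdet/core/post_processing/matrix_nms.py\n# with two heavily overlapping masks of the same class; the lower-scoring\n# mask should have its score decayed. Assumes torch and mmdet are installed.\nimport torch\n\nfrom mmdet.core.post_processing import mask_matrix_nms\n\n\ndef main():\n    masks = torch.zeros((2, 16, 16), dtype=torch.bool)\n    masks[0, 2:10, 2:10] = True\n    masks[1, 3:11, 3:11] = True   # overlaps the first mask strongly\n    labels = torch.tensor([0, 0])\n    scores = torch.tensor([0.9, 0.8])\n\n    new_scores, kept_labels, kept_masks, keep_inds = mask_matrix_nms(\n        masks, labels, scores, kernel='gaussian', sigma=2.0)\n    print(new_scores)   # the second score is pushed down by the IoU penalty\n    print(keep_inds)\n\n\nif __name__ == '__main__':\n    main()\n"
  },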
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/post_processing/merge_augs.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport warnings\n\nimport numpy as np\nimport torch\nfrom mmcv import ConfigDict\nfrom mmcv.ops import nms\n\nfrom ..bbox import bbox_mapping_back\n\n\ndef merge_aug_proposals(aug_proposals, img_metas, cfg):\n    \"\"\"Merge augmented proposals (multiscale, flip, etc.)\n\n    Args:\n        aug_proposals (list[Tensor]): proposals from different testing\n            schemes, shape (n, 5). Note that they are not rescaled to the\n            original image size.\n\n        img_metas (list[dict]): list of image info dict where each dict has:\n            'img_shape', 'scale_factor', 'flip', and may also contain\n            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n            For details on the values of these keys see\n            `mmdet/datasets/pipelines/formatting.py:Collect`.\n\n        cfg (dict): rpn test config.\n\n    Returns:\n        Tensor: shape (n, 4), proposals corresponding to original image scale.\n    \"\"\"\n\n    cfg = copy.deepcopy(cfg)\n\n    # deprecate arguments warning\n    if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:\n        warnings.warn(\n            'In rpn_proposal or test_cfg, '\n            'nms_thr has been moved to a dict named nms as '\n            'iou_threshold, max_num has been renamed as max_per_img, '\n            'name of original arguments and the way to specify '\n            'iou_threshold of NMS will be deprecated.')\n    if 'nms' not in cfg:\n        cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))\n    if 'max_num' in cfg:\n        if 'max_per_img' in cfg:\n            assert cfg.max_num == cfg.max_per_img, f'You set max_num and ' \\\n                f'max_per_img at the same time, but get {cfg.max_num} ' \\\n                f'and {cfg.max_per_img} respectively' \\\n                f'Please delete max_num which will be deprecated.'\n        else:\n            cfg.max_per_img = cfg.max_num\n    if 'nms_thr' in cfg:\n        assert cfg.nms.iou_threshold == cfg.nms_thr, f'You set ' \\\n            f'iou_threshold in nms and ' \\\n            f'nms_thr at the same time, but get ' \\\n            f'{cfg.nms.iou_threshold} and {cfg.nms_thr}' \\\n            f' respectively. 
Please delete the nms_thr ' \\\n            f'which will be deprecated.'\n\n    recovered_proposals = []\n    for proposals, img_info in zip(aug_proposals, img_metas):\n        img_shape = img_info['img_shape']\n        scale_factor = img_info['scale_factor']\n        flip = img_info['flip']\n        flip_direction = img_info['flip_direction']\n        _proposals = proposals.clone()\n        _proposals[:, :4] = bbox_mapping_back(_proposals[:, :4], img_shape,\n                                              scale_factor, flip,\n                                              flip_direction)\n        recovered_proposals.append(_proposals)\n    aug_proposals = torch.cat(recovered_proposals, dim=0)\n    merged_proposals, _ = nms(aug_proposals[:, :4].contiguous(),\n                              aug_proposals[:, -1].contiguous(),\n                              cfg.nms.iou_threshold)\n    scores = merged_proposals[:, 4]\n    _, order = scores.sort(0, descending=True)\n    num = min(cfg.max_per_img, merged_proposals.shape[0])\n    order = order[:num]\n    merged_proposals = merged_proposals[order, :]\n    return merged_proposals\n\n\ndef merge_aug_bboxes(aug_bboxes, aug_scores, img_metas, rcnn_test_cfg):\n    \"\"\"Merge augmented detection bboxes and scores.\n\n    Args:\n        aug_bboxes (list[Tensor]): shape (n, 4*#class)\n        aug_scores (list[Tensor] or None): shape (n, #class)\n        img_shapes (list[Tensor]): shape (3, ).\n        rcnn_test_cfg (dict): rcnn test config.\n\n    Returns:\n        tuple: (bboxes, scores)\n    \"\"\"\n    recovered_bboxes = []\n    for bboxes, img_info in zip(aug_bboxes, img_metas):\n        img_shape = img_info[0]['img_shape']\n        scale_factor = img_info[0]['scale_factor']\n        flip = img_info[0]['flip']\n        flip_direction = img_info[0]['flip_direction']\n        bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip,\n                                   flip_direction)\n        recovered_bboxes.append(bboxes)\n    bboxes = torch.stack(recovered_bboxes).mean(dim=0)\n    if aug_scores is None:\n        return bboxes\n    else:\n        scores = torch.stack(aug_scores).mean(dim=0)\n        return bboxes, scores\n\n\ndef merge_aug_scores(aug_scores):\n    \"\"\"Merge augmented bbox scores.\"\"\"\n    if isinstance(aug_scores[0], torch.Tensor):\n        return torch.mean(torch.stack(aug_scores), dim=0)\n    else:\n        return np.mean(aug_scores, axis=0)\n\n\ndef merge_aug_masks(aug_masks, img_metas, rcnn_test_cfg, weights=None):\n    \"\"\"Merge augmented mask prediction.\n\n    Args:\n        aug_masks (list[ndarray]): shape (n, #class, h, w)\n        img_shapes (list[ndarray]): shape (3, ).\n        rcnn_test_cfg (dict): rcnn test config.\n\n    Returns:\n        tuple: (bboxes, scores)\n    \"\"\"\n    recovered_masks = []\n    for mask, img_info in zip(aug_masks, img_metas):\n        flip = img_info[0]['flip']\n        if flip:\n            flip_direction = img_info[0]['flip_direction']\n            if flip_direction == 'horizontal':\n                mask = mask[:, :, :, ::-1]\n            elif flip_direction == 'vertical':\n                mask = mask[:, :, ::-1, :]\n            elif flip_direction == 'diagonal':\n                mask = mask[:, :, :, ::-1]\n                mask = mask[:, :, ::-1, :]\n            else:\n                raise ValueError(\n                    f\"Invalid flipping direction '{flip_direction}'\")\n        recovered_masks.append(mask)\n\n    if weights is None:\n        merged_masks = 
np.mean(recovered_masks, axis=0)\n    else:\n        merged_masks = np.average(\n            np.array(recovered_masks), axis=0, weights=np.array(weights))\n    return merged_masks\n"
  },
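  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/merge_augs_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Shows merge_aug_scores() from mmdet/core/post_processing/merge_augs.py,\n# which averages the class scores predicted for different test-time\n# augmentations of the same image. Assumes torch and mmdet are installed.\nimport torch\n\nfrom mmdet.core.post_processing import merge_aug_scores\n\n\ndef main():\n    # Scores for the same two detections under two augmentations (e.g. the\n    # original image and its horizontal flip).\n    aug_scores = [\n        torch.tensor([[0.9, 0.1], [0.2, 0.8]]),\n        torch.tensor([[0.7, 0.3], [0.4, 0.6]]),\n    ]\n    print(merge_aug_scores(aug_scores))  # element-wise mean of the two tensors\n\n\nif __name__ == '__main__':\n    main()\n"
  },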
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .dist_utils import (DistOptimizerHook, all_reduce_dict, allreduce_grads,\n                         reduce_mean, sync_random_seed)\nfrom .misc import (center_of_mass, filter_scores_and_topk, flip_tensor,\n                   generate_coordinate, mask2ndarray, multi_apply,\n                   select_single_mlvl, unmap)\n\n__all__ = [\n    'allreduce_grads', 'DistOptimizerHook', 'reduce_mean', 'multi_apply',\n    'unmap', 'mask2ndarray', 'flip_tensor', 'all_reduce_dict',\n    'center_of_mass', 'generate_coordinate', 'select_single_mlvl',\n    'filter_scores_and_topk', 'sync_random_seed'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/utils/dist_utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport functools\nimport pickle\nimport warnings\nfrom collections import OrderedDict\n\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nfrom mmcv.runner import OptimizerHook, get_dist_info\nfrom torch._utils import (_flatten_dense_tensors, _take_tensors,\n                          _unflatten_dense_tensors)\n\n\ndef _allreduce_coalesced(tensors, world_size, bucket_size_mb=-1):\n    if bucket_size_mb > 0:\n        bucket_size_bytes = bucket_size_mb * 1024 * 1024\n        buckets = _take_tensors(tensors, bucket_size_bytes)\n    else:\n        buckets = OrderedDict()\n        for tensor in tensors:\n            tp = tensor.type()\n            if tp not in buckets:\n                buckets[tp] = []\n            buckets[tp].append(tensor)\n        buckets = buckets.values()\n\n    for bucket in buckets:\n        flat_tensors = _flatten_dense_tensors(bucket)\n        dist.all_reduce(flat_tensors)\n        flat_tensors.div_(world_size)\n        for tensor, synced in zip(\n                bucket, _unflatten_dense_tensors(flat_tensors, bucket)):\n            tensor.copy_(synced)\n\n\ndef allreduce_grads(params, coalesce=True, bucket_size_mb=-1):\n    \"\"\"Allreduce gradients.\n\n    Args:\n        params (list[torch.Parameters]): List of parameters of a model\n        coalesce (bool, optional): Whether allreduce parameters as a whole.\n            Defaults to True.\n        bucket_size_mb (int, optional): Size of bucket, the unit is MB.\n            Defaults to -1.\n    \"\"\"\n    grads = [\n        param.grad.data for param in params\n        if param.requires_grad and param.grad is not None\n    ]\n    world_size = dist.get_world_size()\n    if coalesce:\n        _allreduce_coalesced(grads, world_size, bucket_size_mb)\n    else:\n        for tensor in grads:\n            dist.all_reduce(tensor.div_(world_size))\n\n\nclass DistOptimizerHook(OptimizerHook):\n    \"\"\"Deprecated optimizer hook for distributed training.\"\"\"\n\n    def __init__(self, *args, **kwargs):\n        warnings.warn('\"DistOptimizerHook\" is deprecated, please switch to'\n                      '\"mmcv.runner.OptimizerHook\".')\n        super().__init__(*args, **kwargs)\n\n\ndef reduce_mean(tensor):\n    \"\"\"\"Obtain the mean of tensor on different GPUs.\"\"\"\n    if not (dist.is_available() and dist.is_initialized()):\n        return tensor\n    tensor = tensor.clone()\n    dist.all_reduce(tensor.div_(dist.get_world_size()), op=dist.ReduceOp.SUM)\n    return tensor\n\n\ndef obj2tensor(pyobj, device='cuda'):\n    \"\"\"Serialize picklable python object to tensor.\"\"\"\n    storage = torch.ByteStorage.from_buffer(pickle.dumps(pyobj))\n    return torch.ByteTensor(storage).to(device=device)\n\n\ndef tensor2obj(tensor):\n    \"\"\"Deserialize tensor to picklable python object.\"\"\"\n    return pickle.loads(tensor.cpu().numpy().tobytes())\n\n\n@functools.lru_cache()\ndef _get_global_gloo_group():\n    \"\"\"Return a process group based on gloo backend, containing all the ranks\n    The result is cached.\"\"\"\n    if dist.get_backend() == 'nccl':\n        return dist.new_group(backend='gloo')\n    else:\n        return dist.group.WORLD\n\n\ndef all_reduce_dict(py_dict, op='sum', group=None, to_float=True):\n    \"\"\"Apply all reduce function for python dict object.\n\n    The code is modified from https://github.com/Megvii-\n    BaseDetection/YOLOX/blob/main/yolox/utils/allreduce_norm.py.\n\n    NOTE: make sure that py_dict in different 
ranks has the same keys and\n    the values should be in the same shape. Currently only supports\n    nccl backend.\n\n    Args:\n        py_dict (dict): Dict to be applied all reduce op.\n        op (str): Operator, could be 'sum' or 'mean'. Default: 'sum'\n        group (:obj:`torch.distributed.group`, optional): Distributed group,\n            Default: None.\n        to_float (bool): Whether to convert all values of dict to float.\n            Default: True.\n\n    Returns:\n        OrderedDict: reduced python dict object.\n    \"\"\"\n    warnings.warn(\n        'group` is deprecated. Currently only supports NCCL backend.')\n    _, world_size = get_dist_info()\n    if world_size == 1:\n        return py_dict\n\n    # all reduce logic across different devices.\n    py_key = list(py_dict.keys())\n    if not isinstance(py_dict, OrderedDict):\n        py_key_tensor = obj2tensor(py_key)\n        dist.broadcast(py_key_tensor, src=0)\n        py_key = tensor2obj(py_key_tensor)\n\n    tensor_shapes = [py_dict[k].shape for k in py_key]\n    tensor_numels = [py_dict[k].numel() for k in py_key]\n\n    if to_float:\n        warnings.warn('Note: the \"to_float\" is True, you need to '\n                      'ensure that the behavior is reasonable.')\n        flatten_tensor = torch.cat(\n            [py_dict[k].flatten().float() for k in py_key])\n    else:\n        flatten_tensor = torch.cat([py_dict[k].flatten() for k in py_key])\n\n    dist.all_reduce(flatten_tensor, op=dist.ReduceOp.SUM)\n    if op == 'mean':\n        flatten_tensor /= world_size\n\n    split_tensors = [\n        x.reshape(shape) for x, shape in zip(\n            torch.split(flatten_tensor, tensor_numels), tensor_shapes)\n    ]\n    out_dict = {k: v for k, v in zip(py_key, split_tensors)}\n    if isinstance(py_dict, OrderedDict):\n        out_dict = OrderedDict(out_dict)\n    return out_dict\n\n\ndef sync_random_seed(seed=None, device='cuda'):\n    \"\"\"Make sure different ranks share the same seed.\n\n    All workers must call this function, otherwise it will deadlock.\n    This method is generally used in `DistributedSampler`,\n    because the seed should be identical across all processes\n    in the distributed group.\n\n    In distributed sampling, different ranks should sample non-overlapped\n    data in the dataset. Therefore, this function is used to make sure that\n    each rank shuffles the data indices in the same order based\n    on the same seed. Then different ranks could use different indices\n    to select non-overlapped data from the same data list.\n\n    Args:\n        seed (int, Optional): The seed. Default to None.\n        device (str): The device where the seed will be put on.\n            Default to 'cuda'.\n\n    Returns:\n        int: Seed to be used.\n    \"\"\"\n    if seed is None:\n        seed = np.random.randint(2**31)\n    assert isinstance(seed, int)\n\n    rank, world_size = get_dist_info()\n\n    if world_size == 1:\n        return seed\n\n    if rank == 0:\n        random_num = torch.tensor(seed, dtype=torch.int32, device=device)\n    else:\n        random_num = torch.tensor(0, dtype=torch.int32, device=device)\n    dist.broadcast(random_num, src=0)\n    return random_num.item()\n"
  },
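  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/dist_utils_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Shows the single-process fall-through behaviour of reduce_mean() and\n# sync_random_seed() from mmdet/core/utils/dist_utils.py: without an\n# initialised process group both helpers are effectively no-ops. Assumes\n# torch and mmdet are installed.\nimport torch\n\nfrom mmdet.core.utils import reduce_mean, sync_random_seed\n\n\ndef main():\n    loss = torch.tensor(0.42)\n    # With no distributed backend initialised the tensor is returned as-is;\n    # under DDP it would be averaged across ranks.\n    print(reduce_mean(loss))\n\n    # With world_size == 1 the provided seed is simply returned; under DDP\n    # rank 0 would broadcast it so every worker shuffles identically.\n    print(sync_random_seed(seed=2023, device='cpu'))\n\n\nif __name__ == '__main__':\n    main()\n"
  },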
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/utils/misc.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom functools import partial\n\nimport numpy as np\nimport torch\nfrom six.moves import map, zip\n\nfrom ..mask.structures import BitmapMasks, PolygonMasks\n\n\ndef multi_apply(func, *args, **kwargs):\n    \"\"\"Apply function to a list of arguments.\n\n    Note:\n        This function applies the ``func`` to multiple inputs and\n        map the multiple outputs of the ``func`` into different\n        list. Each list contains the same type of outputs corresponding\n        to different inputs.\n\n    Args:\n        func (Function): A function that will be applied to a list of\n            arguments\n\n    Returns:\n        tuple(list): A tuple containing multiple list, each list contains \\\n            a kind of returned results by the function\n    \"\"\"\n    pfunc = partial(func, **kwargs) if kwargs else func\n    map_results = map(pfunc, *args)\n    return tuple(map(list, zip(*map_results)))\n\n\ndef unmap(data, count, inds, fill=0):\n    \"\"\"Unmap a subset of item (data) back to the original set of items (of size\n    count)\"\"\"\n    if data.dim() == 1:\n        ret = data.new_full((count, ), fill)\n        ret[inds.type(torch.bool)] = data\n    else:\n        new_size = (count, ) + data.size()[1:]\n        ret = data.new_full(new_size, fill)\n        ret[inds.type(torch.bool), :] = data\n    return ret\n\n\ndef mask2ndarray(mask):\n    \"\"\"Convert Mask to ndarray..\n\n    Args:\n        mask (:obj:`BitmapMasks` or :obj:`PolygonMasks` or\n        torch.Tensor or np.ndarray): The mask to be converted.\n\n    Returns:\n        np.ndarray: Ndarray mask of shape (n, h, w) that has been converted\n    \"\"\"\n    if isinstance(mask, (BitmapMasks, PolygonMasks)):\n        mask = mask.to_ndarray()\n    elif isinstance(mask, torch.Tensor):\n        mask = mask.detach().cpu().numpy()\n    elif not isinstance(mask, np.ndarray):\n        raise TypeError(f'Unsupported {type(mask)} data type')\n    return mask\n\n\ndef flip_tensor(src_tensor, flip_direction):\n    \"\"\"flip tensor base on flip_direction.\n\n    Args:\n        src_tensor (Tensor): input feature map, shape (B, C, H, W).\n        flip_direction (str): The flipping direction. Options are\n          'horizontal', 'vertical', 'diagonal'.\n\n    Returns:\n        out_tensor (Tensor): Flipped tensor.\n    \"\"\"\n    assert src_tensor.ndim == 4\n    valid_directions = ['horizontal', 'vertical', 'diagonal']\n    assert flip_direction in valid_directions\n    if flip_direction == 'horizontal':\n        out_tensor = torch.flip(src_tensor, [3])\n    elif flip_direction == 'vertical':\n        out_tensor = torch.flip(src_tensor, [2])\n    else:\n        out_tensor = torch.flip(src_tensor, [2, 3])\n    return out_tensor\n\n\ndef select_single_mlvl(mlvl_tensors, batch_id, detach=True):\n    \"\"\"Extract a multi-scale single image tensor from a multi-scale batch\n    tensor based on batch index.\n\n    Note: The default value of detach is True, because the proposal gradient\n    needs to be detached during the training of the two-stage model. E.g\n    Cascade Mask R-CNN.\n\n    Args:\n        mlvl_tensors (list[Tensor]): Batch tensor for all scale levels,\n           each is a 4D-tensor.\n        batch_id (int): Batch index.\n        detach (bool): Whether detach gradient. 
Default True.\n\n    Returns:\n        list[Tensor]: Multi-scale single image tensor.\n    \"\"\"\n    assert isinstance(mlvl_tensors, (list, tuple))\n    num_levels = len(mlvl_tensors)\n\n    if detach:\n        mlvl_tensor_list = [\n            mlvl_tensors[i][batch_id].detach() for i in range(num_levels)\n        ]\n    else:\n        mlvl_tensor_list = [\n            mlvl_tensors[i][batch_id] for i in range(num_levels)\n        ]\n    return mlvl_tensor_list\n\n\ndef filter_scores_and_topk(scores, score_thr, topk, results=None):\n    \"\"\"Filter results using score threshold and topk candidates.\n\n    Args:\n        scores (Tensor): The scores, shape (num_bboxes, K).\n        score_thr (float): The score filter threshold.\n        topk (int): The number of topk candidates.\n        results (dict or list or Tensor, Optional): The results to\n           which the filtering rule is to be applied. The shape\n           of each item is (num_bboxes, N).\n\n    Returns:\n        tuple: Filtered results\n\n            - scores (Tensor): The scores after being filtered, \\\n                shape (num_bboxes_filtered, ).\n            - labels (Tensor): The class labels, shape \\\n                (num_bboxes_filtered, ).\n            - anchor_idxs (Tensor): The anchor indexes, shape \\\n                (num_bboxes_filtered, ).\n            - filtered_results (dict or list or Tensor, Optional): \\\n                The filtered results. The shape of each item is \\\n                (num_bboxes_filtered, N).\n    \"\"\"\n    valid_mask = scores > score_thr\n    scores = scores[valid_mask]\n    valid_idxs = torch.nonzero(valid_mask)\n\n    num_topk = min(topk, valid_idxs.size(0))\n    # torch.sort is actually faster than .topk (at least on GPUs)\n    scores, idxs = scores.sort(descending=True)\n    scores = scores[:num_topk]\n    topk_idxs = valid_idxs[idxs[:num_topk]]\n    keep_idxs, labels = topk_idxs.unbind(dim=1)\n\n    filtered_results = None\n    if results is not None:\n        if isinstance(results, dict):\n            filtered_results = {k: v[keep_idxs] for k, v in results.items()}\n        elif isinstance(results, list):\n            filtered_results = [result[keep_idxs] for result in results]\n        elif isinstance(results, torch.Tensor):\n            filtered_results = results[keep_idxs]\n        else:\n            raise NotImplementedError(f'Only supports dict or list or Tensor, '\n                                      f'but get {type(results)}.')\n    return scores, labels, keep_idxs, filtered_results\n\n\ndef center_of_mass(mask, esp=1e-6):\n    \"\"\"Calculate the centroid coordinates of the mask.\n\n    Args:\n        mask (Tensor): The mask to be calculated, shape (h, w).\n        esp (float): Avoid dividing by zero. 
Default: 1e-6.\n\n    Returns:\n        tuple[Tensor]: the coordinates of the center point of the mask.\n\n            - center_h (Tensor): the center point of the height.\n            - center_w (Tensor): the center point of the width.\n    \"\"\"\n    h, w = mask.shape\n    grid_h = torch.arange(h, device=mask.device)[:, None]\n    grid_w = torch.arange(w, device=mask.device)\n    normalizer = mask.sum().float().clamp(min=esp)\n    center_h = (mask * grid_h).sum() / normalizer\n    center_w = (mask * grid_w).sum() / normalizer\n    return center_h, center_w\n\n\ndef generate_coordinate(featmap_sizes, device='cuda'):\n    \"\"\"Generate the coordinate.\n\n    Args:\n        featmap_sizes (tuple): The feature to be calculated,\n            of shape (N, C, W, H).\n        device (str): The device where the feature will be put on.\n    Returns:\n        coord_feat (Tensor): The coordinate feature, of shape (N, 2, W, H).\n    \"\"\"\n\n    x_range = torch.linspace(-1, 1, featmap_sizes[-1], device=device)\n    y_range = torch.linspace(-1, 1, featmap_sizes[-2], device=device)\n    y, x = torch.meshgrid(y_range, x_range)\n    y = y.expand([featmap_sizes[0], 1, -1, -1])\n    x = x.expand([featmap_sizes[0], 1, -1, -1])\n    coord_feat = torch.cat([x, y], 1)\n\n    return coord_feat\n"
  },
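  {
    "path": "DLTA_AI_app/mmdetection/usage_sketches/core_misc_sketch.py",
    "content": "# Hypothetical usage sketch (not part of the original repository).\n# Demonstrates two helpers from mmdet/core/utils/misc.py: multi_apply(),\n# which maps a function over parallel lists and transposes the results, and\n# filter_scores_and_topk(), which thresholds per-class scores and keeps the\n# top-k candidates. Assumes torch and mmdet are installed.\nimport torch\n\nfrom mmdet.core.utils import filter_scores_and_topk, multi_apply\n\n\ndef scale_and_shift(x, shift, scale=2):\n    return x * scale, x + shift\n\n\ndef main():\n    # multi_apply() returns a tuple of lists: ([2, 4, 6], [11, 22, 33]).\n    print(multi_apply(scale_and_shift, [1, 2, 3], [10, 20, 30]))\n\n    # Per-anchor scores for 3 classes; keep entries above 0.3, at most 2.\n    scores = torch.tensor([[0.1, 0.8, 0.2],\n                           [0.6, 0.1, 0.1],\n                           [0.2, 0.2, 0.9]])\n    top_scores, labels, keep_idxs, _ = filter_scores_and_topk(\n        scores, score_thr=0.3, topk=2)\n    print(top_scores)  # tensor([0.9000, 0.8000])\n    print(labels)      # class indices of the kept scores\n    print(keep_idxs)   # anchor (row) indices of the kept scores\n\n\nif __name__ == '__main__':\n    main()\n"
  },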
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/visualization/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .image import (color_val_matplotlib, imshow_det_bboxes,\n                    imshow_gt_det_bboxes)\nfrom .palette import get_palette, palette_val\n\n__all__ = [\n    'imshow_det_bboxes', 'imshow_gt_det_bboxes', 'color_val_matplotlib',\n    'palette_val', 'get_palette'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/visualization/image.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport sys\n\nimport cv2\nimport matplotlib.pyplot as plt\nimport mmcv\nimport numpy as np\nimport pycocotools.mask as mask_util\nfrom matplotlib.collections import PatchCollection\nfrom matplotlib.patches import Polygon\n\nfrom mmdet.core.evaluation.panoptic_utils import INSTANCE_OFFSET\nfrom ..mask.structures import bitmap_to_polygon\nfrom ..utils import mask2ndarray\nfrom .palette import get_palette, palette_val\n\n__all__ = [\n    'color_val_matplotlib', 'draw_masks', 'draw_bboxes', 'draw_labels',\n    'imshow_det_bboxes', 'imshow_gt_det_bboxes'\n]\n\nEPS = 1e-2\n\n\ndef color_val_matplotlib(color):\n    \"\"\"Convert various input in BGR order to normalized RGB matplotlib color\n    tuples.\n\n    Args:\n        color (:obj`Color` | str | tuple | int | ndarray): Color inputs.\n\n    Returns:\n        tuple[float]: A tuple of 3 normalized floats indicating RGB channels.\n    \"\"\"\n    color = mmcv.color_val(color)\n    color = [color / 255 for color in color[::-1]]\n    return tuple(color)\n\n\ndef _get_adaptive_scales(areas, min_area=800, max_area=30000):\n    \"\"\"Get adaptive scales according to areas.\n\n    The scale range is [0.5, 1.0]. When the area is less than\n    ``'min_area'``, the scale is 0.5 while the area is larger than\n    ``'max_area'``, the scale is 1.0.\n\n    Args:\n        areas (ndarray): The areas of bboxes or masks with the\n            shape of (n, ).\n        min_area (int): Lower bound areas for adaptive scales.\n            Default: 800.\n        max_area (int): Upper bound areas for adaptive scales.\n            Default: 30000.\n\n    Returns:\n        ndarray: The adaotive scales with the shape of (n, ).\n    \"\"\"\n    scales = 0.5 + (areas - min_area) / (max_area - min_area)\n    scales = np.clip(scales, 0.5, 1.0)\n    return scales\n\n\ndef _get_bias_color(base, max_dist=30):\n    \"\"\"Get different colors for each masks.\n\n    Get different colors for each masks by adding a bias\n    color to the base category color.\n    Args:\n        base (ndarray): The base category color with the shape\n            of (3, ).\n        max_dist (int): The max distance of bias. Default: 30.\n\n    Returns:\n        ndarray: The new color for a mask with the shape of (3, ).\n    \"\"\"\n    new_color = base + np.random.randint(\n        low=-max_dist, high=max_dist + 1, size=3)\n    return np.clip(new_color, 0, 255, new_color)\n\n\ndef draw_bboxes(ax, bboxes, color='g', alpha=0.8, thickness=2):\n    \"\"\"Draw bounding boxes on the axes.\n\n    Args:\n        ax (matplotlib.Axes): The input axes.\n        bboxes (ndarray): The input bounding boxes with the shape\n            of (n, 4).\n        color (list[tuple] | matplotlib.color): the colors for each\n            bounding boxes.\n        alpha (float): Transparency of bounding boxes. Default: 0.8.\n        thickness (int): Thickness of lines. 
Default: 2.\n\n    Returns:\n        matplotlib.Axes: The result axes.\n    \"\"\"\n    polygons = []\n    for i, bbox in enumerate(bboxes):\n        bbox_int = bbox.astype(np.int32)\n        poly = [[bbox_int[0], bbox_int[1]], [bbox_int[0], bbox_int[3]],\n                [bbox_int[2], bbox_int[3]], [bbox_int[2], bbox_int[1]]]\n        np_poly = np.array(poly).reshape((4, 2))\n        polygons.append(Polygon(np_poly))\n    p = PatchCollection(\n        polygons,\n        facecolor='none',\n        edgecolors=color,\n        linewidths=thickness,\n        alpha=alpha)\n    ax.add_collection(p)\n\n    return ax\n\n\ndef draw_labels(ax,\n                labels,\n                positions,\n                scores=None,\n                class_names=None,\n                color='w',\n                font_size=8,\n                scales=None,\n                horizontal_alignment='left'):\n    \"\"\"Draw labels on the axes.\n\n    Args:\n        ax (matplotlib.Axes): The input axes.\n        labels (ndarray): The labels with the shape of (n, ).\n        positions (ndarray): The positions to draw each labels.\n        scores (ndarray): The scores for each labels.\n        class_names (list[str]): The class names.\n        color (list[tuple] | matplotlib.color): The colors for labels.\n        font_size (int): Font size of texts. Default: 8.\n        scales (list[float]): Scales of texts. Default: None.\n        horizontal_alignment (str): The horizontal alignment method of\n            texts. Default: 'left'.\n\n    Returns:\n        matplotlib.Axes: The result axes.\n    \"\"\"\n    for i, (pos, label) in enumerate(zip(positions, labels)):\n        label_text = class_names[\n            label] if class_names is not None else f'class {label}'\n        if scores is not None:\n            label_text += f'|{scores[i]:.02f}'\n        text_color = color[i] if isinstance(color, list) else color\n\n        font_size_mask = font_size if scales is None else font_size * scales[i]\n        ax.text(\n            pos[0],\n            pos[1],\n            f'{label_text}',\n            bbox={\n                'facecolor': 'black',\n                'alpha': 0.8,\n                'pad': 0.7,\n                'edgecolor': 'none'\n            },\n            color=text_color,\n            fontsize=font_size_mask,\n            verticalalignment='top',\n            horizontalalignment=horizontal_alignment)\n\n    return ax\n\n\ndef draw_masks(ax, img, masks, color=None, with_edge=True, alpha=0.8):\n    \"\"\"Draw masks on the image and their edges on the axes.\n\n    Args:\n        ax (matplotlib.Axes): The input axes.\n        img (ndarray): The image with the shape of (3, h, w).\n        masks (ndarray): The masks with the shape of (n, h, w).\n        color (ndarray): The colors for each masks with the shape\n            of (n, 3).\n        with_edge (bool): Whether to draw edges. Default: True.\n        alpha (float): Transparency of bounding boxes. 
Default: 0.8.\n\n    Returns:\n        matplotlib.Axes: The result axes.\n        ndarray: The result image.\n    \"\"\"\n    taken_colors = set([0, 0, 0])\n    if color is None:\n        random_colors = np.random.randint(0, 255, (masks.size(0), 3))\n        color = [tuple(c) for c in random_colors]\n        color = np.array(color, dtype=np.uint8)\n    polygons = []\n    for i, mask in enumerate(masks):\n        if with_edge:\n            contours, _ = bitmap_to_polygon(mask)\n            polygons += [Polygon(c) for c in contours]\n\n        color_mask = color[i]\n        while tuple(color_mask) in taken_colors:\n            color_mask = _get_bias_color(color_mask)\n        taken_colors.add(tuple(color_mask))\n\n        mask = mask.astype(bool)\n        img[mask] = img[mask] * (1 - alpha) + color_mask * alpha\n\n    p = PatchCollection(\n        polygons, facecolor='none', edgecolors='w', linewidths=1, alpha=0.8)\n    ax.add_collection(p)\n\n    return ax, img\n\n\ndef imshow_det_bboxes(img,\n                      bboxes=None,\n                      labels=None,\n                      segms=None,\n                      class_names=None,\n                      score_thr=0,\n                      bbox_color='green',\n                      text_color='green',\n                      mask_color=None,\n                      thickness=2,\n                      font_size=8,\n                      win_name='',\n                      show=True,\n                      wait_time=0,\n                      out_file=None):\n    \"\"\"Draw bboxes and class labels (with scores) on an image.\n\n    Args:\n        img (str | ndarray): The image to be displayed.\n        bboxes (ndarray): Bounding boxes (with scores), shaped (n, 4) or\n            (n, 5).\n        labels (ndarray): Labels of bboxes.\n        segms (ndarray | None): Masks, shaped (n,h,w) or None.\n        class_names (list[str]): Names of each classes.\n        score_thr (float): Minimum score of bboxes to be shown. Default: 0.\n        bbox_color (list[tuple] | tuple | str | None): Colors of bbox lines.\n           If a single color is given, it will be applied to all classes.\n           The tuple of color should be in RGB order. Default: 'green'.\n        text_color (list[tuple] | tuple | str | None): Colors of texts.\n           If a single color is given, it will be applied to all classes.\n           The tuple of color should be in RGB order. Default: 'green'.\n        mask_color (list[tuple] | tuple | str | None, optional): Colors of\n           masks. If a single color is given, it will be applied to all\n           classes. The tuple of color should be in RGB order.\n           Default: None.\n        thickness (int): Thickness of lines. Default: 2.\n        font_size (int): Font size of texts. Default: 13.\n        show (bool): Whether to show the image. Default: True.\n        win_name (str): The window name. Default: ''.\n        wait_time (float): Value of waitKey param. 
Default: 0.\n        out_file (str, optional): The filename to write the image.\n            Default: None.\n\n    Returns:\n        ndarray: The image with bboxes drawn on it.\n    \"\"\"\n    assert bboxes is None or bboxes.ndim == 2, \\\n        f' bboxes ndim should be 2, but its ndim is {bboxes.ndim}.'\n    assert labels.ndim == 1, \\\n        f' labels ndim should be 1, but its ndim is {labels.ndim}.'\n    assert bboxes is None or bboxes.shape[1] == 4 or bboxes.shape[1] == 5, \\\n        f' bboxes.shape[1] should be 4 or 5, but its {bboxes.shape[1]}.'\n    assert bboxes is None or bboxes.shape[0] <= labels.shape[0], \\\n        'labels.shape[0] should not be less than bboxes.shape[0].'\n    assert segms is None or segms.shape[0] == labels.shape[0], \\\n        'segms.shape[0] and labels.shape[0] should have the same length.'\n    assert segms is not None or bboxes is not None, \\\n        'segms and bboxes should not be None at the same time.'\n\n    img = mmcv.imread(img).astype(np.uint8)\n\n    if score_thr > 0:\n        assert bboxes is not None and bboxes.shape[1] == 5\n        scores = bboxes[:, -1]\n        inds = scores > score_thr\n        bboxes = bboxes[inds, :]\n        labels = labels[inds]\n        if segms is not None:\n            segms = segms[inds, ...]\n\n    img = mmcv.bgr2rgb(img)\n    width, height = img.shape[1], img.shape[0]\n    img = np.ascontiguousarray(img)\n\n    fig = plt.figure(win_name, frameon=False)\n    plt.title(win_name)\n    canvas = fig.canvas\n    dpi = fig.get_dpi()\n    # add a small EPS to avoid precision lost due to matplotlib's truncation\n    # (https://github.com/matplotlib/matplotlib/issues/15363)\n    fig.set_size_inches((width + EPS) / dpi, (height + EPS) / dpi)\n\n    # remove white edges by set subplot margin\n    plt.subplots_adjust(left=0, right=1, bottom=0, top=1)\n    ax = plt.gca()\n    ax.axis('off')\n\n    max_label = int(max(labels) if len(labels) > 0 else 0)\n    text_palette = palette_val(get_palette(text_color, max_label + 1))\n    text_colors = [text_palette[label] for label in labels]\n\n    num_bboxes = 0\n    if bboxes is not None:\n        num_bboxes = bboxes.shape[0]\n        bbox_palette = palette_val(get_palette(bbox_color, max_label + 1))\n        colors = [bbox_palette[label] for label in labels[:num_bboxes]]\n        draw_bboxes(ax, bboxes, colors, alpha=0.8, thickness=thickness)\n\n        horizontal_alignment = 'left'\n        positions = bboxes[:, :2].astype(np.int32) + thickness\n        areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])\n        scales = _get_adaptive_scales(areas)\n        scores = bboxes[:, 4] if bboxes.shape[1] == 5 else None\n        draw_labels(\n            ax,\n            labels[:num_bboxes],\n            positions,\n            scores=scores,\n            class_names=class_names,\n            color=text_colors,\n            font_size=font_size,\n            scales=scales,\n            horizontal_alignment=horizontal_alignment)\n\n    if segms is not None:\n        mask_palette = get_palette(mask_color, max_label + 1)\n        colors = [mask_palette[label] for label in labels]\n        colors = np.array(colors, dtype=np.uint8)\n        draw_masks(ax, img, segms, colors, with_edge=True)\n\n        if num_bboxes < segms.shape[0]:\n            segms = segms[num_bboxes:]\n            horizontal_alignment = 'center'\n            areas = []\n            positions = []\n            for mask in segms:\n                _, _, stats, centroids = 
cv2.connectedComponentsWithStats(\n                    mask.astype(np.uint8), connectivity=8)\n                largest_id = np.argmax(stats[1:, -1]) + 1\n                positions.append(centroids[largest_id])\n                areas.append(stats[largest_id, -1])\n            areas = np.stack(areas, axis=0)\n            scales = _get_adaptive_scales(areas)\n            draw_labels(\n                ax,\n                labels[num_bboxes:],\n                positions,\n                class_names=class_names,\n                color=text_colors,\n                font_size=font_size,\n                scales=scales,\n                horizontal_alignment=horizontal_alignment)\n\n    plt.imshow(img)\n\n    stream, _ = canvas.print_to_buffer()\n    buffer = np.frombuffer(stream, dtype='uint8')\n    if sys.platform == 'darwin':\n        width, height = canvas.get_width_height(physical=True)\n    img_rgba = buffer.reshape(height, width, 4)\n    rgb, alpha = np.split(img_rgba, [3], axis=2)\n    img = rgb.astype('uint8')\n    img = mmcv.rgb2bgr(img)\n\n    if show:\n        # We do not use cv2 for display because in some cases, opencv will\n        # conflict with Qt, it will output a warning: Current thread\n        # is not the object's thread. You can refer to\n        # https://github.com/opencv/opencv-python/issues/46 for details\n        if wait_time == 0:\n            plt.show()\n        else:\n            plt.show(block=False)\n            plt.pause(wait_time)\n    if out_file is not None:\n        mmcv.imwrite(img, out_file)\n\n    plt.close()\n\n    return img\n\n\ndef imshow_gt_det_bboxes(img,\n                         annotation,\n                         result,\n                         class_names=None,\n                         score_thr=0,\n                         gt_bbox_color=(61, 102, 255),\n                         gt_text_color=(200, 200, 200),\n                         gt_mask_color=(61, 102, 255),\n                         det_bbox_color=(241, 101, 72),\n                         det_text_color=(200, 200, 200),\n                         det_mask_color=(241, 101, 72),\n                         thickness=2,\n                         font_size=13,\n                         win_name='',\n                         show=True,\n                         wait_time=0,\n                         out_file=None,\n                         overlay_gt_pred=True):\n    \"\"\"General visualization GT and result function.\n\n    Args:\n      img (str | ndarray): The image to be displayed.\n      annotation (dict): Ground truth annotations where contain keys of\n          'gt_bboxes' and 'gt_labels' or 'gt_masks'.\n      result (tuple[list] | list): The detection result, can be either\n          (bbox, segm) or just bbox.\n      class_names (list[str]): Names of each classes.\n      score_thr (float): Minimum score of bboxes to be shown. Default: 0.\n      gt_bbox_color (list[tuple] | tuple | str | None): Colors of bbox lines.\n          If a single color is given, it will be applied to all classes.\n          The tuple of color should be in RGB order. Default: (61, 102, 255).\n      gt_text_color (list[tuple] | tuple | str | None): Colors of texts.\n          If a single color is given, it will be applied to all classes.\n          The tuple of color should be in RGB order. Default: (200, 200, 200).\n      gt_mask_color (list[tuple] | tuple | str | None, optional): Colors of\n          masks. 
If a single color is given, it will be applied to all classes.\n          The tuple of color should be in RGB order. Default: (61, 102, 255).\n      det_bbox_color (list[tuple] | tuple | str | None):Colors of bbox lines.\n          If a single color is given, it will be applied to all classes.\n          The tuple of color should be in RGB order. Default: (241, 101, 72).\n      det_text_color (list[tuple] | tuple | str | None):Colors of texts.\n          If a single color is given, it will be applied to all classes.\n          The tuple of color should be in RGB order. Default: (200, 200, 200).\n      det_mask_color (list[tuple] | tuple | str | None, optional): Color of\n          masks. If a single color is given, it will be applied to all classes.\n          The tuple of color should be in RGB order. Default: (241, 101, 72).\n      thickness (int): Thickness of lines. Default: 2.\n      font_size (int): Font size of texts. Default: 13.\n      win_name (str): The window name. Default: ''.\n      show (bool): Whether to show the image. Default: True.\n      wait_time (float): Value of waitKey param. Default: 0.\n      out_file (str, optional): The filename to write the image.\n          Default: None.\n      overlay_gt_pred (bool): Whether to plot gts and predictions on the\n       same image. If False, predictions and gts will be plotted on two same\n       image which will be concatenated in vertical direction. The image\n       above is drawn with gt, and the image below is drawn with the\n       prediction result. Default: True.\n\n    Returns:\n        ndarray: The image with bboxes or masks drawn on it.\n    \"\"\"\n    assert 'gt_bboxes' in annotation\n    assert 'gt_labels' in annotation\n    assert isinstance(result, (tuple, list, dict)), 'Expected ' \\\n        f'tuple or list or dict, but get {type(result)}'\n\n    gt_bboxes = annotation['gt_bboxes']\n    gt_labels = annotation['gt_labels']\n    gt_masks = annotation.get('gt_masks', None)\n    if gt_masks is not None:\n        gt_masks = mask2ndarray(gt_masks)\n\n    gt_seg = annotation.get('gt_semantic_seg', None)\n    if gt_seg is not None:\n        pad_value = 255  # the padding value of gt_seg\n        sem_labels = np.unique(gt_seg)\n        all_labels = np.concatenate((gt_labels, sem_labels), axis=0)\n        all_labels, counts = np.unique(all_labels, return_counts=True)\n        stuff_labels = all_labels[np.logical_and(counts < 2,\n                                                 all_labels != pad_value)]\n        stuff_masks = gt_seg[None] == stuff_labels[:, None, None]\n        gt_labels = np.concatenate((gt_labels, stuff_labels), axis=0)\n        gt_masks = np.concatenate((gt_masks, stuff_masks.astype(np.uint8)),\n                                  axis=0)\n        # If you need to show the bounding boxes,\n        # please comment the following line\n        # gt_bboxes = None\n\n    img = mmcv.imread(img)\n\n    img_with_gt = imshow_det_bboxes(\n        img,\n        gt_bboxes,\n        gt_labels,\n        gt_masks,\n        class_names=class_names,\n        bbox_color=gt_bbox_color,\n        text_color=gt_text_color,\n        mask_color=gt_mask_color,\n        thickness=thickness,\n        font_size=font_size,\n        win_name=win_name,\n        show=False)\n\n    if not isinstance(result, dict):\n        if isinstance(result, tuple):\n            bbox_result, segm_result = result\n            if isinstance(segm_result, tuple):\n                segm_result = segm_result[0]  # ms rcnn\n        else:\n            
bbox_result, segm_result = result, None\n\n        bboxes = np.vstack(bbox_result)\n        labels = [\n            np.full(bbox.shape[0], i, dtype=np.int32)\n            for i, bbox in enumerate(bbox_result)\n        ]\n        labels = np.concatenate(labels)\n\n        segms = None\n        if segm_result is not None and len(labels) > 0:  # non empty\n            segms = mmcv.concat_list(segm_result)\n            segms = mask_util.decode(segms)\n            segms = segms.transpose(2, 0, 1)\n    else:\n        assert class_names is not None, 'We need to know the number ' \\\n                                        'of classes.'\n        VOID = len(class_names)\n        bboxes = None\n        pan_results = result['pan_results']\n        # keep objects ahead\n        ids = np.unique(pan_results)[::-1]\n        legal_indices = ids != VOID\n        ids = ids[legal_indices]\n        labels = np.array([id % INSTANCE_OFFSET for id in ids], dtype=np.int64)\n        segms = (pan_results[None] == ids[:, None, None])\n\n    if overlay_gt_pred:\n        img = imshow_det_bboxes(\n            img_with_gt,\n            bboxes,\n            labels,\n            segms=segms,\n            class_names=class_names,\n            score_thr=score_thr,\n            bbox_color=det_bbox_color,\n            text_color=det_text_color,\n            mask_color=det_mask_color,\n            thickness=thickness,\n            font_size=font_size,\n            win_name=win_name,\n            show=show,\n            wait_time=wait_time,\n            out_file=out_file)\n    else:\n        img_with_det = imshow_det_bboxes(\n            img,\n            bboxes,\n            labels,\n            segms=segms,\n            class_names=class_names,\n            score_thr=score_thr,\n            bbox_color=det_bbox_color,\n            text_color=det_text_color,\n            mask_color=det_mask_color,\n            thickness=thickness,\n            font_size=font_size,\n            win_name=win_name,\n            show=False)\n        img = np.concatenate([img_with_gt, img_with_det], axis=0)\n\n        plt.imshow(img)\n        if show:\n            if wait_time == 0:\n                plt.show()\n            else:\n                plt.show(block=False)\n                plt.pause(wait_time)\n        if out_file is not None:\n            mmcv.imwrite(img, out_file)\n        plt.close()\n\n    return img\n"
  },
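# --- Editor's usage sketch (not part of the repository dump) -----------------
# A minimal, hedged example of calling the imshow_det_bboxes helper defined in
# image.py above. 'demo.jpg', the boxes, labels and class names are made-up
# placeholders; only the call signature comes from the file itself.
import numpy as np
from mmdet.core.visualization.image import imshow_det_bboxes

bboxes = np.array([[20.0, 30.0, 120.0, 160.0, 0.92]])  # (n, 5): x1, y1, x2, y2, score
labels = np.array([0])                                  # one class index per box
drawn = imshow_det_bboxes(
    'demo.jpg',                  # placeholder image path
    bboxes,
    labels,
    class_names=['person'],
    score_thr=0.3,               # boxes below this score are dropped
    show=False,
    out_file='demo_result.jpg')  # the annotated ndarray is also returned
# ------------------------------------------------------------------------------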
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/core/visualization/palette.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\n\n\ndef palette_val(palette):\n    \"\"\"Convert palette to matplotlib palette.\n\n    Args:\n        palette List[tuple]: A list of color tuples.\n\n    Returns:\n        List[tuple[float]]: A list of RGB matplotlib color tuples.\n    \"\"\"\n    new_palette = []\n    for color in palette:\n        color = [c / 255 for c in color]\n        new_palette.append(tuple(color))\n    return new_palette\n\n\ndef get_palette(palette, num_classes):\n    \"\"\"Get palette from various inputs.\n\n    Args:\n        palette (list[tuple] | str | tuple | :obj:`Color`): palette inputs.\n        num_classes (int): the number of classes.\n\n    Returns:\n        list[tuple[int]]: A list of color tuples.\n    \"\"\"\n    assert isinstance(num_classes, int)\n\n    if isinstance(palette, list):\n        dataset_palette = palette\n    elif isinstance(palette, tuple):\n        dataset_palette = [palette] * num_classes\n    elif palette == 'random' or palette is None:\n        state = np.random.get_state()\n        # random color\n        np.random.seed(42)\n        palette = np.random.randint(0, 256, size=(num_classes, 3))\n        np.random.set_state(state)\n        dataset_palette = [tuple(c) for c in palette]\n    elif palette == 'coco':\n        from mmdet.datasets import CocoDataset, CocoPanopticDataset\n        dataset_palette = CocoDataset.PALETTE\n        if len(dataset_palette) < num_classes:\n            dataset_palette = CocoPanopticDataset.PALETTE\n    elif palette == 'citys':\n        from mmdet.datasets import CityscapesDataset\n        dataset_palette = CityscapesDataset.PALETTE\n    elif palette == 'voc':\n        from mmdet.datasets import VOCDataset\n        dataset_palette = VOCDataset.PALETTE\n    elif mmcv.is_str(palette):\n        dataset_palette = [mmcv.color_val(palette)[::-1]] * num_classes\n    else:\n        raise TypeError(f'Invalid type for palette: {type(palette)}')\n\n    assert len(dataset_palette) >= num_classes, \\\n        'The length of palette should not be less than `num_classes`.'\n    return dataset_palette\n"
  },
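# --- Editor's usage sketch (not part of the repository dump) -----------------
# How the two helpers in palette.py fit together: get_palette resolves a
# palette spec ('random', a dataset name, a single tuple, ...) into per-class
# RGB tuples, and palette_val rescales them to the 0-1 floats matplotlib wants.
from mmdet.core.visualization.palette import get_palette, palette_val

num_classes = 3
random_colors = get_palette('random', num_classes)    # reproducible (seeded with 42)
single_color = get_palette((255, 0, 0), num_classes)  # same tuple repeated per class
mpl_colors = palette_val(single_color)                # (255, 0, 0) -> (1.0, 0.0, 0.0)
# ------------------------------------------------------------------------------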
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .backbones import *  # noqa: F401,F403\nfrom .builder import (BACKBONES, DETECTORS, HEADS, LOSSES, NECKS,\n                      ROI_EXTRACTORS, SHARED_HEADS, build_backbone,\n                      build_detector, build_head, build_loss, build_neck,\n                      build_roi_extractor, build_shared_head)\nfrom .dense_heads import *  # noqa: F401,F403\nfrom .detectors import *  # noqa: F401,F403\nfrom .losses import *  # noqa: F401,F403\nfrom .necks import *  # noqa: F401,F403\nfrom .plugins import *  # noqa: F401,F403\nfrom .roi_heads import *  # noqa: F401,F403\nfrom .seg_heads import *  # noqa: F401,F403\n\n__all__ = [\n    'BACKBONES', 'NECKS', 'ROI_EXTRACTORS', 'SHARED_HEADS', 'HEADS', 'LOSSES',\n    'DETECTORS', 'build_backbone', 'build_neck', 'build_roi_extractor',\n    'build_shared_head', 'build_head', 'build_loss', 'build_detector'\n]\n"
  },
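# --- Editor's usage sketch (not part of the repository dump) -----------------
# The registries and build_* helpers re-exported above follow the usual
# OpenMMLab pattern: a config dict with a 'type' key is turned into a module
# instance. A minimal sketch with a plain ResNet backbone:
from mmdet.models import build_backbone

backbone = build_backbone(dict(type='ResNet', depth=50))
# ------------------------------------------------------------------------------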
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .csp_darknet import CSPDarknet\nfrom .darknet import Darknet\nfrom .detectors_resnet import DetectoRS_ResNet\nfrom .detectors_resnext import DetectoRS_ResNeXt\nfrom .efficientnet import EfficientNet\nfrom .hourglass import HourglassNet\nfrom .hrnet import HRNet\nfrom .mobilenet_v2 import MobileNetV2\nfrom .pvt import PyramidVisionTransformer, PyramidVisionTransformerV2\nfrom .regnet import RegNet\nfrom .res2net import Res2Net\nfrom .resnest import ResNeSt\nfrom .resnet import ResNet, ResNetV1d\nfrom .resnext import ResNeXt\nfrom .ssd_vgg import SSDVGG\nfrom .swin import SwinTransformer\nfrom .trident_resnet import TridentResNet\n\n__all__ = [\n    'RegNet', 'ResNet', 'ResNetV1d', 'ResNeXt', 'SSDVGG', 'HRNet',\n    'MobileNetV2', 'Res2Net', 'HourglassNet', 'DetectoRS_ResNet',\n    'DetectoRS_ResNeXt', 'Darknet', 'ResNeSt', 'TridentResNet', 'CSPDarknet',\n    'SwinTransformer', 'PyramidVisionTransformer',\n    'PyramidVisionTransformerV2', 'EfficientNet'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/csp_darknet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, DepthwiseSeparableConvModule\nfrom mmcv.runner import BaseModule\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom ..builder import BACKBONES\nfrom ..utils import CSPLayer\n\n\nclass Focus(nn.Module):\n    \"\"\"Focus width and height information into channel space.\n\n    Args:\n        in_channels (int): The input channels of this Module.\n        out_channels (int): The output channels of this Module.\n        kernel_size (int): The kernel size of the convolution. Default: 1\n        stride (int): The stride of the convolution. Default: 1\n        conv_cfg (dict): Config dict for convolution layer. Default: None,\n            which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='BN', momentum=0.03, eps=0.001).\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='Swish').\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size=1,\n                 stride=1,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),\n                 act_cfg=dict(type='Swish')):\n        super().__init__()\n        self.conv = ConvModule(\n            in_channels * 4,\n            out_channels,\n            kernel_size,\n            stride,\n            padding=(kernel_size - 1) // 2,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n\n    def forward(self, x):\n        # shape of x (b,c,w,h) -> y(b,4c,w/2,h/2)\n        patch_top_left = x[..., ::2, ::2]\n        patch_top_right = x[..., ::2, 1::2]\n        patch_bot_left = x[..., 1::2, ::2]\n        patch_bot_right = x[..., 1::2, 1::2]\n        x = torch.cat(\n            (\n                patch_top_left,\n                patch_bot_left,\n                patch_top_right,\n                patch_bot_right,\n            ),\n            dim=1,\n        )\n        return self.conv(x)\n\n\nclass SPPBottleneck(BaseModule):\n    \"\"\"Spatial pyramid pooling layer used in YOLOv3-SPP.\n\n    Args:\n        in_channels (int): The input channels of this Module.\n        out_channels (int): The output channels of this Module.\n        kernel_sizes (tuple[int]): Sequential of kernel sizes of pooling\n            layers. Default: (5, 9, 13).\n        conv_cfg (dict): Config dict for convolution layer. 
Default: None,\n            which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='BN').\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='Swish').\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_sizes=(5, 9, 13),\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),\n                 act_cfg=dict(type='Swish'),\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        mid_channels = in_channels // 2\n        self.conv1 = ConvModule(\n            in_channels,\n            mid_channels,\n            1,\n            stride=1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n        self.poolings = nn.ModuleList([\n            nn.MaxPool2d(kernel_size=ks, stride=1, padding=ks // 2)\n            for ks in kernel_sizes\n        ])\n        conv2_channels = mid_channels * (len(kernel_sizes) + 1)\n        self.conv2 = ConvModule(\n            conv2_channels,\n            out_channels,\n            1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = torch.cat([x] + [pooling(x) for pooling in self.poolings], dim=1)\n        x = self.conv2(x)\n        return x\n\n\n@BACKBONES.register_module()\nclass CSPDarknet(BaseModule):\n    \"\"\"CSP-Darknet backbone used in YOLOv5 and YOLOX.\n\n    Args:\n        arch (str): Architecture of CSP-Darknet, from {P5, P6}.\n            Default: P5.\n        deepen_factor (float): Depth multiplier, multiply number of\n            blocks in CSP layer by this amount. Default: 1.0.\n        widen_factor (float): Width multiplier, multiply number of\n            channels in each layer by this amount. Default: 1.0.\n        out_indices (Sequence[int]): Output from which stages.\n            Default: (2, 3, 4).\n        frozen_stages (int): Stages to be frozen (stop grad and set eval\n            mode). -1 means not freezing any parameters. Default: -1.\n        use_depthwise (bool): Whether to use depthwise separable convolution.\n            Default: False.\n        arch_ovewrite(list): Overwrite default arch settings. Default: None.\n        spp_kernal_sizes: (tuple[int]): Sequential of kernel sizes of SPP\n            layers. Default: (5, 9, 13).\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Default: dict(type='BN', requires_grad=True).\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='LeakyReLU', negative_slope=0.1).\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). 
Note: Effect on Batch Norm\n            and its variants only.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None.\n    Example:\n        >>> from mmdet.models import CSPDarknet\n        >>> import torch\n        >>> self = CSPDarknet(depth=53)\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 3, 416, 416)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...     print(tuple(level_out.shape))\n        ...\n        (1, 256, 52, 52)\n        (1, 512, 26, 26)\n        (1, 1024, 13, 13)\n    \"\"\"\n    # From left to right:\n    # in_channels, out_channels, num_blocks, add_identity, use_spp\n    arch_settings = {\n        'P5': [[64, 128, 3, True, False], [128, 256, 9, True, False],\n               [256, 512, 9, True, False], [512, 1024, 3, False, True]],\n        'P6': [[64, 128, 3, True, False], [128, 256, 9, True, False],\n               [256, 512, 9, True, False], [512, 768, 3, True, False],\n               [768, 1024, 3, False, True]]\n    }\n\n    def __init__(self,\n                 arch='P5',\n                 deepen_factor=1.0,\n                 widen_factor=1.0,\n                 out_indices=(2, 3, 4),\n                 frozen_stages=-1,\n                 use_depthwise=False,\n                 arch_ovewrite=None,\n                 spp_kernal_sizes=(5, 9, 13),\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),\n                 act_cfg=dict(type='Swish'),\n                 norm_eval=False,\n                 init_cfg=dict(\n                     type='Kaiming',\n                     layer='Conv2d',\n                     a=math.sqrt(5),\n                     distribution='uniform',\n                     mode='fan_in',\n                     nonlinearity='leaky_relu')):\n        super().__init__(init_cfg)\n        arch_setting = self.arch_settings[arch]\n        if arch_ovewrite:\n            arch_setting = arch_ovewrite\n        assert set(out_indices).issubset(\n            i for i in range(len(arch_setting) + 1))\n        if frozen_stages not in range(-1, len(arch_setting) + 1):\n            raise ValueError('frozen_stages must be in range(-1, '\n                             'len(arch_setting) + 1). 
But received '\n                             f'{frozen_stages}')\n\n        self.out_indices = out_indices\n        self.frozen_stages = frozen_stages\n        self.use_depthwise = use_depthwise\n        self.norm_eval = norm_eval\n        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule\n\n        self.stem = Focus(\n            3,\n            int(arch_setting[0][0] * widen_factor),\n            kernel_size=3,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n        self.layers = ['stem']\n\n        for i, (in_channels, out_channels, num_blocks, add_identity,\n                use_spp) in enumerate(arch_setting):\n            in_channels = int(in_channels * widen_factor)\n            out_channels = int(out_channels * widen_factor)\n            num_blocks = max(round(num_blocks * deepen_factor), 1)\n            stage = []\n            conv_layer = conv(\n                in_channels,\n                out_channels,\n                3,\n                stride=2,\n                padding=1,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg)\n            stage.append(conv_layer)\n            if use_spp:\n                spp = SPPBottleneck(\n                    out_channels,\n                    out_channels,\n                    kernel_sizes=spp_kernal_sizes,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg)\n                stage.append(spp)\n            csp_layer = CSPLayer(\n                out_channels,\n                out_channels,\n                num_blocks=num_blocks,\n                add_identity=add_identity,\n                use_depthwise=use_depthwise,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg)\n            stage.append(csp_layer)\n            self.add_module(f'stage{i + 1}', nn.Sequential(*stage))\n            self.layers.append(f'stage{i + 1}')\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            for i in range(self.frozen_stages + 1):\n                m = getattr(self, self.layers[i])\n                m.eval()\n                for param in m.parameters():\n                    param.requires_grad = False\n\n    def train(self, mode=True):\n        super(CSPDarknet, self).train(mode)\n        self._freeze_stages()\n        if mode and self.norm_eval:\n            for m in self.modules():\n                if isinstance(m, _BatchNorm):\n                    m.eval()\n\n    def forward(self, x):\n        outs = []\n        for i, layer_name in enumerate(self.layers):\n            layer = getattr(self, layer_name)\n            x = layer(x)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n"
  },
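# --- Editor's usage sketch (not part of the repository dump) -----------------
# The docstring example above constructs CSPDarknet(depth=53), but CSPDarknet
# takes no 'depth' argument; the architecture is selected via arch /
# deepen_factor / widen_factor. A corrected sketch reproducing the documented
# output shapes (default out_indices=(2, 3, 4), 416x416 input):
import torch
from mmdet.models import CSPDarknet

model = CSPDarknet(arch='P5')
model.eval()
inputs = torch.rand(1, 3, 416, 416)
with torch.no_grad():
    for level_out in model(inputs):
        print(tuple(level_out.shape))
# (1, 256, 52, 52) / (1, 512, 26, 26) / (1, 1024, 13, 13)
# ------------------------------------------------------------------------------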
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/darknet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Copyright (c) 2019 Western Digital Corporation or its affiliates.\n\nimport warnings\n\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom ..builder import BACKBONES\n\n\nclass ResBlock(BaseModule):\n    \"\"\"The basic residual block used in Darknet. Each ResBlock consists of two\n    ConvModules and the input is added to the final output. Each ConvModule is\n    composed of Conv, BN, and LeakyReLU. In YoloV3 paper, the first convLayer\n    has half of the number of the filters as much as the second convLayer. The\n    first convLayer has filter size of 1x1 and the second one has the filter\n    size of 3x3.\n\n    Args:\n        in_channels (int): The input channels. Must be even.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Default: dict(type='BN', requires_grad=True)\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='LeakyReLU', negative_slope=0.1).\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),\n                 init_cfg=None):\n        super(ResBlock, self).__init__(init_cfg)\n        assert in_channels % 2 == 0  # ensure the in_channels is even\n        half_in_channels = in_channels // 2\n\n        # shortcut\n        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)\n\n        self.conv1 = ConvModule(in_channels, half_in_channels, 1, **cfg)\n        self.conv2 = ConvModule(\n            half_in_channels, in_channels, 3, padding=1, **cfg)\n\n    def forward(self, x):\n        residual = x\n        out = self.conv1(x)\n        out = self.conv2(out)\n        out = out + residual\n\n        return out\n\n\n@BACKBONES.register_module()\nclass Darknet(BaseModule):\n    \"\"\"Darknet backbone.\n\n    Args:\n        depth (int): Depth of Darknet. Currently only support 53.\n        out_indices (Sequence[int]): Output from which stages.\n        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).\n            -1 means not freezing any parameters. Default: -1.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Default: dict(type='BN', requires_grad=True)\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='LeakyReLU', negative_slope=0.1).\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only.\n        pretrained (str, optional): model pretrained path. Default: None\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n\n    Example:\n        >>> from mmdet.models import Darknet\n        >>> import torch\n        >>> self = Darknet(depth=53)\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 3, 416, 416)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...  
   print(tuple(level_out.shape))\n        ...\n        (1, 256, 52, 52)\n        (1, 512, 26, 26)\n        (1, 1024, 13, 13)\n    \"\"\"\n\n    # Dict(depth: (layers, channels))\n    arch_settings = {\n        53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),\n                               (512, 1024)))\n    }\n\n    def __init__(self,\n                 depth=53,\n                 out_indices=(3, 4, 5),\n                 frozen_stages=-1,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),\n                 norm_eval=True,\n                 pretrained=None,\n                 init_cfg=None):\n        super(Darknet, self).__init__(init_cfg)\n        if depth not in self.arch_settings:\n            raise KeyError(f'invalid depth {depth} for darknet')\n\n        self.depth = depth\n        self.out_indices = out_indices\n        self.frozen_stages = frozen_stages\n        self.layers, self.channels = self.arch_settings[depth]\n\n        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)\n\n        self.conv1 = ConvModule(3, 32, 3, padding=1, **cfg)\n\n        self.cr_blocks = ['conv1']\n        for i, n_layers in enumerate(self.layers):\n            layer_name = f'conv_res_block{i + 1}'\n            in_c, out_c = self.channels[i]\n            self.add_module(\n                layer_name,\n                self.make_conv_res_block(in_c, out_c, n_layers, **cfg))\n            self.cr_blocks.append(layer_name)\n\n        self.norm_eval = norm_eval\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            if init_cfg is None:\n                self.init_cfg = [\n                    dict(type='Kaiming', layer='Conv2d'),\n                    dict(\n                        type='Constant',\n                        val=1,\n                        layer=['_BatchNorm', 'GroupNorm'])\n                ]\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n    def forward(self, x):\n        outs = []\n        for i, layer_name in enumerate(self.cr_blocks):\n            cr_block = getattr(self, layer_name)\n            x = cr_block(x)\n            if i in self.out_indices:\n                outs.append(x)\n\n        return tuple(outs)\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            for i in range(self.frozen_stages):\n                m = getattr(self, self.cr_blocks[i])\n                m.eval()\n                for param in m.parameters():\n                    param.requires_grad = False\n\n    def train(self, mode=True):\n        super(Darknet, self).train(mode)\n        self._freeze_stages()\n        if mode and self.norm_eval:\n            for m in self.modules():\n                if isinstance(m, _BatchNorm):\n                    m.eval()\n\n    @staticmethod\n    def make_conv_res_block(in_channels,\n                            out_channels,\n                            res_repeat,\n                            conv_cfg=None,\n                            norm_cfg=dict(type='BN', requires_grad=True),\n                            
act_cfg=dict(type='LeakyReLU',\n                                         negative_slope=0.1)):\n        \"\"\"In Darknet backbone, ConvLayer is usually followed by ResBlock. This\n        function will make that. The Conv layers always have 3x3 filters with\n        stride=2. The number of the filters in Conv layer is the same as the\n        out channels of the ResBlock.\n\n        Args:\n            in_channels (int): The number of input channels.\n            out_channels (int): The number of output channels.\n            res_repeat (int): The number of ResBlocks.\n            conv_cfg (dict): Config dict for convolution layer. Default: None.\n            norm_cfg (dict): Dictionary to construct and config norm layer.\n                Default: dict(type='BN', requires_grad=True)\n            act_cfg (dict): Config dict for activation layer.\n                Default: dict(type='LeakyReLU', negative_slope=0.1).\n        \"\"\"\n\n        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)\n\n        model = nn.Sequential()\n        model.add_module(\n            'conv',\n            ConvModule(\n                in_channels, out_channels, 3, stride=2, padding=1, **cfg))\n        for idx in range(res_repeat):\n            model.add_module('res{}'.format(idx),\n                             ResBlock(out_channels, **cfg))\n        return model\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/detectors_resnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn import (build_conv_layer, build_norm_layer, constant_init,\n                      kaiming_init)\nfrom mmcv.runner import Sequential, load_checkpoint\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom mmdet.utils import get_root_logger\nfrom ..builder import BACKBONES\nfrom .resnet import BasicBlock\nfrom .resnet import Bottleneck as _Bottleneck\nfrom .resnet import ResNet\n\n\nclass Bottleneck(_Bottleneck):\n    r\"\"\"Bottleneck for the ResNet backbone in `DetectoRS\n    <https://arxiv.org/pdf/2006.02334.pdf>`_.\n\n    This bottleneck allows the users to specify whether to use\n    SAC (Switchable Atrous Convolution) and RFP (Recursive Feature Pyramid).\n\n    Args:\n         inplanes (int): The number of input channels.\n         planes (int): The number of output channels before expansion.\n         rfp_inplanes (int, optional): The number of channels from RFP.\n             Default: None. If specified, an additional conv layer will be\n             added for ``rfp_feat``. Otherwise, the structure is the same as\n             base class.\n         sac (dict, optional): Dictionary to construct SAC. Default: None.\n         init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 rfp_inplanes=None,\n                 sac=None,\n                 init_cfg=None,\n                 **kwargs):\n        super(Bottleneck, self).__init__(\n            inplanes, planes, init_cfg=init_cfg, **kwargs)\n\n        assert sac is None or isinstance(sac, dict)\n        self.sac = sac\n        self.with_sac = sac is not None\n        if self.with_sac:\n            self.conv2 = build_conv_layer(\n                self.sac,\n                planes,\n                planes,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation,\n                bias=False)\n\n        self.rfp_inplanes = rfp_inplanes\n        if self.rfp_inplanes:\n            self.rfp_conv = build_conv_layer(\n                None,\n                self.rfp_inplanes,\n                planes * self.expansion,\n                1,\n                stride=1,\n                bias=True)\n            if init_cfg is None:\n                self.init_cfg = dict(\n                    type='Constant', val=0, override=dict(name='rfp_conv'))\n\n    def rfp_forward(self, x, rfp_feat):\n        \"\"\"The forward function that also takes the RFP features as input.\"\"\"\n\n        def _inner_forward(x):\n            identity = x\n\n            out = self.conv1(x)\n            out = self.norm1(out)\n            out = self.relu(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv1_plugin_names)\n\n            out = self.conv2(out)\n            out = self.norm2(out)\n            out = self.relu(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv2_plugin_names)\n\n            out = self.conv3(out)\n            out = self.norm3(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv3_plugin_names)\n\n            if self.downsample is not None:\n                identity = self.downsample(x)\n\n            
out += identity\n\n            return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        else:\n            out = _inner_forward(x)\n\n        if self.rfp_inplanes:\n            rfp_feat = self.rfp_conv(rfp_feat)\n            out = out + rfp_feat\n\n        out = self.relu(out)\n\n        return out\n\n\nclass ResLayer(Sequential):\n    \"\"\"ResLayer to build ResNet style backbone for RPF in detectoRS.\n\n    The difference between this module and base class is that we pass\n    ``rfp_inplanes`` to the first block.\n\n    Args:\n        block (nn.Module): block used to build ResLayer.\n        inplanes (int): inplanes of block.\n        planes (int): planes of block.\n        num_blocks (int): number of blocks.\n        stride (int): stride of the first block. Default: 1\n        avg_down (bool): Use AvgPool instead of stride conv when\n            downsampling in the bottleneck. Default: False\n        conv_cfg (dict): dictionary to construct and config conv layer.\n            Default: None\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: dict(type='BN')\n        downsample_first (bool): Downsample at the first block or last block.\n            False for Hourglass, True for ResNet. Default: True\n        rfp_inplanes (int, optional): The number of channels from RFP.\n            Default: None. If specified, an additional conv layer will be\n            added for ``rfp_feat``. Otherwise, the structure is the same as\n            base class.\n    \"\"\"\n\n    def __init__(self,\n                 block,\n                 inplanes,\n                 planes,\n                 num_blocks,\n                 stride=1,\n                 avg_down=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 downsample_first=True,\n                 rfp_inplanes=None,\n                 **kwargs):\n        self.block = block\n        assert downsample_first, f'downsample_first={downsample_first} is ' \\\n                                 'not supported in DetectoRS'\n\n        downsample = None\n        if stride != 1 or inplanes != planes * block.expansion:\n            downsample = []\n            conv_stride = stride\n            if avg_down and stride != 1:\n                conv_stride = 1\n                downsample.append(\n                    nn.AvgPool2d(\n                        kernel_size=stride,\n                        stride=stride,\n                        ceil_mode=True,\n                        count_include_pad=False))\n            downsample.extend([\n                build_conv_layer(\n                    conv_cfg,\n                    inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=conv_stride,\n                    bias=False),\n                build_norm_layer(norm_cfg, planes * block.expansion)[1]\n            ])\n            downsample = nn.Sequential(*downsample)\n\n        layers = []\n        layers.append(\n            block(\n                inplanes=inplanes,\n                planes=planes,\n                stride=stride,\n                downsample=downsample,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                rfp_inplanes=rfp_inplanes,\n                **kwargs))\n        inplanes = planes * block.expansion\n        for _ in range(1, num_blocks):\n            layers.append(\n                block(\n                 
   inplanes=inplanes,\n                    planes=planes,\n                    stride=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    **kwargs))\n\n        super(ResLayer, self).__init__(*layers)\n\n\n@BACKBONES.register_module()\nclass DetectoRS_ResNet(ResNet):\n    \"\"\"ResNet backbone for DetectoRS.\n\n    Args:\n        sac (dict, optional): Dictionary to construct SAC (Switchable Atrous\n            Convolution). Default: None.\n        stage_with_sac (list): Which stage to use sac. Default: (False, False,\n            False, False).\n        rfp_inplanes (int, optional): The number of channels from RFP.\n            Default: None. If specified, an additional conv layer will be\n            added for ``rfp_feat``. Otherwise, the structure is the same as\n            base class.\n        output_img (bool): If ``True``, the input image will be inserted into\n            the starting position of output. Default: False.\n    \"\"\"\n\n    arch_settings = {\n        50: (Bottleneck, (3, 4, 6, 3)),\n        101: (Bottleneck, (3, 4, 23, 3)),\n        152: (Bottleneck, (3, 8, 36, 3))\n    }\n\n    def __init__(self,\n                 sac=None,\n                 stage_with_sac=(False, False, False, False),\n                 rfp_inplanes=None,\n                 output_img=False,\n                 pretrained=None,\n                 init_cfg=None,\n                 **kwargs):\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n        self.pretrained = pretrained\n        if init_cfg is not None:\n            assert isinstance(init_cfg, dict), \\\n                f'init_cfg must be a dict, but got {type(init_cfg)}'\n            if 'type' in init_cfg:\n                assert init_cfg.get('type') == 'Pretrained', \\\n                    'Only can initialize module by loading a pretrained model'\n            else:\n                raise KeyError('`init_cfg` must contain the key \"type\"')\n            self.pretrained = init_cfg.get('checkpoint')\n        self.sac = sac\n        self.stage_with_sac = stage_with_sac\n        self.rfp_inplanes = rfp_inplanes\n        self.output_img = output_img\n        super(DetectoRS_ResNet, self).__init__(**kwargs)\n\n        self.inplanes = self.stem_channels\n        self.res_layers = []\n        for i, num_blocks in enumerate(self.stage_blocks):\n            stride = self.strides[i]\n            dilation = self.dilations[i]\n            dcn = self.dcn if self.stage_with_dcn[i] else None\n            sac = self.sac if self.stage_with_sac[i] else None\n            if self.plugins is not None:\n                stage_plugins = self.make_stage_plugins(self.plugins, i)\n            else:\n                stage_plugins = None\n            planes = self.base_channels * 2**i\n            res_layer = self.make_res_layer(\n                block=self.block,\n                inplanes=self.inplanes,\n                planes=planes,\n                num_blocks=num_blocks,\n                stride=stride,\n                dilation=dilation,\n                style=self.style,\n                avg_down=self.avg_down,\n                with_cp=self.with_cp,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg,\n                dcn=dcn,\n                sac=sac,\n                rfp_inplanes=rfp_inplanes if i > 0 else None,\n                plugins=stage_plugins)\n            self.inplanes = planes * self.block.expansion\n      
      layer_name = f'layer{i + 1}'\n            self.add_module(layer_name, res_layer)\n            self.res_layers.append(layer_name)\n\n        self._freeze_stages()\n\n    # In order to be properly initialized by RFP\n    def init_weights(self):\n        # Calling this method will cause parameter initialization exception\n        # super(DetectoRS_ResNet, self).init_weights()\n\n        if isinstance(self.pretrained, str):\n            logger = get_root_logger()\n            load_checkpoint(self, self.pretrained, strict=False, logger=logger)\n        elif self.pretrained is None:\n            for m in self.modules():\n                if isinstance(m, nn.Conv2d):\n                    kaiming_init(m)\n                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):\n                    constant_init(m, 1)\n\n            if self.dcn is not None:\n                for m in self.modules():\n                    if isinstance(m, Bottleneck) and hasattr(\n                            m.conv2, 'conv_offset'):\n                        constant_init(m.conv2.conv_offset, 0)\n\n            if self.zero_init_residual:\n                for m in self.modules():\n                    if isinstance(m, Bottleneck):\n                        constant_init(m.norm3, 0)\n                    elif isinstance(m, BasicBlock):\n                        constant_init(m.norm2, 0)\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n    def make_res_layer(self, **kwargs):\n        \"\"\"Pack all blocks in a stage into a ``ResLayer`` for DetectoRS.\"\"\"\n        return ResLayer(**kwargs)\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        outs = list(super(DetectoRS_ResNet, self).forward(x))\n        if self.output_img:\n            outs.insert(0, x)\n        return tuple(outs)\n\n    def rfp_forward(self, x, rfp_feats):\n        \"\"\"Forward function for RFP.\"\"\"\n        if self.deep_stem:\n            x = self.stem(x)\n        else:\n            x = self.conv1(x)\n            x = self.norm1(x)\n            x = self.relu(x)\n        x = self.maxpool(x)\n        outs = []\n        for i, layer_name in enumerate(self.res_layers):\n            res_layer = getattr(self, layer_name)\n            rfp_feat = rfp_feats[i] if i > 0 else None\n            for layer in res_layer:\n                x = layer.rfp_forward(x, rfp_feat)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n"
  },
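# --- Editor's usage sketch (not part of the repository dump) -----------------
# DetectoRS_ResNet behaves like the plain ResNet backbone until SAC or RFP is
# configured; with output_img=True the raw input is prepended to the feature
# tuple so the recursive feature pyramid can feed it back in. A hedged sketch:
import torch
from mmdet.models import DetectoRS_ResNet

model = DetectoRS_ResNet(depth=50, output_img=True)
model.eval()
x = torch.rand(1, 3, 224, 224)
with torch.no_grad():
    outs = model(x)
print(len(outs))      # 5: the input image plus the 4 stage feature maps
print(outs[0].shape)  # torch.Size([1, 3, 224, 224])
# ------------------------------------------------------------------------------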
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/detectors_resnext.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\n\nfrom ..builder import BACKBONES\nfrom .detectors_resnet import Bottleneck as _Bottleneck\nfrom .detectors_resnet import DetectoRS_ResNet\n\n\nclass Bottleneck(_Bottleneck):\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 groups=1,\n                 base_width=4,\n                 base_channels=64,\n                 **kwargs):\n        \"\"\"Bottleneck block for ResNeXt.\n\n        If style is \"pytorch\", the stride-two layer is the 3x3 conv layer, if\n        it is \"caffe\", the stride-two layer is the first 1x1 conv layer.\n        \"\"\"\n        super(Bottleneck, self).__init__(inplanes, planes, **kwargs)\n\n        if groups == 1:\n            width = self.planes\n        else:\n            width = math.floor(self.planes *\n                               (base_width / base_channels)) * groups\n\n        self.norm1_name, norm1 = build_norm_layer(\n            self.norm_cfg, width, postfix=1)\n        self.norm2_name, norm2 = build_norm_layer(\n            self.norm_cfg, width, postfix=2)\n        self.norm3_name, norm3 = build_norm_layer(\n            self.norm_cfg, self.planes * self.expansion, postfix=3)\n\n        self.conv1 = build_conv_layer(\n            self.conv_cfg,\n            self.inplanes,\n            width,\n            kernel_size=1,\n            stride=self.conv1_stride,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n        fallback_on_stride = False\n        self.with_modulated_dcn = False\n        if self.with_dcn:\n            fallback_on_stride = self.dcn.pop('fallback_on_stride', False)\n        if self.with_sac:\n            self.conv2 = build_conv_layer(\n                self.sac,\n                width,\n                width,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation,\n                groups=groups,\n                bias=False)\n        elif not self.with_dcn or fallback_on_stride:\n            self.conv2 = build_conv_layer(\n                self.conv_cfg,\n                width,\n                width,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation,\n                groups=groups,\n                bias=False)\n        else:\n            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'\n            self.conv2 = build_conv_layer(\n                self.dcn,\n                width,\n                width,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation,\n                groups=groups,\n                bias=False)\n\n        self.add_module(self.norm2_name, norm2)\n        self.conv3 = build_conv_layer(\n            self.conv_cfg,\n            width,\n            self.planes * self.expansion,\n            kernel_size=1,\n            bias=False)\n        self.add_module(self.norm3_name, norm3)\n\n\n@BACKBONES.register_module()\nclass DetectoRS_ResNeXt(DetectoRS_ResNet):\n    \"\"\"ResNeXt backbone for DetectoRS.\n\n    Args:\n        groups (int): The number of groups in ResNeXt.\n        base_width (int): The base width of ResNeXt.\n    \"\"\"\n\n    arch_settings = {\n        50: (Bottleneck, 
(3, 4, 6, 3)),\n        101: (Bottleneck, (3, 4, 23, 3)),\n        152: (Bottleneck, (3, 8, 36, 3))\n    }\n\n    def __init__(self, groups=1, base_width=4, **kwargs):\n        self.groups = groups\n        self.base_width = base_width\n        super(DetectoRS_ResNeXt, self).__init__(**kwargs)\n\n    def make_res_layer(self, **kwargs):\n        return super().make_res_layer(\n            groups=self.groups,\n            base_width=self.base_width,\n            base_channels=self.base_channels,\n            **kwargs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/efficientnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport math\nfrom functools import partial\n\nimport torch\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn.bricks import ConvModule, DropPath\nfrom mmcv.runner import BaseModule, Sequential\n\nfrom ..builder import BACKBONES\nfrom ..utils import InvertedResidual, SELayer, make_divisible\n\n\nclass EdgeResidual(BaseModule):\n    \"\"\"Edge Residual Block.\n\n    Args:\n        in_channels (int): The input channels of this module.\n        out_channels (int): The output channels of this module.\n        mid_channels (int): The input channels of the second convolution.\n        kernel_size (int): The kernel size of the first convolution.\n            Defaults to 3.\n        stride (int): The stride of the first convolution. Defaults to 1.\n        se_cfg (dict, optional): Config dict for se layer. Defaults to None,\n            which means no se layer.\n        with_residual (bool): Use residual connection. Defaults to True.\n        conv_cfg (dict, optional): Config dict for convolution layer.\n            Defaults to None, which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Defaults to ``dict(type='BN')``.\n        act_cfg (dict): Config dict for activation layer.\n            Defaults to ``dict(type='ReLU')``.\n        drop_path_rate (float): stochastic depth rate. Defaults to 0.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed. Defaults to False.\n        init_cfg (dict | list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 mid_channels,\n                 kernel_size=3,\n                 stride=1,\n                 se_cfg=None,\n                 with_residual=True,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 act_cfg=dict(type='ReLU'),\n                 drop_path_rate=0.,\n                 with_cp=False,\n                 init_cfg=None,\n                 **kwargs):\n        super(EdgeResidual, self).__init__(init_cfg=init_cfg)\n        assert stride in [1, 2]\n        self.with_cp = with_cp\n        self.drop_path = DropPath(\n            drop_path_rate) if drop_path_rate > 0 else nn.Identity()\n        self.with_se = se_cfg is not None\n        self.with_residual = (\n            stride == 1 and in_channels == out_channels and with_residual)\n\n        if self.with_se:\n            assert isinstance(se_cfg, dict)\n\n        self.conv1 = ConvModule(\n            in_channels=in_channels,\n            out_channels=mid_channels,\n            kernel_size=kernel_size,\n            stride=1,\n            padding=kernel_size // 2,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n\n        if self.with_se:\n            self.se = SELayer(**se_cfg)\n\n        self.conv2 = ConvModule(\n            in_channels=mid_channels,\n            out_channels=out_channels,\n            kernel_size=1,\n            stride=stride,\n            padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=None)\n\n    def forward(self, x):\n\n        def _inner_forward(x):\n            out = x\n            out = self.conv1(out)\n\n            if self.with_se:\n                out = self.se(out)\n\n            out = 
self.conv2(out)\n\n            if self.with_residual:\n                return x + self.drop_path(out)\n            else:\n                return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        else:\n            out = _inner_forward(x)\n\n        return out\n\n\ndef model_scaling(layer_setting, arch_setting):\n    \"\"\"Scaling operation to the layer's parameters according to the\n    arch_setting.\"\"\"\n    # scale width\n    new_layer_setting = copy.deepcopy(layer_setting)\n    for layer_cfg in new_layer_setting:\n        for block_cfg in layer_cfg:\n            block_cfg[1] = make_divisible(block_cfg[1] * arch_setting[0], 8)\n\n    # scale depth\n    split_layer_setting = [new_layer_setting[0]]\n    for layer_cfg in new_layer_setting[1:-1]:\n        tmp_index = [0]\n        for i in range(len(layer_cfg) - 1):\n            if layer_cfg[i + 1][1] != layer_cfg[i][1]:\n                tmp_index.append(i + 1)\n        tmp_index.append(len(layer_cfg))\n        for i in range(len(tmp_index) - 1):\n            split_layer_setting.append(layer_cfg[tmp_index[i]:tmp_index[i +\n                                                                        1]])\n    split_layer_setting.append(new_layer_setting[-1])\n\n    num_of_layers = [len(layer_cfg) for layer_cfg in split_layer_setting[1:-1]]\n    new_layers = [\n        int(math.ceil(arch_setting[1] * num)) for num in num_of_layers\n    ]\n\n    merge_layer_setting = [split_layer_setting[0]]\n    for i, layer_cfg in enumerate(split_layer_setting[1:-1]):\n        if new_layers[i] <= num_of_layers[i]:\n            tmp_layer_cfg = layer_cfg[:new_layers[i]]\n        else:\n            tmp_layer_cfg = copy.deepcopy(layer_cfg) + [layer_cfg[-1]] * (\n                new_layers[i] - num_of_layers[i])\n        if tmp_layer_cfg[0][3] == 1 and i != 0:\n            merge_layer_setting[-1] += tmp_layer_cfg.copy()\n        else:\n            merge_layer_setting.append(tmp_layer_cfg.copy())\n    merge_layer_setting.append(split_layer_setting[-1])\n\n    return merge_layer_setting\n\n\n@BACKBONES.register_module()\nclass EfficientNet(BaseModule):\n    \"\"\"EfficientNet backbone.\n\n    Args:\n        arch (str): Architecture of efficientnet. Defaults to b0.\n        out_indices (Sequence[int]): Output from which stages.\n            Defaults to (6, ).\n        frozen_stages (int): Stages to be frozen (all param fixed).\n            Defaults to 0, which means not freezing any parameters.\n        conv_cfg (dict): Config dict for convolution layer.\n            Defaults to None, which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Defaults to dict(type='BN').\n        act_cfg (dict): Config dict for activation layer.\n            Defaults to dict(type='Swish').\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only. Defaults to False.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed. 
Defaults to False.\n    \"\"\"\n\n    # Parameters to build layers.\n    # 'b' represents the architecture of normal EfficientNet family includes\n    # 'b0', 'b1', 'b2', 'b3', 'b4', 'b5', 'b6', 'b7', 'b8'.\n    # 'e' represents the architecture of EfficientNet-EdgeTPU including 'es',\n    # 'em', 'el'.\n    # 6 parameters are needed to construct a layer, From left to right:\n    # - kernel_size: The kernel size of the block\n    # - out_channel: The number of out_channels of the block\n    # - se_ratio: The sequeeze ratio of SELayer.\n    # - stride: The stride of the block\n    # - expand_ratio: The expand_ratio of the mid_channels\n    # - block_type: -1: Not a block, 0: InvertedResidual, 1: EdgeResidual\n    layer_settings = {\n        'b': [[[3, 32, 0, 2, 0, -1]],\n              [[3, 16, 4, 1, 1, 0]],\n              [[3, 24, 4, 2, 6, 0],\n               [3, 24, 4, 1, 6, 0]],\n              [[5, 40, 4, 2, 6, 0],\n               [5, 40, 4, 1, 6, 0]],\n              [[3, 80, 4, 2, 6, 0],\n               [3, 80, 4, 1, 6, 0],\n               [3, 80, 4, 1, 6, 0],\n               [5, 112, 4, 1, 6, 0],\n               [5, 112, 4, 1, 6, 0],\n               [5, 112, 4, 1, 6, 0]],\n              [[5, 192, 4, 2, 6, 0],\n               [5, 192, 4, 1, 6, 0],\n               [5, 192, 4, 1, 6, 0],\n               [5, 192, 4, 1, 6, 0],\n               [3, 320, 4, 1, 6, 0]],\n              [[1, 1280, 0, 1, 0, -1]]\n              ],\n        'e': [[[3, 32, 0, 2, 0, -1]],\n              [[3, 24, 0, 1, 3, 1]],\n              [[3, 32, 0, 2, 8, 1],\n               [3, 32, 0, 1, 8, 1]],\n              [[3, 48, 0, 2, 8, 1],\n               [3, 48, 0, 1, 8, 1],\n               [3, 48, 0, 1, 8, 1],\n               [3, 48, 0, 1, 8, 1]],\n              [[5, 96, 0, 2, 8, 0],\n               [5, 96, 0, 1, 8, 0],\n               [5, 96, 0, 1, 8, 0],\n               [5, 96, 0, 1, 8, 0],\n               [5, 96, 0, 1, 8, 0],\n               [5, 144, 0, 1, 8, 0],\n               [5, 144, 0, 1, 8, 0],\n               [5, 144, 0, 1, 8, 0],\n               [5, 144, 0, 1, 8, 0]],\n              [[5, 192, 0, 2, 8, 0],\n               [5, 192, 0, 1, 8, 0]],\n              [[1, 1280, 0, 1, 0, -1]]\n              ]\n    }  # yapf: disable\n\n    # Parameters to build different kinds of architecture.\n    # From left to right: scaling factor for width, scaling factor for depth,\n    # resolution.\n    arch_settings = {\n        'b0': (1.0, 1.0, 224),\n        'b1': (1.0, 1.1, 240),\n        'b2': (1.1, 1.2, 260),\n        'b3': (1.2, 1.4, 300),\n        'b4': (1.4, 1.8, 380),\n        'b5': (1.6, 2.2, 456),\n        'b6': (1.8, 2.6, 528),\n        'b7': (2.0, 3.1, 600),\n        'b8': (2.2, 3.6, 672),\n        'es': (1.0, 1.0, 224),\n        'em': (1.0, 1.1, 240),\n        'el': (1.2, 1.4, 300)\n    }\n\n    def __init__(self,\n                 arch='b0',\n                 drop_path_rate=0.,\n                 out_indices=(6, ),\n                 frozen_stages=0,\n                 conv_cfg=dict(type='Conv2dAdaptivePadding'),\n                 norm_cfg=dict(type='BN', eps=1e-3),\n                 act_cfg=dict(type='Swish'),\n                 norm_eval=False,\n                 with_cp=False,\n                 init_cfg=[\n                     dict(type='Kaiming', layer='Conv2d'),\n                     dict(\n                         type='Constant',\n                         layer=['_BatchNorm', 'GroupNorm'],\n                         val=1)\n                 ]):\n        super(EfficientNet, self).__init__(init_cfg)\n        assert 
arch in self.arch_settings, \\\n            f'\"{arch}\" is not one of the arch_settings ' \\\n            f'({\", \".join(self.arch_settings.keys())})'\n        self.arch_setting = self.arch_settings[arch]\n        self.layer_setting = self.layer_settings[arch[:1]]\n        for index in out_indices:\n            if index not in range(0, len(self.layer_setting)):\n                raise ValueError('the item in out_indices must in '\n                                 f'range(0, {len(self.layer_setting)}). '\n                                 f'But received {index}')\n\n        if frozen_stages not in range(len(self.layer_setting) + 1):\n            raise ValueError('frozen_stages must be in range(0, '\n                             f'{len(self.layer_setting) + 1}). '\n                             f'But received {frozen_stages}')\n        self.drop_path_rate = drop_path_rate\n        self.out_indices = out_indices\n        self.frozen_stages = frozen_stages\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n        self.norm_eval = norm_eval\n        self.with_cp = with_cp\n\n        self.layer_setting = model_scaling(self.layer_setting,\n                                           self.arch_setting)\n        block_cfg_0 = self.layer_setting[0][0]\n        block_cfg_last = self.layer_setting[-1][0]\n        self.in_channels = make_divisible(block_cfg_0[1], 8)\n        self.out_channels = block_cfg_last[1]\n        self.layers = nn.ModuleList()\n        self.layers.append(\n            ConvModule(\n                in_channels=3,\n                out_channels=self.in_channels,\n                kernel_size=block_cfg_0[0],\n                stride=block_cfg_0[3],\n                padding=block_cfg_0[0] // 2,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg,\n                act_cfg=self.act_cfg))\n        self.make_layer()\n        # Avoid building unused layers in mmdetection.\n        if len(self.layers) < max(self.out_indices) + 1:\n            self.layers.append(\n                ConvModule(\n                    in_channels=self.in_channels,\n                    out_channels=self.out_channels,\n                    kernel_size=block_cfg_last[0],\n                    stride=block_cfg_last[3],\n                    padding=block_cfg_last[0] // 2,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    act_cfg=self.act_cfg))\n\n    def make_layer(self):\n        # Without the first and the final conv block.\n        layer_setting = self.layer_setting[1:-1]\n\n        total_num_blocks = sum([len(x) for x in layer_setting])\n        block_idx = 0\n        dpr = [\n            x.item()\n            for x in torch.linspace(0, self.drop_path_rate, total_num_blocks)\n        ]  # stochastic depth decay rule\n\n        for i, layer_cfg in enumerate(layer_setting):\n            # Avoid building unused layers in mmdetection.\n            if i > max(self.out_indices) - 1:\n                break\n            layer = []\n            for i, block_cfg in enumerate(layer_cfg):\n                (kernel_size, out_channels, se_ratio, stride, expand_ratio,\n                 block_type) = block_cfg\n\n                mid_channels = int(self.in_channels * expand_ratio)\n                out_channels = make_divisible(out_channels, 8)\n                if se_ratio <= 0:\n                    se_cfg = None\n                else:\n                    # In mmdetection, the `divisor` is deleted to 
align\n                    # the logic of SELayer with mmcls.\n                    se_cfg = dict(\n                        channels=mid_channels,\n                        ratio=expand_ratio * se_ratio,\n                        act_cfg=(self.act_cfg, dict(type='Sigmoid')))\n                if block_type == 1:  # edge tpu\n                    if i > 0 and expand_ratio == 3:\n                        with_residual = False\n                        expand_ratio = 4\n                    else:\n                        with_residual = True\n                    mid_channels = int(self.in_channels * expand_ratio)\n                    if se_cfg is not None:\n                        # In mmdetection, the `divisor` is deleted to align\n                        # the logic of SELayer with mmcls.\n                        se_cfg = dict(\n                            channels=mid_channels,\n                            ratio=se_ratio * expand_ratio,\n                            act_cfg=(self.act_cfg, dict(type='Sigmoid')))\n                    block = partial(EdgeResidual, with_residual=with_residual)\n                else:\n                    block = InvertedResidual\n                layer.append(\n                    block(\n                        in_channels=self.in_channels,\n                        out_channels=out_channels,\n                        mid_channels=mid_channels,\n                        kernel_size=kernel_size,\n                        stride=stride,\n                        se_cfg=se_cfg,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg,\n                        act_cfg=self.act_cfg,\n                        drop_path_rate=dpr[block_idx],\n                        with_cp=self.with_cp,\n                        # In mmdetection, `with_expand_conv` is set to align\n                        # the logic of InvertedResidual with mmcls.\n                        with_expand_conv=(mid_channels != self.in_channels)))\n                self.in_channels = out_channels\n                block_idx += 1\n            self.layers.append(Sequential(*layer))\n\n    def forward(self, x):\n        outs = []\n        for i, layer in enumerate(self.layers):\n            x = layer(x)\n            if i in self.out_indices:\n                outs.append(x)\n\n        return tuple(outs)\n\n    def _freeze_stages(self):\n        for i in range(self.frozen_stages):\n            m = self.layers[i]\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n    def train(self, mode=True):\n        super(EfficientNet, self).train(mode)\n        self._freeze_stages()\n        if mode and self.norm_eval:\n            for m in self.modules():\n                if isinstance(m, nn.BatchNorm2d):\n                    m.eval()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/hourglass.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import BACKBONES\nfrom ..utils import ResLayer\nfrom .resnet import BasicBlock\n\n\nclass HourglassModule(BaseModule):\n    \"\"\"Hourglass Module for HourglassNet backbone.\n\n    Generate module recursively and use BasicBlock as the base unit.\n\n    Args:\n        depth (int): Depth of current HourglassModule.\n        stage_channels (list[int]): Feature channels of sub-modules in current\n            and follow-up HourglassModule.\n        stage_blocks (list[int]): Number of sub-modules stacked in current and\n            follow-up HourglassModule.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n        upsample_cfg (dict, optional): Config dict for interpolate layer.\n            Default: `dict(mode='nearest')`\n    \"\"\"\n\n    def __init__(self,\n                 depth,\n                 stage_channels,\n                 stage_blocks,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 init_cfg=None,\n                 upsample_cfg=dict(mode='nearest')):\n        super(HourglassModule, self).__init__(init_cfg)\n\n        self.depth = depth\n\n        cur_block = stage_blocks[0]\n        next_block = stage_blocks[1]\n\n        cur_channel = stage_channels[0]\n        next_channel = stage_channels[1]\n\n        self.up1 = ResLayer(\n            BasicBlock, cur_channel, cur_channel, cur_block, norm_cfg=norm_cfg)\n\n        self.low1 = ResLayer(\n            BasicBlock,\n            cur_channel,\n            next_channel,\n            cur_block,\n            stride=2,\n            norm_cfg=norm_cfg)\n\n        if self.depth > 1:\n            self.low2 = HourglassModule(depth - 1, stage_channels[1:],\n                                        stage_blocks[1:])\n        else:\n            self.low2 = ResLayer(\n                BasicBlock,\n                next_channel,\n                next_channel,\n                next_block,\n                norm_cfg=norm_cfg)\n\n        self.low3 = ResLayer(\n            BasicBlock,\n            next_channel,\n            cur_channel,\n            cur_block,\n            norm_cfg=norm_cfg,\n            downsample_first=False)\n\n        self.up2 = F.interpolate\n        self.upsample_cfg = upsample_cfg\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        up1 = self.up1(x)\n        low1 = self.low1(x)\n        low2 = self.low2(low1)\n        low3 = self.low3(low2)\n        # Fixing `scale factor` (e.g. 
2) is common for upsampling, but\n        # in some cases the spatial size is mismatched and error will arise.\n        if 'scale_factor' in self.upsample_cfg:\n            up2 = self.up2(low3, **self.upsample_cfg)\n        else:\n            shape = up1.shape[2:]\n            up2 = self.up2(low3, size=shape, **self.upsample_cfg)\n        return up1 + up2\n\n\n@BACKBONES.register_module()\nclass HourglassNet(BaseModule):\n    \"\"\"HourglassNet backbone.\n\n    Stacked Hourglass Networks for Human Pose Estimation.\n    More details can be found in the `paper\n    <https://arxiv.org/abs/1603.06937>`_ .\n\n    Args:\n        downsample_times (int): Downsample times in a HourglassModule.\n        num_stacks (int): Number of HourglassModule modules stacked,\n            1 for Hourglass-52, 2 for Hourglass-104.\n        stage_channels (list[int]): Feature channel of each sub-module in a\n            HourglassModule.\n        stage_blocks (list[int]): Number of sub-modules stacked in a\n            HourglassModule.\n        feat_channel (int): Feature channel of conv after a HourglassModule.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        pretrained (str, optional): model pretrained path. Default: None\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n\n    Example:\n        >>> from mmdet.models import HourglassNet\n        >>> import torch\n        >>> self = HourglassNet()\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 3, 511, 511)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_output in level_outputs:\n        ...     print(tuple(level_output.shape))\n        (1, 256, 128, 128)\n        (1, 256, 128, 128)\n    \"\"\"\n\n    def __init__(self,\n                 downsample_times=5,\n                 num_stacks=2,\n                 stage_channels=(256, 256, 384, 384, 384, 512),\n                 stage_blocks=(2, 2, 2, 2, 2, 4),\n                 feat_channel=256,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 pretrained=None,\n                 init_cfg=None):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super(HourglassNet, self).__init__(init_cfg)\n\n        self.num_stacks = num_stacks\n        assert self.num_stacks >= 1\n        assert len(stage_channels) == len(stage_blocks)\n        assert len(stage_channels) > downsample_times\n\n        cur_channel = stage_channels[0]\n\n        self.stem = nn.Sequential(\n            ConvModule(\n                3, cur_channel // 2, 7, padding=3, stride=2,\n                norm_cfg=norm_cfg),\n            ResLayer(\n                BasicBlock,\n                cur_channel // 2,\n                cur_channel,\n                1,\n                stride=2,\n                norm_cfg=norm_cfg))\n\n        self.hourglass_modules = nn.ModuleList([\n            HourglassModule(downsample_times, stage_channels, stage_blocks)\n            for _ in range(num_stacks)\n        ])\n\n        self.inters = ResLayer(\n            BasicBlock,\n            cur_channel,\n            cur_channel,\n            num_stacks - 1,\n            norm_cfg=norm_cfg)\n\n        self.conv1x1s = nn.ModuleList([\n            ConvModule(\n                cur_channel, cur_channel, 1, norm_cfg=norm_cfg, act_cfg=None)\n            for _ in range(num_stacks - 1)\n        ])\n\n        self.out_convs = 
nn.ModuleList([\n            ConvModule(\n                cur_channel, feat_channel, 3, padding=1, norm_cfg=norm_cfg)\n            for _ in range(num_stacks)\n        ])\n\n        self.remap_convs = nn.ModuleList([\n            ConvModule(\n                feat_channel, cur_channel, 1, norm_cfg=norm_cfg, act_cfg=None)\n            for _ in range(num_stacks - 1)\n        ])\n\n        self.relu = nn.ReLU(inplace=True)\n\n    def init_weights(self):\n        \"\"\"Init module weights.\"\"\"\n        # Training Centripetal Model needs to reset parameters for Conv2d\n        super(HourglassNet, self).init_weights()\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                m.reset_parameters()\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        inter_feat = self.stem(x)\n        out_feats = []\n\n        for ind in range(self.num_stacks):\n            single_hourglass = self.hourglass_modules[ind]\n            out_conv = self.out_convs[ind]\n\n            hourglass_feat = single_hourglass(inter_feat)\n            out_feat = out_conv(hourglass_feat)\n            out_feats.append(out_feat)\n\n            if ind < self.num_stacks - 1:\n                inter_feat = self.conv1x1s[ind](\n                    inter_feat) + self.remap_convs[ind](\n                        out_feat)\n                inter_feat = self.inters[ind](self.relu(inter_feat))\n\n        return out_feats\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/hrnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch.nn as nn\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\nfrom mmcv.runner import BaseModule, ModuleList, Sequential\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom ..builder import BACKBONES\nfrom .resnet import BasicBlock, Bottleneck\n\n\nclass HRModule(BaseModule):\n    \"\"\"High-Resolution Module for HRNet.\n\n    In this module, every branch has 4 BasicBlocks/Bottlenecks. Fusion/Exchange\n    is in this module.\n    \"\"\"\n\n    def __init__(self,\n                 num_branches,\n                 blocks,\n                 num_blocks,\n                 in_channels,\n                 num_channels,\n                 multiscale_output=True,\n                 with_cp=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 block_init_cfg=None,\n                 init_cfg=None):\n        super(HRModule, self).__init__(init_cfg)\n        self.block_init_cfg = block_init_cfg\n        self._check_branches(num_branches, num_blocks, in_channels,\n                             num_channels)\n\n        self.in_channels = in_channels\n        self.num_branches = num_branches\n\n        self.multiscale_output = multiscale_output\n        self.norm_cfg = norm_cfg\n        self.conv_cfg = conv_cfg\n        self.with_cp = with_cp\n        self.branches = self._make_branches(num_branches, blocks, num_blocks,\n                                            num_channels)\n        self.fuse_layers = self._make_fuse_layers()\n        self.relu = nn.ReLU(inplace=False)\n\n    def _check_branches(self, num_branches, num_blocks, in_channels,\n                        num_channels):\n        if num_branches != len(num_blocks):\n            error_msg = f'NUM_BRANCHES({num_branches}) ' \\\n                        f'!= NUM_BLOCKS({len(num_blocks)})'\n            raise ValueError(error_msg)\n\n        if num_branches != len(num_channels):\n            error_msg = f'NUM_BRANCHES({num_branches}) ' \\\n                        f'!= NUM_CHANNELS({len(num_channels)})'\n            raise ValueError(error_msg)\n\n        if num_branches != len(in_channels):\n            error_msg = f'NUM_BRANCHES({num_branches}) ' \\\n                        f'!= NUM_INCHANNELS({len(in_channels)})'\n            raise ValueError(error_msg)\n\n    def _make_one_branch(self,\n                         branch_index,\n                         block,\n                         num_blocks,\n                         num_channels,\n                         stride=1):\n        downsample = None\n        if stride != 1 or \\\n                self.in_channels[branch_index] != \\\n                num_channels[branch_index] * block.expansion:\n            downsample = nn.Sequential(\n                build_conv_layer(\n                    self.conv_cfg,\n                    self.in_channels[branch_index],\n                    num_channels[branch_index] * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                    bias=False),\n                build_norm_layer(self.norm_cfg, num_channels[branch_index] *\n                                 block.expansion)[1])\n\n        layers = []\n        layers.append(\n            block(\n                self.in_channels[branch_index],\n                num_channels[branch_index],\n                stride,\n                downsample=downsample,\n                with_cp=self.with_cp,\n                
norm_cfg=self.norm_cfg,\n                conv_cfg=self.conv_cfg,\n                init_cfg=self.block_init_cfg))\n        self.in_channels[branch_index] = \\\n            num_channels[branch_index] * block.expansion\n        for i in range(1, num_blocks[branch_index]):\n            layers.append(\n                block(\n                    self.in_channels[branch_index],\n                    num_channels[branch_index],\n                    with_cp=self.with_cp,\n                    norm_cfg=self.norm_cfg,\n                    conv_cfg=self.conv_cfg,\n                    init_cfg=self.block_init_cfg))\n\n        return Sequential(*layers)\n\n    def _make_branches(self, num_branches, block, num_blocks, num_channels):\n        branches = []\n\n        for i in range(num_branches):\n            branches.append(\n                self._make_one_branch(i, block, num_blocks, num_channels))\n\n        return ModuleList(branches)\n\n    def _make_fuse_layers(self):\n        if self.num_branches == 1:\n            return None\n\n        num_branches = self.num_branches\n        in_channels = self.in_channels\n        fuse_layers = []\n        num_out_branches = num_branches if self.multiscale_output else 1\n        for i in range(num_out_branches):\n            fuse_layer = []\n            for j in range(num_branches):\n                if j > i:\n                    fuse_layer.append(\n                        nn.Sequential(\n                            build_conv_layer(\n                                self.conv_cfg,\n                                in_channels[j],\n                                in_channels[i],\n                                kernel_size=1,\n                                stride=1,\n                                padding=0,\n                                bias=False),\n                            build_norm_layer(self.norm_cfg, in_channels[i])[1],\n                            nn.Upsample(\n                                scale_factor=2**(j - i), mode='nearest')))\n                elif j == i:\n                    fuse_layer.append(None)\n                else:\n                    conv_downsamples = []\n                    for k in range(i - j):\n                        if k == i - j - 1:\n                            conv_downsamples.append(\n                                nn.Sequential(\n                                    build_conv_layer(\n                                        self.conv_cfg,\n                                        in_channels[j],\n                                        in_channels[i],\n                                        kernel_size=3,\n                                        stride=2,\n                                        padding=1,\n                                        bias=False),\n                                    build_norm_layer(self.norm_cfg,\n                                                     in_channels[i])[1]))\n                        else:\n                            conv_downsamples.append(\n                                nn.Sequential(\n                                    build_conv_layer(\n                                        self.conv_cfg,\n                                        in_channels[j],\n                                        in_channels[j],\n                                        kernel_size=3,\n                                        stride=2,\n                                        padding=1,\n                                        bias=False),\n                                    
build_norm_layer(self.norm_cfg,\n                                                     in_channels[j])[1],\n                                    nn.ReLU(inplace=False)))\n                    fuse_layer.append(nn.Sequential(*conv_downsamples))\n            fuse_layers.append(nn.ModuleList(fuse_layer))\n\n        return nn.ModuleList(fuse_layers)\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        if self.num_branches == 1:\n            return [self.branches[0](x[0])]\n\n        for i in range(self.num_branches):\n            x[i] = self.branches[i](x[i])\n\n        x_fuse = []\n        for i in range(len(self.fuse_layers)):\n            y = 0\n            for j in range(self.num_branches):\n                if i == j:\n                    y += x[j]\n                else:\n                    y += self.fuse_layers[i][j](x[j])\n            x_fuse.append(self.relu(y))\n        return x_fuse\n\n\n@BACKBONES.register_module()\nclass HRNet(BaseModule):\n    \"\"\"HRNet backbone.\n\n    `High-Resolution Representations for Labeling Pixels and Regions\n    arXiv: <https://arxiv.org/abs/1904.04514>`_.\n\n    Args:\n        extra (dict): Detailed configuration for each stage of HRNet.\n            There must be 4 stages, the configuration for each stage must have\n            5 keys:\n\n                - num_modules(int): The number of HRModule in this stage.\n                - num_branches(int): The number of branches in the HRModule.\n                - block(str): The type of convolution block.\n                - num_blocks(tuple): The number of blocks in each branch.\n                    The length must be equal to num_branches.\n                - num_channels(tuple): The number of channels in each branch.\n                    The length must be equal to num_branches.\n        in_channels (int): Number of input image channels. Default: 3.\n        conv_cfg (dict): Dictionary to construct and config conv layer.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only. Default: True.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed. Default: False.\n        zero_init_residual (bool): Whether to use zero init for last norm layer\n            in resblocks to let them behave as identity. Default: False.\n        multiscale_output (bool): Whether to output multi-level features\n            produced by multiple branches. If False, only the first level\n            feature will be output. Default: True.\n        pretrained (str, optional): Model pretrained path. 
Default: None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None.\n\n    Example:\n        >>> from mmdet.models import HRNet\n        >>> import torch\n        >>> extra = dict(\n        >>>     stage1=dict(\n        >>>         num_modules=1,\n        >>>         num_branches=1,\n        >>>         block='BOTTLENECK',\n        >>>         num_blocks=(4, ),\n        >>>         num_channels=(64, )),\n        >>>     stage2=dict(\n        >>>         num_modules=1,\n        >>>         num_branches=2,\n        >>>         block='BASIC',\n        >>>         num_blocks=(4, 4),\n        >>>         num_channels=(32, 64)),\n        >>>     stage3=dict(\n        >>>         num_modules=4,\n        >>>         num_branches=3,\n        >>>         block='BASIC',\n        >>>         num_blocks=(4, 4, 4),\n        >>>         num_channels=(32, 64, 128)),\n        >>>     stage4=dict(\n        >>>         num_modules=3,\n        >>>         num_branches=4,\n        >>>         block='BASIC',\n        >>>         num_blocks=(4, 4, 4, 4),\n        >>>         num_channels=(32, 64, 128, 256)))\n        >>> self = HRNet(extra, in_channels=1)\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 1, 32, 32)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...     print(tuple(level_out.shape))\n        (1, 32, 8, 8)\n        (1, 64, 4, 4)\n        (1, 128, 2, 2)\n        (1, 256, 1, 1)\n    \"\"\"\n\n    blocks_dict = {'BASIC': BasicBlock, 'BOTTLENECK': Bottleneck}\n\n    def __init__(self,\n                 extra,\n                 in_channels=3,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 norm_eval=True,\n                 with_cp=False,\n                 zero_init_residual=False,\n                 multiscale_output=True,\n                 pretrained=None,\n                 init_cfg=None):\n        super(HRNet, self).__init__(init_cfg)\n\n        self.pretrained = pretrained\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            if init_cfg is None:\n                self.init_cfg = [\n                    dict(type='Kaiming', layer='Conv2d'),\n                    dict(\n                        type='Constant',\n                        val=1,\n                        layer=['_BatchNorm', 'GroupNorm'])\n                ]\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        # Assert configurations of 4 stages are in extra\n        assert 'stage1' in extra and 'stage2' in extra \\\n               and 'stage3' in extra and 'stage4' in extra\n        # Assert whether the length of `num_blocks` and `num_channels` are\n        # equal to `num_branches`\n        for i in range(4):\n            cfg = extra[f'stage{i + 1}']\n            assert len(cfg['num_blocks']) == cfg['num_branches'] and \\\n                   len(cfg['num_channels']) == cfg['num_branches']\n\n        self.extra = extra\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.norm_eval = norm_eval\n        self.with_cp = with_cp\n        
self.zero_init_residual = zero_init_residual\n\n        # stem net\n        self.norm1_name, norm1 = build_norm_layer(self.norm_cfg, 64, postfix=1)\n        self.norm2_name, norm2 = build_norm_layer(self.norm_cfg, 64, postfix=2)\n\n        self.conv1 = build_conv_layer(\n            self.conv_cfg,\n            in_channels,\n            64,\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            bias=False)\n\n        self.add_module(self.norm1_name, norm1)\n        self.conv2 = build_conv_layer(\n            self.conv_cfg,\n            64,\n            64,\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            bias=False)\n\n        self.add_module(self.norm2_name, norm2)\n        self.relu = nn.ReLU(inplace=True)\n\n        # stage 1\n        self.stage1_cfg = self.extra['stage1']\n        num_channels = self.stage1_cfg['num_channels'][0]\n        block_type = self.stage1_cfg['block']\n        num_blocks = self.stage1_cfg['num_blocks'][0]\n\n        block = self.blocks_dict[block_type]\n        stage1_out_channels = num_channels * block.expansion\n        self.layer1 = self._make_layer(block, 64, num_channels, num_blocks)\n\n        # stage 2\n        self.stage2_cfg = self.extra['stage2']\n        num_channels = self.stage2_cfg['num_channels']\n        block_type = self.stage2_cfg['block']\n\n        block = self.blocks_dict[block_type]\n        num_channels = [channel * block.expansion for channel in num_channels]\n        self.transition1 = self._make_transition_layer([stage1_out_channels],\n                                                       num_channels)\n        self.stage2, pre_stage_channels = self._make_stage(\n            self.stage2_cfg, num_channels)\n\n        # stage 3\n        self.stage3_cfg = self.extra['stage3']\n        num_channels = self.stage3_cfg['num_channels']\n        block_type = self.stage3_cfg['block']\n\n        block = self.blocks_dict[block_type]\n        num_channels = [channel * block.expansion for channel in num_channels]\n        self.transition2 = self._make_transition_layer(pre_stage_channels,\n                                                       num_channels)\n        self.stage3, pre_stage_channels = self._make_stage(\n            self.stage3_cfg, num_channels)\n\n        # stage 4\n        self.stage4_cfg = self.extra['stage4']\n        num_channels = self.stage4_cfg['num_channels']\n        block_type = self.stage4_cfg['block']\n\n        block = self.blocks_dict[block_type]\n        num_channels = [channel * block.expansion for channel in num_channels]\n        self.transition3 = self._make_transition_layer(pre_stage_channels,\n                                                       num_channels)\n        self.stage4, pre_stage_channels = self._make_stage(\n            self.stage4_cfg, num_channels, multiscale_output=multiscale_output)\n\n    @property\n    def norm1(self):\n        \"\"\"nn.Module: the normalization layer named \"norm1\" \"\"\"\n        return getattr(self, self.norm1_name)\n\n    @property\n    def norm2(self):\n        \"\"\"nn.Module: the normalization layer named \"norm2\" \"\"\"\n        return getattr(self, self.norm2_name)\n\n    def _make_transition_layer(self, num_channels_pre_layer,\n                               num_channels_cur_layer):\n        num_branches_cur = len(num_channels_cur_layer)\n        num_branches_pre = len(num_channels_pre_layer)\n\n        transition_layers = []\n        for i in range(num_branches_cur):\n            if i < 
num_branches_pre:\n                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:\n                    transition_layers.append(\n                        nn.Sequential(\n                            build_conv_layer(\n                                self.conv_cfg,\n                                num_channels_pre_layer[i],\n                                num_channels_cur_layer[i],\n                                kernel_size=3,\n                                stride=1,\n                                padding=1,\n                                bias=False),\n                            build_norm_layer(self.norm_cfg,\n                                             num_channels_cur_layer[i])[1],\n                            nn.ReLU(inplace=True)))\n                else:\n                    transition_layers.append(None)\n            else:\n                conv_downsamples = []\n                for j in range(i + 1 - num_branches_pre):\n                    in_channels = num_channels_pre_layer[-1]\n                    out_channels = num_channels_cur_layer[i] \\\n                        if j == i - num_branches_pre else in_channels\n                    conv_downsamples.append(\n                        nn.Sequential(\n                            build_conv_layer(\n                                self.conv_cfg,\n                                in_channels,\n                                out_channels,\n                                kernel_size=3,\n                                stride=2,\n                                padding=1,\n                                bias=False),\n                            build_norm_layer(self.norm_cfg, out_channels)[1],\n                            nn.ReLU(inplace=True)))\n                transition_layers.append(nn.Sequential(*conv_downsamples))\n\n        return nn.ModuleList(transition_layers)\n\n    def _make_layer(self, block, inplanes, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                build_conv_layer(\n                    self.conv_cfg,\n                    inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                    bias=False),\n                build_norm_layer(self.norm_cfg, planes * block.expansion)[1])\n\n        layers = []\n        block_init_cfg = None\n        if self.pretrained is None and not hasattr(\n                self, 'init_cfg') and self.zero_init_residual:\n            if block is BasicBlock:\n                block_init_cfg = dict(\n                    type='Constant', val=0, override=dict(name='norm2'))\n            elif block is Bottleneck:\n                block_init_cfg = dict(\n                    type='Constant', val=0, override=dict(name='norm3'))\n        layers.append(\n            block(\n                inplanes,\n                planes,\n                stride,\n                downsample=downsample,\n                with_cp=self.with_cp,\n                norm_cfg=self.norm_cfg,\n                conv_cfg=self.conv_cfg,\n                init_cfg=block_init_cfg,\n            ))\n        inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(\n                block(\n                    inplanes,\n                    planes,\n                    with_cp=self.with_cp,\n                    norm_cfg=self.norm_cfg,\n                    conv_cfg=self.conv_cfg,\n 
                   init_cfg=block_init_cfg))\n\n        return Sequential(*layers)\n\n    def _make_stage(self, layer_config, in_channels, multiscale_output=True):\n        num_modules = layer_config['num_modules']\n        num_branches = layer_config['num_branches']\n        num_blocks = layer_config['num_blocks']\n        num_channels = layer_config['num_channels']\n        block = self.blocks_dict[layer_config['block']]\n\n        hr_modules = []\n        block_init_cfg = None\n        if self.pretrained is None and not hasattr(\n                self, 'init_cfg') and self.zero_init_residual:\n            if block is BasicBlock:\n                block_init_cfg = dict(\n                    type='Constant', val=0, override=dict(name='norm2'))\n            elif block is Bottleneck:\n                block_init_cfg = dict(\n                    type='Constant', val=0, override=dict(name='norm3'))\n\n        for i in range(num_modules):\n            # multi_scale_output is only used for the last module\n            if not multiscale_output and i == num_modules - 1:\n                reset_multiscale_output = False\n            else:\n                reset_multiscale_output = True\n\n            hr_modules.append(\n                HRModule(\n                    num_branches,\n                    block,\n                    num_blocks,\n                    in_channels,\n                    num_channels,\n                    reset_multiscale_output,\n                    with_cp=self.with_cp,\n                    norm_cfg=self.norm_cfg,\n                    conv_cfg=self.conv_cfg,\n                    block_init_cfg=block_init_cfg))\n\n        return Sequential(*hr_modules), in_channels\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        x = self.conv1(x)\n        x = self.norm1(x)\n        x = self.relu(x)\n        x = self.conv2(x)\n        x = self.norm2(x)\n        x = self.relu(x)\n        x = self.layer1(x)\n\n        x_list = []\n        for i in range(self.stage2_cfg['num_branches']):\n            if self.transition1[i] is not None:\n                x_list.append(self.transition1[i](x))\n            else:\n                x_list.append(x)\n        y_list = self.stage2(x_list)\n\n        x_list = []\n        for i in range(self.stage3_cfg['num_branches']):\n            if self.transition2[i] is not None:\n                x_list.append(self.transition2[i](y_list[-1]))\n            else:\n                x_list.append(y_list[i])\n        y_list = self.stage3(x_list)\n\n        x_list = []\n        for i in range(self.stage4_cfg['num_branches']):\n            if self.transition3[i] is not None:\n                x_list.append(self.transition3[i](y_list[-1]))\n            else:\n                x_list.append(y_list[i])\n        y_list = self.stage4(x_list)\n\n        return y_list\n\n    def train(self, mode=True):\n        \"\"\"Convert the model into training mode while keeping the normalization\n        layers frozen.\"\"\"\n        super(HRNet, self).train(mode)\n        if mode and self.norm_eval:\n            for m in self.modules():\n                # trick: eval() has an effect on BatchNorm only\n                if isinstance(m, _BatchNorm):\n                    m.eval()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/mobilenet_v2.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom ..builder import BACKBONES\nfrom ..utils import InvertedResidual, make_divisible\n\n\n@BACKBONES.register_module()\nclass MobileNetV2(BaseModule):\n    \"\"\"MobileNetV2 backbone.\n\n    Args:\n        widen_factor (float): Width multiplier, multiply number of\n            channels in each layer by this amount. Default: 1.0.\n        out_indices (Sequence[int], optional): Output from which stages.\n            Default: (1, 2, 4, 7).\n        frozen_stages (int): Stages to be frozen (all param fixed).\n            Default: -1, which means not freezing any parameters.\n        conv_cfg (dict, optional): Config dict for convolution layer.\n            Default: None, which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='BN').\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='ReLU6').\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only. Default: False.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed. Default: False.\n        pretrained (str, optional): model pretrained path. Default: None\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    # Parameters to build layers. 4 parameters are needed to construct a\n    # layer, from left to right: expand_ratio, channel, num_blocks, stride.\n    arch_settings = [[1, 16, 1, 1], [6, 24, 2, 2], [6, 32, 3, 2],\n                     [6, 64, 4, 2], [6, 96, 3, 1], [6, 160, 3, 2],\n                     [6, 320, 1, 1]]\n\n    def __init__(self,\n                 widen_factor=1.,\n                 out_indices=(1, 2, 4, 7),\n                 frozen_stages=-1,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 act_cfg=dict(type='ReLU6'),\n                 norm_eval=False,\n                 with_cp=False,\n                 pretrained=None,\n                 init_cfg=None):\n        super(MobileNetV2, self).__init__(init_cfg)\n\n        self.pretrained = pretrained\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            if init_cfg is None:\n                self.init_cfg = [\n                    dict(type='Kaiming', layer='Conv2d'),\n                    dict(\n                        type='Constant',\n                        val=1,\n                        layer=['_BatchNorm', 'GroupNorm'])\n                ]\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        self.widen_factor = widen_factor\n        self.out_indices = out_indices\n        if not set(out_indices).issubset(set(range(0, 8))):\n            raise ValueError('out_indices must be a subset of range'\n                             
f'(0, 8). But received {out_indices}')\n\n        if frozen_stages not in range(-1, 8):\n            raise ValueError('frozen_stages must be in range(-1, 8). '\n                             f'But received {frozen_stages}')\n        self.out_indices = out_indices\n        self.frozen_stages = frozen_stages\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n        self.norm_eval = norm_eval\n        self.with_cp = with_cp\n\n        self.in_channels = make_divisible(32 * widen_factor, 8)\n\n        self.conv1 = ConvModule(\n            in_channels=3,\n            out_channels=self.in_channels,\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg,\n            act_cfg=self.act_cfg)\n\n        self.layers = []\n\n        for i, layer_cfg in enumerate(self.arch_settings):\n            expand_ratio, channel, num_blocks, stride = layer_cfg\n            out_channels = make_divisible(channel * widen_factor, 8)\n            inverted_res_layer = self.make_layer(\n                out_channels=out_channels,\n                num_blocks=num_blocks,\n                stride=stride,\n                expand_ratio=expand_ratio)\n            layer_name = f'layer{i + 1}'\n            self.add_module(layer_name, inverted_res_layer)\n            self.layers.append(layer_name)\n\n        if widen_factor > 1.0:\n            self.out_channel = int(1280 * widen_factor)\n        else:\n            self.out_channel = 1280\n\n        layer = ConvModule(\n            in_channels=self.in_channels,\n            out_channels=self.out_channel,\n            kernel_size=1,\n            stride=1,\n            padding=0,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg,\n            act_cfg=self.act_cfg)\n        self.add_module('conv2', layer)\n        self.layers.append('conv2')\n\n    def make_layer(self, out_channels, num_blocks, stride, expand_ratio):\n        \"\"\"Stack InvertedResidual blocks to build a layer for MobileNetV2.\n\n        Args:\n            out_channels (int): out_channels of block.\n            num_blocks (int): number of blocks.\n            stride (int): stride of the first block. Default: 1\n            expand_ratio (int): Expand the number of channels of the\n                hidden layer in InvertedResidual by this ratio. 
Default: 6.\n        \"\"\"\n        layers = []\n        for i in range(num_blocks):\n            if i >= 1:\n                stride = 1\n            layers.append(\n                InvertedResidual(\n                    self.in_channels,\n                    out_channels,\n                    mid_channels=int(round(self.in_channels * expand_ratio)),\n                    stride=stride,\n                    with_expand_conv=expand_ratio != 1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    act_cfg=self.act_cfg,\n                    with_cp=self.with_cp))\n            self.in_channels = out_channels\n\n        return nn.Sequential(*layers)\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            for param in self.conv1.parameters():\n                param.requires_grad = False\n        for i in range(1, self.frozen_stages + 1):\n            layer = getattr(self, f'layer{i}')\n            layer.eval()\n            for param in layer.parameters():\n                param.requires_grad = False\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        x = self.conv1(x)\n        outs = []\n        for i, layer_name in enumerate(self.layers):\n            layer = getattr(self, layer_name)\n            x = layer(x)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n\n    def train(self, mode=True):\n        \"\"\"Convert the model into training mode while keeping the normalization\n        layers frozen.\"\"\"\n        super(MobileNetV2, self).train(mode)\n        self._freeze_stages()\n        if mode and self.norm_eval:\n            for m in self.modules():\n                # trick: eval() has an effect on BatchNorm only\n                if isinstance(m, _BatchNorm):\n                    m.eval()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/pvt.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\nimport warnings\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (Conv2d, build_activation_layer, build_norm_layer,\n                      constant_init, normal_init, trunc_normal_init)\nfrom mmcv.cnn.bricks.drop import build_dropout\nfrom mmcv.cnn.bricks.transformer import MultiheadAttention\nfrom mmcv.cnn.utils.weight_init import trunc_normal_\nfrom mmcv.runner import (BaseModule, ModuleList, Sequential, _load_checkpoint,\n                         load_state_dict)\nfrom torch.nn.modules.utils import _pair as to_2tuple\n\nfrom ...utils import get_root_logger\nfrom ..builder import BACKBONES\nfrom ..utils import PatchEmbed, nchw_to_nlc, nlc_to_nchw, pvt_convert\n\n\nclass MixFFN(BaseModule):\n    \"\"\"An implementation of MixFFN of PVT.\n\n    The differences between MixFFN & FFN:\n        1. Use 1X1 Conv to replace Linear layer.\n        2. Introduce 3X3 Depth-wise Conv to encode positional information.\n\n    Args:\n        embed_dims (int): The feature dimension. Same as\n            `MultiheadAttention`.\n        feedforward_channels (int): The hidden dimension of FFNs.\n        act_cfg (dict, optional): The activation config for FFNs.\n            Default: dict(type='GELU').\n        ffn_drop (float, optional): Probability of an element to be\n            zeroed in FFN. Default 0.0.\n        dropout_layer (obj:`ConfigDict`): The dropout_layer used\n            when adding the shortcut.\n            Default: None.\n        use_conv (bool): If True, add 3x3 DWConv between two Linear layers.\n            Defaults: False.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 feedforward_channels,\n                 act_cfg=dict(type='GELU'),\n                 ffn_drop=0.,\n                 dropout_layer=None,\n                 use_conv=False,\n                 init_cfg=None):\n        super(MixFFN, self).__init__(init_cfg=init_cfg)\n\n        self.embed_dims = embed_dims\n        self.feedforward_channels = feedforward_channels\n        self.act_cfg = act_cfg\n        activate = build_activation_layer(act_cfg)\n\n        in_channels = embed_dims\n        fc1 = Conv2d(\n            in_channels=in_channels,\n            out_channels=feedforward_channels,\n            kernel_size=1,\n            stride=1,\n            bias=True)\n        if use_conv:\n            # 3x3 depth wise conv to provide positional encode information\n            dw_conv = Conv2d(\n                in_channels=feedforward_channels,\n                out_channels=feedforward_channels,\n                kernel_size=3,\n                stride=1,\n                padding=(3 - 1) // 2,\n                bias=True,\n                groups=feedforward_channels)\n        fc2 = Conv2d(\n            in_channels=feedforward_channels,\n            out_channels=in_channels,\n            kernel_size=1,\n            stride=1,\n            bias=True)\n        drop = nn.Dropout(ffn_drop)\n        layers = [fc1, activate, drop, fc2, drop]\n        if use_conv:\n            layers.insert(1, dw_conv)\n        self.layers = Sequential(*layers)\n        self.dropout_layer = build_dropout(\n            dropout_layer) if dropout_layer else torch.nn.Identity()\n\n    def forward(self, x, hw_shape, identity=None):\n        out = nlc_to_nchw(x, hw_shape)\n        out = 
self.layers(out)\n        out = nchw_to_nlc(out)\n        if identity is None:\n            identity = x\n        return identity + self.dropout_layer(out)\n\n\nclass SpatialReductionAttention(MultiheadAttention):\n    \"\"\"An implementation of Spatial Reduction Attention of PVT.\n\n    This module is modified from MultiheadAttention which is a module from\n    mmcv.cnn.bricks.transformer.\n\n    Args:\n        embed_dims (int): The embedding dimension.\n        num_heads (int): Parallel attention heads.\n        attn_drop (float): A Dropout layer on attn_output_weights.\n            Default: 0.0.\n        proj_drop (float): A Dropout layer after `nn.MultiheadAttention`.\n            Default: 0.0.\n        dropout_layer (obj:`ConfigDict`): The dropout_layer used\n            when adding the shortcut. Default: None.\n        batch_first (bool): Key, Query and Value are shape of\n            (batch, n, embed_dim)\n            or (n, batch, embed_dim). Default: False.\n        qkv_bias (bool): enable bias for qkv if True. Default: True.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='LN').\n        sr_ratio (int): The ratio of spatial reduction of Spatial Reduction\n            Attention of PVT. Default: 1.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 attn_drop=0.,\n                 proj_drop=0.,\n                 dropout_layer=None,\n                 batch_first=True,\n                 qkv_bias=True,\n                 norm_cfg=dict(type='LN'),\n                 sr_ratio=1,\n                 init_cfg=None):\n        super().__init__(\n            embed_dims,\n            num_heads,\n            attn_drop,\n            proj_drop,\n            batch_first=batch_first,\n            dropout_layer=dropout_layer,\n            bias=qkv_bias,\n            init_cfg=init_cfg)\n\n        self.sr_ratio = sr_ratio\n        if sr_ratio > 1:\n            self.sr = Conv2d(\n                in_channels=embed_dims,\n                out_channels=embed_dims,\n                kernel_size=sr_ratio,\n                stride=sr_ratio)\n            # The ret[0] of build_norm_layer is norm name.\n            self.norm = build_norm_layer(norm_cfg, embed_dims)[1]\n\n        # handle the BC-breaking from https://github.com/open-mmlab/mmcv/pull/1418 # noqa\n        from mmdet import digit_version, mmcv_version\n        if mmcv_version < digit_version('1.3.17'):\n            warnings.warn('The legacy version of forward function in'\n                          'SpatialReductionAttention is deprecated in'\n                          'mmcv>=1.3.17 and will no longer support in the'\n                          'future. 
Please upgrade your mmcv.')\n            self.forward = self.legacy_forward\n\n    def forward(self, x, hw_shape, identity=None):\n\n        x_q = x\n        if self.sr_ratio > 1:\n            x_kv = nlc_to_nchw(x, hw_shape)\n            x_kv = self.sr(x_kv)\n            x_kv = nchw_to_nlc(x_kv)\n            x_kv = self.norm(x_kv)\n        else:\n            x_kv = x\n\n        if identity is None:\n            identity = x_q\n\n        # Because the dataflow('key', 'query', 'value') of\n        # ``torch.nn.MultiheadAttention`` is (num_query, batch,\n        # embed_dims), We should adjust the shape of dataflow from\n        # batch_first (batch, num_query, embed_dims) to num_query_first\n        # (num_query ,batch, embed_dims), and recover ``attn_output``\n        # from num_query_first to batch_first.\n        if self.batch_first:\n            x_q = x_q.transpose(0, 1)\n            x_kv = x_kv.transpose(0, 1)\n\n        out = self.attn(query=x_q, key=x_kv, value=x_kv)[0]\n\n        if self.batch_first:\n            out = out.transpose(0, 1)\n\n        return identity + self.dropout_layer(self.proj_drop(out))\n\n    def legacy_forward(self, x, hw_shape, identity=None):\n        \"\"\"multi head attention forward in mmcv version < 1.3.17.\"\"\"\n        x_q = x\n        if self.sr_ratio > 1:\n            x_kv = nlc_to_nchw(x, hw_shape)\n            x_kv = self.sr(x_kv)\n            x_kv = nchw_to_nlc(x_kv)\n            x_kv = self.norm(x_kv)\n        else:\n            x_kv = x\n\n        if identity is None:\n            identity = x_q\n\n        out = self.attn(query=x_q, key=x_kv, value=x_kv)[0]\n\n        return identity + self.dropout_layer(self.proj_drop(out))\n\n\nclass PVTEncoderLayer(BaseModule):\n    \"\"\"Implements one encoder layer in PVT.\n\n    Args:\n        embed_dims (int): The feature dimension.\n        num_heads (int): Parallel attention heads.\n        feedforward_channels (int): The hidden dimension for FFNs.\n        drop_rate (float): Probability of an element to be zeroed.\n            after the feed forward layer. Default: 0.0.\n        attn_drop_rate (float): The drop out rate for attention layer.\n            Default: 0.0.\n        drop_path_rate (float): stochastic depth rate. Default: 0.0.\n        qkv_bias (bool): enable bias for qkv if True.\n            Default: True.\n        act_cfg (dict): The activation config for FFNs.\n            Default: dict(type='GELU').\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='LN').\n        sr_ratio (int): The ratio of spatial reduction of Spatial Reduction\n            Attention of PVT. 
Default: 1.\n        use_conv_ffn (bool): If True, use Convolutional FFN to replace FFN.\n            Default: False.\n        init_cfg (dict, optional): Initialization config dict.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 feedforward_channels,\n                 drop_rate=0.,\n                 attn_drop_rate=0.,\n                 drop_path_rate=0.,\n                 qkv_bias=True,\n                 act_cfg=dict(type='GELU'),\n                 norm_cfg=dict(type='LN'),\n                 sr_ratio=1,\n                 use_conv_ffn=False,\n                 init_cfg=None):\n        super(PVTEncoderLayer, self).__init__(init_cfg=init_cfg)\n\n        # The ret[0] of build_norm_layer is norm name.\n        self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1]\n\n        self.attn = SpatialReductionAttention(\n            embed_dims=embed_dims,\n            num_heads=num_heads,\n            attn_drop=attn_drop_rate,\n            proj_drop=drop_rate,\n            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),\n            qkv_bias=qkv_bias,\n            norm_cfg=norm_cfg,\n            sr_ratio=sr_ratio)\n\n        # The ret[0] of build_norm_layer is norm name.\n        self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]\n\n        self.ffn = MixFFN(\n            embed_dims=embed_dims,\n            feedforward_channels=feedforward_channels,\n            ffn_drop=drop_rate,\n            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),\n            use_conv=use_conv_ffn,\n            act_cfg=act_cfg)\n\n    def forward(self, x, hw_shape):\n        x = self.attn(self.norm1(x), hw_shape, identity=x)\n        x = self.ffn(self.norm2(x), hw_shape, identity=x)\n\n        return x\n\n\nclass AbsolutePositionEmbedding(BaseModule):\n    \"\"\"An implementation of the absolute position embedding in PVT.\n\n    Args:\n        pos_shape (int): The shape of the absolute position embedding.\n        pos_dim (int): The dimension of the absolute position embedding.\n        drop_rate (float): Probability of an element to be zeroed.\n            Default: 0.0.\n    \"\"\"\n\n    def __init__(self, pos_shape, pos_dim, drop_rate=0., init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n\n        if isinstance(pos_shape, int):\n            pos_shape = to_2tuple(pos_shape)\n        elif isinstance(pos_shape, tuple):\n            if len(pos_shape) == 1:\n                pos_shape = to_2tuple(pos_shape[0])\n            assert len(pos_shape) == 2, \\\n                f'The size of image should have length 1 or 2, ' \\\n                f'but got {len(pos_shape)}'\n        self.pos_shape = pos_shape\n        self.pos_dim = pos_dim\n\n        self.pos_embed = nn.Parameter(\n            torch.zeros(1, pos_shape[0] * pos_shape[1], pos_dim))\n        self.drop = nn.Dropout(p=drop_rate)\n\n    def init_weights(self):\n        trunc_normal_(self.pos_embed, std=0.02)\n\n    def resize_pos_embed(self, pos_embed, input_shape, mode='bilinear'):\n        \"\"\"Resize pos_embed weights.\n\n        Resize pos_embed using bilinear interpolate method.\n\n        Args:\n            pos_embed (torch.Tensor): Position embedding weights.\n            input_shape (tuple): Tuple for (downsampled input image height,\n                downsampled input image width).\n            mode (str): Algorithm used for upsampling:\n                ``'nearest'`` | ``'linear'`` | ``'bilinear'`` | ``'bicubic'`` |\n         
       ``'trilinear'``. Default: ``'bilinear'``.\n\n        Return:\n            torch.Tensor: The resized pos_embed of shape [B, L_new, C].\n        \"\"\"\n        assert pos_embed.ndim == 3, 'shape of pos_embed must be [B, L, C]'\n        pos_h, pos_w = self.pos_shape\n        pos_embed_weight = pos_embed[:, (-1 * pos_h * pos_w):]\n        pos_embed_weight = pos_embed_weight.reshape(\n            1, pos_h, pos_w, self.pos_dim).permute(0, 3, 1, 2).contiguous()\n        pos_embed_weight = F.interpolate(\n            pos_embed_weight, size=input_shape, mode=mode)\n        pos_embed_weight = torch.flatten(pos_embed_weight,\n                                         2).transpose(1, 2).contiguous()\n        pos_embed = pos_embed_weight\n\n        return pos_embed\n\n    def forward(self, x, hw_shape, mode='bilinear'):\n        pos_embed = self.resize_pos_embed(self.pos_embed, hw_shape, mode)\n        return self.drop(x + pos_embed)\n\n\n@BACKBONES.register_module()\nclass PyramidVisionTransformer(BaseModule):\n    \"\"\"Pyramid Vision Transformer (PVT)\n\n    Implementation of `Pyramid Vision Transformer: A Versatile Backbone for\n    Dense Prediction without Convolutions\n    <https://arxiv.org/pdf/2102.12122.pdf>`_.\n\n    Args:\n        pretrain_img_size (int | tuple[int]): The size of input image when\n            pretrain. Defaults: 224.\n        in_channels (int): Number of input channels. Default: 3.\n        embed_dims (int): Embedding dimension. Default: 64.\n        num_stags (int): The num of stages. Default: 4.\n        num_layers (Sequence[int]): The layer number of each transformer encode\n            layer. Default: [3, 4, 6, 3].\n        num_heads (Sequence[int]): The attention heads of each transformer\n            encode layer. Default: [1, 2, 5, 8].\n        patch_sizes (Sequence[int]): The patch_size of each patch embedding.\n            Default: [4, 2, 2, 2].\n        strides (Sequence[int]): The stride of each patch embedding.\n            Default: [4, 2, 2, 2].\n        paddings (Sequence[int]): The padding of each patch embedding.\n            Default: [0, 0, 0, 0].\n        sr_ratios (Sequence[int]): The spatial reduction rate of each\n            transformer encode layer. Default: [8, 4, 2, 1].\n        out_indices (Sequence[int] | int): Output from which stages.\n            Default: (0, 1, 2, 3).\n        mlp_ratios (Sequence[int]): The ratio of the mlp hidden dim to the\n            embedding dim of each transformer encode layer.\n            Default: [8, 8, 4, 4].\n        qkv_bias (bool): Enable bias for qkv if True. Default: True.\n        drop_rate (float): Probability of an element to be zeroed.\n            Default 0.0.\n        attn_drop_rate (float): The drop out rate for attention layer.\n            Default 0.0.\n        drop_path_rate (float): stochastic depth rate. Default 0.1.\n        use_abs_pos_embed (bool): If True, add absolute position embedding to\n            the patch embedding. Defaults: True.\n        use_conv_ffn (bool): If True, use Convolutional FFN to replace FFN.\n            Default: False.\n        act_cfg (dict): The activation config for FFNs.\n            Default: dict(type='GELU').\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='LN').\n        pretrained (str, optional): model pretrained path. Default: None.\n        convert_weights (bool): The flag indicates whether the\n            pre-trained model is from the original repo. 
We may need\n            to convert some keys to make it compatible.\n            Default: True.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 pretrain_img_size=224,\n                 in_channels=3,\n                 embed_dims=64,\n                 num_stages=4,\n                 num_layers=[3, 4, 6, 3],\n                 num_heads=[1, 2, 5, 8],\n                 patch_sizes=[4, 2, 2, 2],\n                 strides=[4, 2, 2, 2],\n                 paddings=[0, 0, 0, 0],\n                 sr_ratios=[8, 4, 2, 1],\n                 out_indices=(0, 1, 2, 3),\n                 mlp_ratios=[8, 8, 4, 4],\n                 qkv_bias=True,\n                 drop_rate=0.,\n                 attn_drop_rate=0.,\n                 drop_path_rate=0.1,\n                 use_abs_pos_embed=True,\n                 norm_after_stage=False,\n                 use_conv_ffn=False,\n                 act_cfg=dict(type='GELU'),\n                 norm_cfg=dict(type='LN', eps=1e-6),\n                 pretrained=None,\n                 convert_weights=True,\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n\n        self.convert_weights = convert_weights\n        if isinstance(pretrain_img_size, int):\n            pretrain_img_size = to_2tuple(pretrain_img_size)\n        elif isinstance(pretrain_img_size, tuple):\n            if len(pretrain_img_size) == 1:\n                pretrain_img_size = to_2tuple(pretrain_img_size[0])\n            assert len(pretrain_img_size) == 2, \\\n                f'The size of image should have length 1 or 2, ' \\\n                f'but got {len(pretrain_img_size)}'\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be setting at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            self.init_cfg = init_cfg\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        self.embed_dims = embed_dims\n\n        self.num_stages = num_stages\n        self.num_layers = num_layers\n        self.num_heads = num_heads\n        self.patch_sizes = patch_sizes\n        self.strides = strides\n        self.sr_ratios = sr_ratios\n        assert num_stages == len(num_layers) == len(num_heads) \\\n               == len(patch_sizes) == len(strides) == len(sr_ratios)\n\n        self.out_indices = out_indices\n        assert max(out_indices) < self.num_stages\n        self.pretrained = pretrained\n\n        # transformer encoder\n        dpr = [\n            x.item()\n            for x in torch.linspace(0, drop_path_rate, sum(num_layers))\n        ]  # stochastic num_layer decay rule\n\n        cur = 0\n        self.layers = ModuleList()\n        for i, num_layer in enumerate(num_layers):\n            embed_dims_i = embed_dims * num_heads[i]\n            patch_embed = PatchEmbed(\n                in_channels=in_channels,\n                embed_dims=embed_dims_i,\n                kernel_size=patch_sizes[i],\n                stride=strides[i],\n                padding=paddings[i],\n                bias=True,\n                norm_cfg=norm_cfg)\n\n            layers = ModuleList()\n            if use_abs_pos_embed:\n                pos_shape 
= pretrain_img_size // np.prod(patch_sizes[:i + 1])\n                pos_embed = AbsolutePositionEmbedding(\n                    pos_shape=pos_shape,\n                    pos_dim=embed_dims_i,\n                    drop_rate=drop_rate)\n                layers.append(pos_embed)\n            layers.extend([\n                PVTEncoderLayer(\n                    embed_dims=embed_dims_i,\n                    num_heads=num_heads[i],\n                    feedforward_channels=mlp_ratios[i] * embed_dims_i,\n                    drop_rate=drop_rate,\n                    attn_drop_rate=attn_drop_rate,\n                    drop_path_rate=dpr[cur + idx],\n                    qkv_bias=qkv_bias,\n                    act_cfg=act_cfg,\n                    norm_cfg=norm_cfg,\n                    sr_ratio=sr_ratios[i],\n                    use_conv_ffn=use_conv_ffn) for idx in range(num_layer)\n            ])\n            in_channels = embed_dims_i\n            # The ret[0] of build_norm_layer is norm name.\n            if norm_after_stage:\n                norm = build_norm_layer(norm_cfg, embed_dims_i)[1]\n            else:\n                norm = nn.Identity()\n            self.layers.append(ModuleList([patch_embed, layers, norm]))\n            cur += num_layer\n\n    def init_weights(self):\n        logger = get_root_logger()\n        if self.init_cfg is None:\n            logger.warn(f'No pre-trained weights for '\n                        f'{self.__class__.__name__}, '\n                        f'training start from scratch')\n            for m in self.modules():\n                if isinstance(m, nn.Linear):\n                    trunc_normal_init(m, std=.02, bias=0.)\n                elif isinstance(m, nn.LayerNorm):\n                    constant_init(m, 1.0)\n                elif isinstance(m, nn.Conv2d):\n                    fan_out = m.kernel_size[0] * m.kernel_size[\n                        1] * m.out_channels\n                    fan_out //= m.groups\n                    normal_init(m, 0, math.sqrt(2.0 / fan_out))\n                elif isinstance(m, AbsolutePositionEmbedding):\n                    m.init_weights()\n        else:\n            assert 'checkpoint' in self.init_cfg, f'Only support ' \\\n                                                  f'specify `Pretrained` in ' \\\n                                                  f'`init_cfg` in ' \\\n                                                  f'{self.__class__.__name__} '\n            checkpoint = _load_checkpoint(\n                self.init_cfg.checkpoint, logger=logger, map_location='cpu')\n            logger.warn(f'Load pre-trained model for '\n                        f'{self.__class__.__name__} from original repo')\n            if 'state_dict' in checkpoint:\n                state_dict = checkpoint['state_dict']\n            elif 'model' in checkpoint:\n                state_dict = checkpoint['model']\n            else:\n                state_dict = checkpoint\n            if self.convert_weights:\n                # Because pvt backbones are not supported by mmcls,\n                # so we need to convert pre-trained weights to match this\n                # implementation.\n                state_dict = pvt_convert(state_dict)\n            load_state_dict(self, state_dict, strict=False, logger=logger)\n\n    def forward(self, x):\n        outs = []\n\n        for i, layer in enumerate(self.layers):\n            x, hw_shape = layer[0](x)\n\n            for block in layer[1]:\n                x = block(x, hw_shape)\n            x = 
layer[2](x)\n            x = nlc_to_nchw(x, hw_shape)\n            if i in self.out_indices:\n                outs.append(x)\n\n        return outs\n\n\n@BACKBONES.register_module()\nclass PyramidVisionTransformerV2(PyramidVisionTransformer):\n    \"\"\"Implementation of `PVTv2: Improved Baselines with Pyramid Vision\n    Transformer <https://arxiv.org/pdf/2106.13797.pdf>`_.\"\"\"\n\n    def __init__(self, **kwargs):\n        super(PyramidVisionTransformerV2, self).__init__(\n            patch_sizes=[7, 3, 3, 3],\n            paddings=[3, 1, 1, 1],\n            use_abs_pos_embed=False,\n            norm_after_stage=True,\n            use_conv_ffn=True,\n            **kwargs)\n"
  },
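  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/pvt_usage_sketch.py",
    "content": "# Minimal usage sketch for the PVT backbones defined in pvt.py.\n# Illustrative only: this file name/location and the configuration values\n# below (roughly a PVTv2-b0 sized model) are assumptions, not a tested\n# recipe taken from this repository.\nimport torch\n\nfrom mmdet.models import PyramidVisionTransformerV2\n\n# PyramidVisionTransformerV2 fixes patch_sizes, paddings, use_conv_ffn and\n# norm_after_stage itself, so only the remaining PVT arguments are passed.\nmodel = PyramidVisionTransformerV2(\n    embed_dims=32,\n    num_layers=[2, 2, 2, 2],\n    num_heads=[1, 2, 5, 8],\n    mlp_ratios=[8, 8, 4, 4],\n    sr_ratios=[8, 4, 2, 1],\n    out_indices=(0, 1, 2, 3))\nmodel.init_weights()\nmodel.eval()\n\n# One forward pass on a random image; the backbone returns one NCHW feature\n# map per stage in out_indices (strides 4, 8, 16 and 32).\ninputs = torch.rand(1, 3, 224, 224)\nwith torch.no_grad():\n    level_outputs = model(inputs)\nfor level_out in level_outputs:\n    print(tuple(level_out.shape))\n"
  },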
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/regnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport numpy as np\nimport torch.nn as nn\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\n\nfrom ..builder import BACKBONES\nfrom .resnet import ResNet\nfrom .resnext import Bottleneck\n\n\n@BACKBONES.register_module()\nclass RegNet(ResNet):\n    \"\"\"RegNet backbone.\n\n    More details can be found in `paper <https://arxiv.org/abs/2003.13678>`_ .\n\n    Args:\n        arch (dict): The parameter of RegNets.\n\n            - w0 (int): initial width\n            - wa (float): slope of width\n            - wm (float): quantization parameter to quantize the width\n            - depth (int): depth of the backbone\n            - group_w (int): width of group\n            - bot_mul (float): bottleneck ratio, i.e. expansion of bottleneck.\n        strides (Sequence[int]): Strides of the first block of each stage.\n        base_channels (int): Base channels after stem layer.\n        in_channels (int): Number of input image channels. Default: 3.\n        dilations (Sequence[int]): Dilation of each stage.\n        out_indices (Sequence[int]): Output from which stages.\n        style (str): `pytorch` or `caffe`. If set to \"pytorch\", the stride-two\n            layer is the 3x3 conv layer, otherwise the stride-two layer is\n            the first 1x1 conv layer.\n        frozen_stages (int): Stages to be frozen (all param fixed). -1 means\n            not freezing any parameters.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        zero_init_residual (bool): whether to use zero init for last norm layer\n            in resblocks to let them behave as identity.\n        pretrained (str, optional): model pretrained path. Default: None\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n\n    Example:\n        >>> from mmdet.models import RegNet\n        >>> import torch\n        >>> self = RegNet(\n                arch=dict(\n                    w0=88,\n                    wa=26.31,\n                    wm=2.25,\n                    group_w=48,\n                    depth=25,\n                    bot_mul=1.0))\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 3, 32, 32)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...     
print(tuple(level_out.shape))\n        (1, 96, 8, 8)\n        (1, 192, 4, 4)\n        (1, 432, 2, 2)\n        (1, 1008, 1, 1)\n    \"\"\"\n    arch_settings = {\n        'regnetx_400mf':\n        dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),\n        'regnetx_800mf':\n        dict(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16, bot_mul=1.0),\n        'regnetx_1.6gf':\n        dict(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18, bot_mul=1.0),\n        'regnetx_3.2gf':\n        dict(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25, bot_mul=1.0),\n        'regnetx_4.0gf':\n        dict(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23, bot_mul=1.0),\n        'regnetx_6.4gf':\n        dict(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17, bot_mul=1.0),\n        'regnetx_8.0gf':\n        dict(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23, bot_mul=1.0),\n        'regnetx_12gf':\n        dict(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19, bot_mul=1.0),\n    }\n\n    def __init__(self,\n                 arch,\n                 in_channels=3,\n                 stem_channels=32,\n                 base_channels=32,\n                 strides=(2, 2, 2, 2),\n                 dilations=(1, 1, 1, 1),\n                 out_indices=(0, 1, 2, 3),\n                 style='pytorch',\n                 deep_stem=False,\n                 avg_down=False,\n                 frozen_stages=-1,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 norm_eval=True,\n                 dcn=None,\n                 stage_with_dcn=(False, False, False, False),\n                 plugins=None,\n                 with_cp=False,\n                 zero_init_residual=True,\n                 pretrained=None,\n                 init_cfg=None):\n        super(ResNet, self).__init__(init_cfg)\n\n        # Generate RegNet parameters first\n        if isinstance(arch, str):\n            assert arch in self.arch_settings, \\\n                f'\"arch\": \"{arch}\" is not one of the' \\\n                ' arch_settings'\n            arch = self.arch_settings[arch]\n        elif not isinstance(arch, dict):\n            raise ValueError('Expect \"arch\" to be either a string '\n                             f'or a dict, got {type(arch)}')\n\n        widths, num_stages = self.generate_regnet(\n            arch['w0'],\n            arch['wa'],\n            arch['wm'],\n            arch['depth'],\n        )\n        # Convert to per stage format\n        stage_widths, stage_blocks = self.get_stages_from_blocks(widths)\n        # Generate group widths and bot muls\n        group_widths = [arch['group_w'] for _ in range(num_stages)]\n        self.bottleneck_ratio = [arch['bot_mul'] for _ in range(num_stages)]\n        # Adjust the compatibility of stage_widths and group_widths\n        stage_widths, group_widths = self.adjust_width_group(\n            stage_widths, self.bottleneck_ratio, group_widths)\n\n        # Group params by stage\n        self.stage_widths = stage_widths\n        self.group_widths = group_widths\n        self.depth = sum(stage_blocks)\n        self.stem_channels = stem_channels\n        self.base_channels = base_channels\n        self.num_stages = num_stages\n        assert num_stages >= 1 and num_stages <= 4\n        self.strides = strides\n        self.dilations = dilations\n        assert len(strides) == len(dilations) == num_stages\n        self.out_indices = out_indices\n        assert max(out_indices) < num_stages\n        self.style = style\n 
       self.deep_stem = deep_stem\n        self.avg_down = avg_down\n        self.frozen_stages = frozen_stages\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.with_cp = with_cp\n        self.norm_eval = norm_eval\n        self.dcn = dcn\n        self.stage_with_dcn = stage_with_dcn\n        if dcn is not None:\n            assert len(stage_with_dcn) == num_stages\n        self.plugins = plugins\n        self.zero_init_residual = zero_init_residual\n        self.block = Bottleneck\n        expansion_bak = self.block.expansion\n        self.block.expansion = 1\n        self.stage_blocks = stage_blocks[:num_stages]\n\n        self._make_stem_layer(in_channels, stem_channels)\n\n        block_init_cfg = None\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            if init_cfg is None:\n                self.init_cfg = [\n                    dict(type='Kaiming', layer='Conv2d'),\n                    dict(\n                        type='Constant',\n                        val=1,\n                        layer=['_BatchNorm', 'GroupNorm'])\n                ]\n                if self.zero_init_residual:\n                    block_init_cfg = dict(\n                        type='Constant', val=0, override=dict(name='norm3'))\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        self.inplanes = stem_channels\n        self.res_layers = []\n        for i, num_blocks in enumerate(self.stage_blocks):\n            stride = self.strides[i]\n            dilation = self.dilations[i]\n            group_width = self.group_widths[i]\n            width = int(round(self.stage_widths[i] * self.bottleneck_ratio[i]))\n            stage_groups = width // group_width\n\n            dcn = self.dcn if self.stage_with_dcn[i] else None\n            if self.plugins is not None:\n                stage_plugins = self.make_stage_plugins(self.plugins, i)\n            else:\n                stage_plugins = None\n\n            res_layer = self.make_res_layer(\n                block=self.block,\n                inplanes=self.inplanes,\n                planes=self.stage_widths[i],\n                num_blocks=num_blocks,\n                stride=stride,\n                dilation=dilation,\n                style=self.style,\n                avg_down=self.avg_down,\n                with_cp=self.with_cp,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg,\n                dcn=dcn,\n                plugins=stage_plugins,\n                groups=stage_groups,\n                base_width=group_width,\n                base_channels=self.stage_widths[i],\n                init_cfg=block_init_cfg)\n            self.inplanes = self.stage_widths[i]\n            layer_name = f'layer{i + 1}'\n            self.add_module(layer_name, res_layer)\n            self.res_layers.append(layer_name)\n\n        self._freeze_stages()\n\n        self.feat_dim = stage_widths[-1]\n        self.block.expansion = expansion_bak\n\n    def _make_stem_layer(self, in_channels, base_channels):\n        self.conv1 = build_conv_layer(\n            self.conv_cfg,\n            in_channels,\n            
base_channels,\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            bias=False)\n        self.norm1_name, norm1 = build_norm_layer(\n            self.norm_cfg, base_channels, postfix=1)\n        self.add_module(self.norm1_name, norm1)\n        self.relu = nn.ReLU(inplace=True)\n\n    def generate_regnet(self,\n                        initial_width,\n                        width_slope,\n                        width_parameter,\n                        depth,\n                        divisor=8):\n        \"\"\"Generates per block width from RegNet parameters.\n\n        Args:\n            initial_width ([int]): Initial width of the backbone\n            width_slope ([float]): Slope of the quantized linear function\n            width_parameter ([int]): Parameter used to quantize the width.\n            depth ([int]): Depth of the backbone.\n            divisor (int, optional): The divisor of channels. Defaults to 8.\n\n        Returns:\n            list, int: return a list of widths of each stage and the number \\\n                of stages\n        \"\"\"\n        assert width_slope >= 0\n        assert initial_width > 0\n        assert width_parameter > 1\n        assert initial_width % divisor == 0\n        widths_cont = np.arange(depth) * width_slope + initial_width\n        ks = np.round(\n            np.log(widths_cont / initial_width) / np.log(width_parameter))\n        widths = initial_width * np.power(width_parameter, ks)\n        widths = np.round(np.divide(widths, divisor)) * divisor\n        num_stages = len(np.unique(widths))\n        widths, widths_cont = widths.astype(int).tolist(), widths_cont.tolist()\n        return widths, num_stages\n\n    @staticmethod\n    def quantize_float(number, divisor):\n        \"\"\"Converts a float to closest non-zero int divisible by divisor.\n\n        Args:\n            number (int): Original number to be quantized.\n            divisor (int): Divisor used to quantize the number.\n\n        Returns:\n            int: quantized number that is divisible by devisor.\n        \"\"\"\n        return int(round(number / divisor) * divisor)\n\n    def adjust_width_group(self, widths, bottleneck_ratio, groups):\n        \"\"\"Adjusts the compatibility of widths and groups.\n\n        Args:\n            widths (list[int]): Width of each stage.\n            bottleneck_ratio (float): Bottleneck ratio.\n            groups (int): number of groups in each stage\n\n        Returns:\n            tuple(list): The adjusted widths and groups of each stage.\n        \"\"\"\n        bottleneck_width = [\n            int(w * b) for w, b in zip(widths, bottleneck_ratio)\n        ]\n        groups = [min(g, w_bot) for g, w_bot in zip(groups, bottleneck_width)]\n        bottleneck_width = [\n            self.quantize_float(w_bot, g)\n            for w_bot, g in zip(bottleneck_width, groups)\n        ]\n        widths = [\n            int(w_bot / b)\n            for w_bot, b in zip(bottleneck_width, bottleneck_ratio)\n        ]\n        return widths, groups\n\n    def get_stages_from_blocks(self, widths):\n        \"\"\"Gets widths/stage_blocks of network at each stage.\n\n        Args:\n            widths (list[int]): Width in each stage.\n\n        Returns:\n            tuple(list): width and depth of each stage\n        \"\"\"\n        width_diff = [\n            width != width_prev\n            for width, width_prev in zip(widths + [0], [0] + widths)\n        ]\n        stage_widths = [\n            width for width, diff in 
zip(widths, width_diff[:-1]) if diff\n        ]\n        stage_blocks = np.diff([\n            depth for depth, diff in zip(range(len(width_diff)), width_diff)\n            if diff\n        ]).tolist()\n        return stage_widths, stage_blocks\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        x = self.conv1(x)\n        x = self.norm1(x)\n        x = self.relu(x)\n\n        outs = []\n        for i, layer_name in enumerate(self.res_layers):\n            res_layer = getattr(self, layer_name)\n            x = res_layer(x)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n"
  },
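  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/regnet_width_sketch.py",
    "content": "# Worked example for the RegNet width generation implemented in regnet.py\n# (generate_regnet -> get_stages_from_blocks -> adjust_width_group).\n# Illustrative only: this file name/location is an assumption; the arch\n# parameters are the 'regnetx_3.2gf' entry from RegNet.arch_settings.\nfrom mmdet.models import RegNet\n\nmodel = RegNet(\n    arch=dict(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25, bot_mul=1.0))\n\n# Per-block widths follow w0 * wm**k and are rounded to a multiple of the\n# divisor (8); the number of distinct widths defines the number of stages.\nwidths, num_stages = model.generate_regnet(\n    initial_width=88, width_slope=26.31, width_parameter=2.25, depth=25)\nprint(widths)              # per-block widths\nprint(num_stages)          # number of stages\nprint(model.stage_widths)  # per-stage widths after group-width adjustment\nprint(model.stage_blocks)  # number of blocks per stage\nprint(model.group_widths)  # group width actually used in each stage\n"
  },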
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/res2net.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport torch\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\nfrom mmcv.runner import Sequential\n\nfrom ..builder import BACKBONES\nfrom .resnet import Bottleneck as _Bottleneck\nfrom .resnet import ResNet\n\n\nclass Bottle2neck(_Bottleneck):\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 scales=4,\n                 base_width=26,\n                 base_channels=64,\n                 stage_type='normal',\n                 **kwargs):\n        \"\"\"Bottle2neck block for Res2Net.\n\n        If style is \"pytorch\", the stride-two layer is the 3x3 conv layer, if\n        it is \"caffe\", the stride-two layer is the first 1x1 conv layer.\n        \"\"\"\n        super(Bottle2neck, self).__init__(inplanes, planes, **kwargs)\n        assert scales > 1, 'Res2Net degenerates to ResNet when scales = 1.'\n        width = int(math.floor(self.planes * (base_width / base_channels)))\n\n        self.norm1_name, norm1 = build_norm_layer(\n            self.norm_cfg, width * scales, postfix=1)\n        self.norm3_name, norm3 = build_norm_layer(\n            self.norm_cfg, self.planes * self.expansion, postfix=3)\n\n        self.conv1 = build_conv_layer(\n            self.conv_cfg,\n            self.inplanes,\n            width * scales,\n            kernel_size=1,\n            stride=self.conv1_stride,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n\n        if stage_type == 'stage' and self.conv2_stride != 1:\n            self.pool = nn.AvgPool2d(\n                kernel_size=3, stride=self.conv2_stride, padding=1)\n        convs = []\n        bns = []\n\n        fallback_on_stride = False\n        if self.with_dcn:\n            fallback_on_stride = self.dcn.pop('fallback_on_stride', False)\n        if not self.with_dcn or fallback_on_stride:\n            for i in range(scales - 1):\n                convs.append(\n                    build_conv_layer(\n                        self.conv_cfg,\n                        width,\n                        width,\n                        kernel_size=3,\n                        stride=self.conv2_stride,\n                        padding=self.dilation,\n                        dilation=self.dilation,\n                        bias=False))\n                bns.append(\n                    build_norm_layer(self.norm_cfg, width, postfix=i + 1)[1])\n            self.convs = nn.ModuleList(convs)\n            self.bns = nn.ModuleList(bns)\n        else:\n            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'\n            for i in range(scales - 1):\n                convs.append(\n                    build_conv_layer(\n                        self.dcn,\n                        width,\n                        width,\n                        kernel_size=3,\n                        stride=self.conv2_stride,\n                        padding=self.dilation,\n                        dilation=self.dilation,\n                        bias=False))\n                bns.append(\n                    build_norm_layer(self.norm_cfg, width, postfix=i + 1)[1])\n            self.convs = nn.ModuleList(convs)\n            self.bns = nn.ModuleList(bns)\n\n        self.conv3 = build_conv_layer(\n            self.conv_cfg,\n            width * scales,\n            self.planes * self.expansion,\n            kernel_size=1,\n            
bias=False)\n        self.add_module(self.norm3_name, norm3)\n\n        self.stage_type = stage_type\n        self.scales = scales\n        self.width = width\n        delattr(self, 'conv2')\n        delattr(self, self.norm2_name)\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n\n        def _inner_forward(x):\n            identity = x\n\n            out = self.conv1(x)\n            out = self.norm1(out)\n            out = self.relu(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv1_plugin_names)\n\n            spx = torch.split(out, self.width, 1)\n            sp = self.convs[0](spx[0].contiguous())\n            sp = self.relu(self.bns[0](sp))\n            out = sp\n            for i in range(1, self.scales - 1):\n                if self.stage_type == 'stage':\n                    sp = spx[i]\n                else:\n                    sp = sp + spx[i]\n                sp = self.convs[i](sp.contiguous())\n                sp = self.relu(self.bns[i](sp))\n                out = torch.cat((out, sp), 1)\n\n            if self.stage_type == 'normal' or self.conv2_stride == 1:\n                out = torch.cat((out, spx[self.scales - 1]), 1)\n            elif self.stage_type == 'stage':\n                out = torch.cat((out, self.pool(spx[self.scales - 1])), 1)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv2_plugin_names)\n\n            out = self.conv3(out)\n            out = self.norm3(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv3_plugin_names)\n\n            if self.downsample is not None:\n                identity = self.downsample(x)\n\n            out += identity\n\n            return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        else:\n            out = _inner_forward(x)\n\n        out = self.relu(out)\n\n        return out\n\n\nclass Res2Layer(Sequential):\n    \"\"\"Res2Layer to build Res2Net style backbone.\n\n    Args:\n        block (nn.Module): block used to build ResLayer.\n        inplanes (int): inplanes of block.\n        planes (int): planes of block.\n        num_blocks (int): number of blocks.\n        stride (int): stride of the first block. Default: 1\n        avg_down (bool): Use AvgPool instead of stride conv when\n            downsampling in the bottle2neck. Default: False\n        conv_cfg (dict): dictionary to construct and config conv layer.\n            Default: None\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: dict(type='BN')\n        scales (int): Scales used in Res2Net. Default: 4\n        base_width (int): Basic width of each scale. 
Default: 26\n    \"\"\"\n\n    def __init__(self,\n                 block,\n                 inplanes,\n                 planes,\n                 num_blocks,\n                 stride=1,\n                 avg_down=True,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 scales=4,\n                 base_width=26,\n                 **kwargs):\n        self.block = block\n\n        downsample = None\n        if stride != 1 or inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                nn.AvgPool2d(\n                    kernel_size=stride,\n                    stride=stride,\n                    ceil_mode=True,\n                    count_include_pad=False),\n                build_conv_layer(\n                    conv_cfg,\n                    inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=1,\n                    bias=False),\n                build_norm_layer(norm_cfg, planes * block.expansion)[1],\n            )\n\n        layers = []\n        layers.append(\n            block(\n                inplanes=inplanes,\n                planes=planes,\n                stride=stride,\n                downsample=downsample,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                scales=scales,\n                base_width=base_width,\n                stage_type='stage',\n                **kwargs))\n        inplanes = planes * block.expansion\n        for i in range(1, num_blocks):\n            layers.append(\n                block(\n                    inplanes=inplanes,\n                    planes=planes,\n                    stride=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    scales=scales,\n                    base_width=base_width,\n                    **kwargs))\n        super(Res2Layer, self).__init__(*layers)\n\n\n@BACKBONES.register_module()\nclass Res2Net(ResNet):\n    \"\"\"Res2Net backbone.\n\n    Args:\n        scales (int): Scales used in Res2Net. Default: 4\n        base_width (int): Basic width of each scale. Default: 26\n        depth (int): Depth of res2net, from {50, 101, 152}.\n        in_channels (int): Number of input image channels. Default: 3.\n        num_stages (int): Res2net stages. Default: 4.\n        strides (Sequence[int]): Strides of the first block of each stage.\n        dilations (Sequence[int]): Dilation of each stage.\n        out_indices (Sequence[int]): Output from which stages.\n        style (str): `pytorch` or `caffe`. If set to \"pytorch\", the stride-two\n            layer is the 3x3 conv layer, otherwise the stride-two layer is\n            the first 1x1 conv layer.\n        deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv\n        avg_down (bool): Use AvgPool instead of stride conv when\n            downsampling in the bottle2neck.\n        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).\n            -1 means not freezing any parameters.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). 
Note: Effect on Batch Norm\n            and its variants only.\n        plugins (list[dict]): List of plugins for stages, each dict contains:\n\n            - cfg (dict, required): Cfg dict to build plugin.\n            - position (str, required): Position inside block to insert\n              plugin, options are 'after_conv1', 'after_conv2', 'after_conv3'.\n            - stages (tuple[bool], optional): Stages to apply plugin, length\n              should be same as 'num_stages'.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        zero_init_residual (bool): Whether to use zero init for last norm layer\n            in resblocks to let them behave as identity.\n        pretrained (str, optional): model pretrained path. Default: None\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n\n    Example:\n        >>> from mmdet.models import Res2Net\n        >>> import torch\n        >>> self = Res2Net(depth=50, scales=4, base_width=26)\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 3, 32, 32)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...     print(tuple(level_out.shape))\n        (1, 256, 8, 8)\n        (1, 512, 4, 4)\n        (1, 1024, 2, 2)\n        (1, 2048, 1, 1)\n    \"\"\"\n\n    arch_settings = {\n        50: (Bottle2neck, (3, 4, 6, 3)),\n        101: (Bottle2neck, (3, 4, 23, 3)),\n        152: (Bottle2neck, (3, 8, 36, 3))\n    }\n\n    def __init__(self,\n                 scales=4,\n                 base_width=26,\n                 style='pytorch',\n                 deep_stem=True,\n                 avg_down=True,\n                 pretrained=None,\n                 init_cfg=None,\n                 **kwargs):\n        self.scales = scales\n        self.base_width = base_width\n        super(Res2Net, self).__init__(\n            style='pytorch',\n            deep_stem=True,\n            avg_down=True,\n            pretrained=pretrained,\n            init_cfg=init_cfg,\n            **kwargs)\n\n    def make_res_layer(self, **kwargs):\n        return Res2Layer(\n            scales=self.scales,\n            base_width=self.base_width,\n            base_channels=self.base_channels,\n            **kwargs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/resnest.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import BACKBONES\nfrom ..utils import ResLayer\nfrom .resnet import Bottleneck as _Bottleneck\nfrom .resnet import ResNetV1d\n\n\nclass RSoftmax(nn.Module):\n    \"\"\"Radix Softmax module in ``SplitAttentionConv2d``.\n\n    Args:\n        radix (int): Radix of input.\n        groups (int): Groups of input.\n    \"\"\"\n\n    def __init__(self, radix, groups):\n        super().__init__()\n        self.radix = radix\n        self.groups = groups\n\n    def forward(self, x):\n        batch = x.size(0)\n        if self.radix > 1:\n            x = x.view(batch, self.groups, self.radix, -1).transpose(1, 2)\n            x = F.softmax(x, dim=1)\n            x = x.reshape(batch, -1)\n        else:\n            x = torch.sigmoid(x)\n        return x\n\n\nclass SplitAttentionConv2d(BaseModule):\n    \"\"\"Split-Attention Conv2d in ResNeSt.\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        channels (int): Number of intermediate channels.\n        kernel_size (int | tuple[int]): Size of the convolution kernel.\n        stride (int | tuple[int]): Stride of the convolution.\n        padding (int | tuple[int]): Zero-padding added to both sides of\n        dilation (int | tuple[int]): Spacing between kernel elements.\n        groups (int): Number of blocked connections from input channels to\n            output channels.\n        groups (int): Same as nn.Conv2d.\n        radix (int): Radix of SpltAtConv2d. Default: 2\n        reduction_factor (int): Reduction factor of inter_channels. Default: 4.\n        conv_cfg (dict): Config dict for convolution layer. Default: None,\n            which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer. Default: None.\n        dcn (dict): Config dict for DCN. 
Default: None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 channels,\n                 kernel_size,\n                 stride=1,\n                 padding=0,\n                 dilation=1,\n                 groups=1,\n                 radix=2,\n                 reduction_factor=4,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 dcn=None,\n                 init_cfg=None):\n        super(SplitAttentionConv2d, self).__init__(init_cfg)\n        inter_channels = max(in_channels * radix // reduction_factor, 32)\n        self.radix = radix\n        self.groups = groups\n        self.channels = channels\n        self.with_dcn = dcn is not None\n        self.dcn = dcn\n        fallback_on_stride = False\n        if self.with_dcn:\n            fallback_on_stride = self.dcn.pop('fallback_on_stride', False)\n        if self.with_dcn and not fallback_on_stride:\n            assert conv_cfg is None, 'conv_cfg must be None for DCN'\n            conv_cfg = dcn\n        self.conv = build_conv_layer(\n            conv_cfg,\n            in_channels,\n            channels * radix,\n            kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            groups=groups * radix,\n            bias=False)\n        # To be consistent with original implementation, starting from 0\n        self.norm0_name, norm0 = build_norm_layer(\n            norm_cfg, channels * radix, postfix=0)\n        self.add_module(self.norm0_name, norm0)\n        self.relu = nn.ReLU(inplace=True)\n        self.fc1 = build_conv_layer(\n            None, channels, inter_channels, 1, groups=self.groups)\n        self.norm1_name, norm1 = build_norm_layer(\n            norm_cfg, inter_channels, postfix=1)\n        self.add_module(self.norm1_name, norm1)\n        self.fc2 = build_conv_layer(\n            None, inter_channels, channels * radix, 1, groups=self.groups)\n        self.rsoftmax = RSoftmax(radix, groups)\n\n    @property\n    def norm0(self):\n        \"\"\"nn.Module: the normalization layer named \"norm0\" \"\"\"\n        return getattr(self, self.norm0_name)\n\n    @property\n    def norm1(self):\n        \"\"\"nn.Module: the normalization layer named \"norm1\" \"\"\"\n        return getattr(self, self.norm1_name)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.norm0(x)\n        x = self.relu(x)\n\n        batch, rchannel = x.shape[:2]\n        batch = x.size(0)\n        if self.radix > 1:\n            splits = x.view(batch, self.radix, -1, *x.shape[2:])\n            gap = splits.sum(dim=1)\n        else:\n            gap = x\n        gap = F.adaptive_avg_pool2d(gap, 1)\n        gap = self.fc1(gap)\n\n        gap = self.norm1(gap)\n        gap = self.relu(gap)\n\n        atten = self.fc2(gap)\n        atten = self.rsoftmax(atten).view(batch, -1, 1, 1)\n\n        if self.radix > 1:\n            attens = atten.view(batch, self.radix, -1, *atten.shape[2:])\n            out = torch.sum(attens * splits, dim=1)\n        else:\n            out = atten * x\n        return out.contiguous()\n\n\nclass Bottleneck(_Bottleneck):\n    \"\"\"Bottleneck block for ResNeSt.\n\n    Args:\n        inplane (int): Input planes of this block.\n        planes (int): Middle planes of this block.\n        groups (int): Groups of conv2.\n        base_width (int): Base of width in terms of base 
channels. Default: 4.\n        base_channels (int): Base of channels for calculating width.\n            Default: 64.\n        radix (int): Radix of SpltAtConv2d. Default: 2\n        reduction_factor (int): Reduction factor of inter_channels in\n            SplitAttentionConv2d. Default: 4.\n        avg_down_stride (bool): Whether to use average pool for stride in\n            Bottleneck. Default: True.\n        kwargs (dict): Key word arguments for base class.\n    \"\"\"\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 groups=1,\n                 base_width=4,\n                 base_channels=64,\n                 radix=2,\n                 reduction_factor=4,\n                 avg_down_stride=True,\n                 **kwargs):\n        \"\"\"Bottleneck block for ResNeSt.\"\"\"\n        super(Bottleneck, self).__init__(inplanes, planes, **kwargs)\n\n        if groups == 1:\n            width = self.planes\n        else:\n            width = math.floor(self.planes *\n                               (base_width / base_channels)) * groups\n\n        self.avg_down_stride = avg_down_stride and self.conv2_stride > 1\n\n        self.norm1_name, norm1 = build_norm_layer(\n            self.norm_cfg, width, postfix=1)\n        self.norm3_name, norm3 = build_norm_layer(\n            self.norm_cfg, self.planes * self.expansion, postfix=3)\n\n        self.conv1 = build_conv_layer(\n            self.conv_cfg,\n            self.inplanes,\n            width,\n            kernel_size=1,\n            stride=self.conv1_stride,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n        self.with_modulated_dcn = False\n        self.conv2 = SplitAttentionConv2d(\n            width,\n            width,\n            kernel_size=3,\n            stride=1 if self.avg_down_stride else self.conv2_stride,\n            padding=self.dilation,\n            dilation=self.dilation,\n            groups=groups,\n            radix=radix,\n            reduction_factor=reduction_factor,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg,\n            dcn=self.dcn)\n        delattr(self, self.norm2_name)\n\n        if self.avg_down_stride:\n            self.avd_layer = nn.AvgPool2d(3, self.conv2_stride, padding=1)\n\n        self.conv3 = build_conv_layer(\n            self.conv_cfg,\n            width,\n            self.planes * self.expansion,\n            kernel_size=1,\n            bias=False)\n        self.add_module(self.norm3_name, norm3)\n\n    def forward(self, x):\n\n        def _inner_forward(x):\n            identity = x\n\n            out = self.conv1(x)\n            out = self.norm1(out)\n            out = self.relu(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv1_plugin_names)\n\n            out = self.conv2(out)\n\n            if self.avg_down_stride:\n                out = self.avd_layer(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv2_plugin_names)\n\n            out = self.conv3(out)\n            out = self.norm3(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv3_plugin_names)\n\n            if self.downsample is not None:\n                identity = self.downsample(x)\n\n            out += identity\n\n            return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        
else:\n            out = _inner_forward(x)\n\n        out = self.relu(out)\n\n        return out\n\n\n@BACKBONES.register_module()\nclass ResNeSt(ResNetV1d):\n    \"\"\"ResNeSt backbone.\n\n    Args:\n        groups (int): Number of groups of Bottleneck. Default: 1\n        base_width (int): Base width of Bottleneck. Default: 4\n        radix (int): Radix of SplitAttentionConv2d. Default: 2\n        reduction_factor (int): Reduction factor of inter_channels in\n            SplitAttentionConv2d. Default: 4.\n        avg_down_stride (bool): Whether to use average pool for stride in\n            Bottleneck. Default: True.\n        kwargs (dict): Keyword arguments for ResNet.\n    \"\"\"\n\n    arch_settings = {\n        50: (Bottleneck, (3, 4, 6, 3)),\n        101: (Bottleneck, (3, 4, 23, 3)),\n        152: (Bottleneck, (3, 8, 36, 3)),\n        200: (Bottleneck, (3, 24, 36, 3))\n    }\n\n    def __init__(self,\n                 groups=1,\n                 base_width=4,\n                 radix=2,\n                 reduction_factor=4,\n                 avg_down_stride=True,\n                 **kwargs):\n        self.groups = groups\n        self.base_width = base_width\n        self.radix = radix\n        self.reduction_factor = reduction_factor\n        self.avg_down_stride = avg_down_stride\n        super(ResNeSt, self).__init__(**kwargs)\n\n    def make_res_layer(self, **kwargs):\n        \"\"\"Pack all blocks in a stage into a ``ResLayer``.\"\"\"\n        return ResLayer(\n            groups=self.groups,\n            base_width=self.base_width,\n            base_channels=self.base_channels,\n            radix=self.radix,\n            reduction_factor=self.reduction_factor,\n            avg_down_stride=self.avg_down_stride,\n            **kwargs)\n"
  },
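  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/resnest_usage_sketch.py",
    "content": "# Minimal usage sketch for the ResNeSt backbone defined in resnest.py,\n# mirroring the docstring examples of the other backbones in this folder.\n# Illustrative only: this file name/location and the chosen depth/radix\n# values are assumptions, not a configuration taken from this repository.\nimport torch\n\nfrom mmdet.models import ResNeSt\n\n# ResNeSt-50 style backbone: Split-Attention bottlenecks (radix=2); the deep\n# stem and average-pool downsampling come from the ResNetV1d base class.\nmodel = ResNeSt(depth=50, radix=2, reduction_factor=4, out_indices=(0, 1, 2, 3))\nmodel.eval()\n\ninputs = torch.rand(1, 3, 32, 32)\nwith torch.no_grad():\n    level_outputs = model(inputs)\nfor level_out in level_outputs:\n    print(tuple(level_out.shape))\n"
  },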
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/resnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn import build_conv_layer, build_norm_layer, build_plugin_layer\nfrom mmcv.runner import BaseModule\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom ..builder import BACKBONES\nfrom ..utils import ResLayer\n\n\nclass BasicBlock(BaseModule):\n    expansion = 1\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 dilation=1,\n                 downsample=None,\n                 style='pytorch',\n                 with_cp=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 dcn=None,\n                 plugins=None,\n                 init_cfg=None):\n        super(BasicBlock, self).__init__(init_cfg)\n        assert dcn is None, 'Not implemented yet.'\n        assert plugins is None, 'Not implemented yet.'\n\n        self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)\n        self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)\n\n        self.conv1 = build_conv_layer(\n            conv_cfg,\n            inplanes,\n            planes,\n            3,\n            stride=stride,\n            padding=dilation,\n            dilation=dilation,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n        self.conv2 = build_conv_layer(\n            conv_cfg, planes, planes, 3, padding=1, bias=False)\n        self.add_module(self.norm2_name, norm2)\n\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n        self.dilation = dilation\n        self.with_cp = with_cp\n\n    @property\n    def norm1(self):\n        \"\"\"nn.Module: normalization layer after the first convolution layer\"\"\"\n        return getattr(self, self.norm1_name)\n\n    @property\n    def norm2(self):\n        \"\"\"nn.Module: normalization layer after the second convolution layer\"\"\"\n        return getattr(self, self.norm2_name)\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n\n        def _inner_forward(x):\n            identity = x\n\n            out = self.conv1(x)\n            out = self.norm1(out)\n            out = self.relu(out)\n\n            out = self.conv2(out)\n            out = self.norm2(out)\n\n            if self.downsample is not None:\n                identity = self.downsample(x)\n\n            out += identity\n\n            return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        else:\n            out = _inner_forward(x)\n\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(BaseModule):\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 dilation=1,\n                 downsample=None,\n                 style='pytorch',\n                 with_cp=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 dcn=None,\n                 plugins=None,\n                 init_cfg=None):\n        \"\"\"Bottleneck block for ResNet.\n\n        If style is \"pytorch\", the stride-two layer is the 3x3 conv layer, if\n        it is \"caffe\", the stride-two layer is the first 1x1 conv layer.\n        \"\"\"\n        super(Bottleneck, self).__init__(init_cfg)\n        assert style in 
['pytorch', 'caffe']\n        assert dcn is None or isinstance(dcn, dict)\n        assert plugins is None or isinstance(plugins, list)\n        if plugins is not None:\n            allowed_position = ['after_conv1', 'after_conv2', 'after_conv3']\n            assert all(p['position'] in allowed_position for p in plugins)\n\n        self.inplanes = inplanes\n        self.planes = planes\n        self.stride = stride\n        self.dilation = dilation\n        self.style = style\n        self.with_cp = with_cp\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.dcn = dcn\n        self.with_dcn = dcn is not None\n        self.plugins = plugins\n        self.with_plugins = plugins is not None\n\n        if self.with_plugins:\n            # collect plugins for conv1/conv2/conv3\n            self.after_conv1_plugins = [\n                plugin['cfg'] for plugin in plugins\n                if plugin['position'] == 'after_conv1'\n            ]\n            self.after_conv2_plugins = [\n                plugin['cfg'] for plugin in plugins\n                if plugin['position'] == 'after_conv2'\n            ]\n            self.after_conv3_plugins = [\n                plugin['cfg'] for plugin in plugins\n                if plugin['position'] == 'after_conv3'\n            ]\n\n        if self.style == 'pytorch':\n            self.conv1_stride = 1\n            self.conv2_stride = stride\n        else:\n            self.conv1_stride = stride\n            self.conv2_stride = 1\n\n        self.norm1_name, norm1 = build_norm_layer(norm_cfg, planes, postfix=1)\n        self.norm2_name, norm2 = build_norm_layer(norm_cfg, planes, postfix=2)\n        self.norm3_name, norm3 = build_norm_layer(\n            norm_cfg, planes * self.expansion, postfix=3)\n\n        self.conv1 = build_conv_layer(\n            conv_cfg,\n            inplanes,\n            planes,\n            kernel_size=1,\n            stride=self.conv1_stride,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n        fallback_on_stride = False\n        if self.with_dcn:\n            fallback_on_stride = dcn.pop('fallback_on_stride', False)\n        if not self.with_dcn or fallback_on_stride:\n            self.conv2 = build_conv_layer(\n                conv_cfg,\n                planes,\n                planes,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=dilation,\n                dilation=dilation,\n                bias=False)\n        else:\n            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'\n            self.conv2 = build_conv_layer(\n                dcn,\n                planes,\n                planes,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=dilation,\n                dilation=dilation,\n                bias=False)\n\n        self.add_module(self.norm2_name, norm2)\n        self.conv3 = build_conv_layer(\n            conv_cfg,\n            planes,\n            planes * self.expansion,\n            kernel_size=1,\n            bias=False)\n        self.add_module(self.norm3_name, norm3)\n\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n\n        if self.with_plugins:\n            self.after_conv1_plugin_names = self.make_block_plugins(\n                planes, self.after_conv1_plugins)\n            self.after_conv2_plugin_names = self.make_block_plugins(\n                planes, self.after_conv2_plugins)\n            
self.after_conv3_plugin_names = self.make_block_plugins(\n                planes * self.expansion, self.after_conv3_plugins)\n\n    def make_block_plugins(self, in_channels, plugins):\n        \"\"\"make plugins for block.\n\n        Args:\n            in_channels (int): Input channels of plugin.\n            plugins (list[dict]): List of plugins cfg to build.\n\n        Returns:\n            list[str]: List of the names of plugin.\n        \"\"\"\n        assert isinstance(plugins, list)\n        plugin_names = []\n        for plugin in plugins:\n            plugin = plugin.copy()\n            name, layer = build_plugin_layer(\n                plugin,\n                in_channels=in_channels,\n                postfix=plugin.pop('postfix', ''))\n            assert not hasattr(self, name), f'duplicate plugin {name}'\n            self.add_module(name, layer)\n            plugin_names.append(name)\n        return plugin_names\n\n    def forward_plugin(self, x, plugin_names):\n        out = x\n        for name in plugin_names:\n            out = getattr(self, name)(out)\n        return out\n\n    @property\n    def norm1(self):\n        \"\"\"nn.Module: normalization layer after the first convolution layer\"\"\"\n        return getattr(self, self.norm1_name)\n\n    @property\n    def norm2(self):\n        \"\"\"nn.Module: normalization layer after the second convolution layer\"\"\"\n        return getattr(self, self.norm2_name)\n\n    @property\n    def norm3(self):\n        \"\"\"nn.Module: normalization layer after the third convolution layer\"\"\"\n        return getattr(self, self.norm3_name)\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n\n        def _inner_forward(x):\n            identity = x\n            out = self.conv1(x)\n            out = self.norm1(out)\n            out = self.relu(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv1_plugin_names)\n\n            out = self.conv2(out)\n            out = self.norm2(out)\n            out = self.relu(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv2_plugin_names)\n\n            out = self.conv3(out)\n            out = self.norm3(out)\n\n            if self.with_plugins:\n                out = self.forward_plugin(out, self.after_conv3_plugin_names)\n\n            if self.downsample is not None:\n                identity = self.downsample(x)\n\n            out += identity\n\n            return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        else:\n            out = _inner_forward(x)\n\n        out = self.relu(out)\n\n        return out\n\n\n@BACKBONES.register_module()\nclass ResNet(BaseModule):\n    \"\"\"ResNet backbone.\n\n    Args:\n        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.\n        stem_channels (int | None): Number of stem channels. If not specified,\n            it will be the same as `base_channels`. Default: None.\n        base_channels (int): Number of base channels of res layer. Default: 64.\n        in_channels (int): Number of input image channels. Default: 3.\n        num_stages (int): Resnet stages. Default: 4.\n        strides (Sequence[int]): Strides of the first block of each stage.\n        dilations (Sequence[int]): Dilation of each stage.\n        out_indices (Sequence[int]): Output from which stages.\n        style (str): `pytorch` or `caffe`. 
If set to \"pytorch\", the stride-two\n            layer is the 3x3 conv layer, otherwise the stride-two layer is\n            the first 1x1 conv layer.\n        deep_stem (bool): Replace 7x7 conv in input stem with 3 3x3 conv\n        avg_down (bool): Use AvgPool instead of stride conv when\n            downsampling in the bottleneck.\n        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).\n            -1 means not freezing any parameters.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only.\n        plugins (list[dict]): List of plugins for stages, each dict contains:\n\n            - cfg (dict, required): Cfg dict to build plugin.\n            - position (str, required): Position inside block to insert\n              plugin, options are 'after_conv1', 'after_conv2', 'after_conv3'.\n            - stages (tuple[bool], optional): Stages to apply plugin, length\n              should be same as 'num_stages'.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        zero_init_residual (bool): Whether to use zero init for last norm layer\n            in resblocks to let them behave as identity.\n        pretrained (str, optional): model pretrained path. Default: None\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n\n    Example:\n        >>> from mmdet.models import ResNet\n        >>> import torch\n        >>> self = ResNet(depth=18)\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 3, 32, 32)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...     
print(tuple(level_out.shape))\n        (1, 64, 8, 8)\n        (1, 128, 4, 4)\n        (1, 256, 2, 2)\n        (1, 512, 1, 1)\n    \"\"\"\n\n    arch_settings = {\n        18: (BasicBlock, (2, 2, 2, 2)),\n        34: (BasicBlock, (3, 4, 6, 3)),\n        50: (Bottleneck, (3, 4, 6, 3)),\n        101: (Bottleneck, (3, 4, 23, 3)),\n        152: (Bottleneck, (3, 8, 36, 3))\n    }\n\n    def __init__(self,\n                 depth,\n                 in_channels=3,\n                 stem_channels=None,\n                 base_channels=64,\n                 num_stages=4,\n                 strides=(1, 2, 2, 2),\n                 dilations=(1, 1, 1, 1),\n                 out_indices=(0, 1, 2, 3),\n                 style='pytorch',\n                 deep_stem=False,\n                 avg_down=False,\n                 frozen_stages=-1,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 norm_eval=True,\n                 dcn=None,\n                 stage_with_dcn=(False, False, False, False),\n                 plugins=None,\n                 with_cp=False,\n                 zero_init_residual=True,\n                 pretrained=None,\n                 init_cfg=None):\n        super(ResNet, self).__init__(init_cfg)\n        self.zero_init_residual = zero_init_residual\n        if depth not in self.arch_settings:\n            raise KeyError(f'invalid depth {depth} for resnet')\n\n        block_init_cfg = None\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            if init_cfg is None:\n                self.init_cfg = [\n                    dict(type='Kaiming', layer='Conv2d'),\n                    dict(\n                        type='Constant',\n                        val=1,\n                        layer=['_BatchNorm', 'GroupNorm'])\n                ]\n                block = self.arch_settings[depth][0]\n                if self.zero_init_residual:\n                    if block is BasicBlock:\n                        block_init_cfg = dict(\n                            type='Constant',\n                            val=0,\n                            override=dict(name='norm2'))\n                    elif block is Bottleneck:\n                        block_init_cfg = dict(\n                            type='Constant',\n                            val=0,\n                            override=dict(name='norm3'))\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        self.depth = depth\n        if stem_channels is None:\n            stem_channels = base_channels\n        self.stem_channels = stem_channels\n        self.base_channels = base_channels\n        self.num_stages = num_stages\n        assert num_stages >= 1 and num_stages <= 4\n        self.strides = strides\n        self.dilations = dilations\n        assert len(strides) == len(dilations) == num_stages\n        self.out_indices = out_indices\n        assert max(out_indices) < num_stages\n        self.style = style\n        self.deep_stem = deep_stem\n        self.avg_down = avg_down\n        self.frozen_stages = frozen_stages\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = 
norm_cfg\n        self.with_cp = with_cp\n        self.norm_eval = norm_eval\n        self.dcn = dcn\n        self.stage_with_dcn = stage_with_dcn\n        if dcn is not None:\n            assert len(stage_with_dcn) == num_stages\n        self.plugins = plugins\n        self.block, stage_blocks = self.arch_settings[depth]\n        self.stage_blocks = stage_blocks[:num_stages]\n        self.inplanes = stem_channels\n\n        self._make_stem_layer(in_channels, stem_channels)\n\n        self.res_layers = []\n        for i, num_blocks in enumerate(self.stage_blocks):\n            stride = strides[i]\n            dilation = dilations[i]\n            dcn = self.dcn if self.stage_with_dcn[i] else None\n            if plugins is not None:\n                stage_plugins = self.make_stage_plugins(plugins, i)\n            else:\n                stage_plugins = None\n            planes = base_channels * 2**i\n            res_layer = self.make_res_layer(\n                block=self.block,\n                inplanes=self.inplanes,\n                planes=planes,\n                num_blocks=num_blocks,\n                stride=stride,\n                dilation=dilation,\n                style=self.style,\n                avg_down=self.avg_down,\n                with_cp=with_cp,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                dcn=dcn,\n                plugins=stage_plugins,\n                init_cfg=block_init_cfg)\n            self.inplanes = planes * self.block.expansion\n            layer_name = f'layer{i + 1}'\n            self.add_module(layer_name, res_layer)\n            self.res_layers.append(layer_name)\n\n        self._freeze_stages()\n\n        self.feat_dim = self.block.expansion * base_channels * 2**(\n            len(self.stage_blocks) - 1)\n\n    def make_stage_plugins(self, plugins, stage_idx):\n        \"\"\"Make plugins for ResNet ``stage_idx`` th stage.\n\n        Currently we support to insert ``context_block``,\n        ``empirical_attention_block``, ``nonlocal_block`` into the backbone\n        like ResNet/ResNeXt. They could be inserted after conv1/conv2/conv3 of\n        Bottleneck.\n\n        An example of plugins format could be:\n\n        Examples:\n            >>> plugins=[\n            ...     dict(cfg=dict(type='xxx', arg1='xxx'),\n            ...          stages=(False, True, True, True),\n            ...          position='after_conv2'),\n            ...     dict(cfg=dict(type='yyy'),\n            ...          stages=(True, True, True, True),\n            ...          position='after_conv3'),\n            ...     dict(cfg=dict(type='zzz', postfix='1'),\n            ...          stages=(True, True, True, True),\n            ...          position='after_conv3'),\n            ...     dict(cfg=dict(type='zzz', postfix='2'),\n            ...          stages=(True, True, True, True),\n            ...          position='after_conv3')\n            ... ]\n            >>> self = ResNet(depth=18)\n            >>> stage_plugins = self.make_stage_plugins(plugins, 0)\n            >>> assert len(stage_plugins) == 3\n\n        Suppose ``stage_idx=0``, the structure of blocks in the stage would be:\n\n        .. code-block:: none\n\n            conv1-> conv2->conv3->yyy->zzz1->zzz2\n\n        Suppose 'stage_idx=1', the structure of blocks in the stage would be:\n\n        .. 
code-block:: none\n\n            conv1-> conv2->xxx->conv3->yyy->zzz1->zzz2\n\n        If stages is missing, the plugin would be applied to all stages.\n\n        Args:\n            plugins (list[dict]): List of plugins cfg to build. The postfix is\n                required if multiple same type plugins are inserted.\n            stage_idx (int): Index of stage to build\n\n        Returns:\n            list[dict]: Plugins for current stage\n        \"\"\"\n        stage_plugins = []\n        for plugin in plugins:\n            plugin = plugin.copy()\n            stages = plugin.pop('stages', None)\n            assert stages is None or len(stages) == self.num_stages\n            # whether to insert plugin into current stage\n            if stages is None or stages[stage_idx]:\n                stage_plugins.append(plugin)\n\n        return stage_plugins\n\n    def make_res_layer(self, **kwargs):\n        \"\"\"Pack all blocks in a stage into a ``ResLayer``.\"\"\"\n        return ResLayer(**kwargs)\n\n    @property\n    def norm1(self):\n        \"\"\"nn.Module: the normalization layer named \"norm1\" \"\"\"\n        return getattr(self, self.norm1_name)\n\n    def _make_stem_layer(self, in_channels, stem_channels):\n        if self.deep_stem:\n            self.stem = nn.Sequential(\n                build_conv_layer(\n                    self.conv_cfg,\n                    in_channels,\n                    stem_channels // 2,\n                    kernel_size=3,\n                    stride=2,\n                    padding=1,\n                    bias=False),\n                build_norm_layer(self.norm_cfg, stem_channels // 2)[1],\n                nn.ReLU(inplace=True),\n                build_conv_layer(\n                    self.conv_cfg,\n                    stem_channels // 2,\n                    stem_channels // 2,\n                    kernel_size=3,\n                    stride=1,\n                    padding=1,\n                    bias=False),\n                build_norm_layer(self.norm_cfg, stem_channels // 2)[1],\n                nn.ReLU(inplace=True),\n                build_conv_layer(\n                    self.conv_cfg,\n                    stem_channels // 2,\n                    stem_channels,\n                    kernel_size=3,\n                    stride=1,\n                    padding=1,\n                    bias=False),\n                build_norm_layer(self.norm_cfg, stem_channels)[1],\n                nn.ReLU(inplace=True))\n        else:\n            self.conv1 = build_conv_layer(\n                self.conv_cfg,\n                in_channels,\n                stem_channels,\n                kernel_size=7,\n                stride=2,\n                padding=3,\n                bias=False)\n            self.norm1_name, norm1 = build_norm_layer(\n                self.norm_cfg, stem_channels, postfix=1)\n            self.add_module(self.norm1_name, norm1)\n            self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            if self.deep_stem:\n                self.stem.eval()\n                for param in self.stem.parameters():\n                    param.requires_grad = False\n            else:\n                self.norm1.eval()\n                for m in [self.conv1, self.norm1]:\n                    for param in m.parameters():\n                        param.requires_grad = False\n\n        for i in range(1, self.frozen_stages + 1):\n            m 
= getattr(self, f'layer{i}')\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        if self.deep_stem:\n            x = self.stem(x)\n        else:\n            x = self.conv1(x)\n            x = self.norm1(x)\n            x = self.relu(x)\n        x = self.maxpool(x)\n        outs = []\n        for i, layer_name in enumerate(self.res_layers):\n            res_layer = getattr(self, layer_name)\n            x = res_layer(x)\n            if i in self.out_indices:\n                outs.append(x)\n        return tuple(outs)\n\n    def train(self, mode=True):\n        \"\"\"Convert the model into training mode while keep normalization layer\n        freezed.\"\"\"\n        super(ResNet, self).train(mode)\n        self._freeze_stages()\n        if mode and self.norm_eval:\n            for m in self.modules():\n                # trick: eval have effect on BatchNorm only\n                if isinstance(m, _BatchNorm):\n                    m.eval()\n\n\n@BACKBONES.register_module()\nclass ResNetV1d(ResNet):\n    r\"\"\"ResNetV1d variant described in `Bag of Tricks\n    <https://arxiv.org/pdf/1812.01187.pdf>`_.\n\n    Compared with default ResNet(ResNetV1b), ResNetV1d replaces the 7x7 conv in\n    the input stem with three 3x3 convs. And in the downsampling block, a 2x2\n    avg_pool with stride 2 is added before conv, whose stride is changed to 1.\n    \"\"\"\n\n    def __init__(self, **kwargs):\n        super(ResNetV1d, self).__init__(\n            deep_stem=True, avg_down=True, **kwargs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/resnext.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\n\nfrom ..builder import BACKBONES\nfrom ..utils import ResLayer\nfrom .resnet import Bottleneck as _Bottleneck\nfrom .resnet import ResNet\n\n\nclass Bottleneck(_Bottleneck):\n    expansion = 4\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 groups=1,\n                 base_width=4,\n                 base_channels=64,\n                 **kwargs):\n        \"\"\"Bottleneck block for ResNeXt.\n\n        If style is \"pytorch\", the stride-two layer is the 3x3 conv layer, if\n        it is \"caffe\", the stride-two layer is the first 1x1 conv layer.\n        \"\"\"\n        super(Bottleneck, self).__init__(inplanes, planes, **kwargs)\n\n        if groups == 1:\n            width = self.planes\n        else:\n            width = math.floor(self.planes *\n                               (base_width / base_channels)) * groups\n\n        self.norm1_name, norm1 = build_norm_layer(\n            self.norm_cfg, width, postfix=1)\n        self.norm2_name, norm2 = build_norm_layer(\n            self.norm_cfg, width, postfix=2)\n        self.norm3_name, norm3 = build_norm_layer(\n            self.norm_cfg, self.planes * self.expansion, postfix=3)\n\n        self.conv1 = build_conv_layer(\n            self.conv_cfg,\n            self.inplanes,\n            width,\n            kernel_size=1,\n            stride=self.conv1_stride,\n            bias=False)\n        self.add_module(self.norm1_name, norm1)\n        fallback_on_stride = False\n        self.with_modulated_dcn = False\n        if self.with_dcn:\n            fallback_on_stride = self.dcn.pop('fallback_on_stride', False)\n        if not self.with_dcn or fallback_on_stride:\n            self.conv2 = build_conv_layer(\n                self.conv_cfg,\n                width,\n                width,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation,\n                groups=groups,\n                bias=False)\n        else:\n            assert self.conv_cfg is None, 'conv_cfg must be None for DCN'\n            self.conv2 = build_conv_layer(\n                self.dcn,\n                width,\n                width,\n                kernel_size=3,\n                stride=self.conv2_stride,\n                padding=self.dilation,\n                dilation=self.dilation,\n                groups=groups,\n                bias=False)\n\n        self.add_module(self.norm2_name, norm2)\n        self.conv3 = build_conv_layer(\n            self.conv_cfg,\n            width,\n            self.planes * self.expansion,\n            kernel_size=1,\n            bias=False)\n        self.add_module(self.norm3_name, norm3)\n\n        if self.with_plugins:\n            self._del_block_plugins(self.after_conv1_plugin_names +\n                                    self.after_conv2_plugin_names +\n                                    self.after_conv3_plugin_names)\n            self.after_conv1_plugin_names = self.make_block_plugins(\n                width, self.after_conv1_plugins)\n            self.after_conv2_plugin_names = self.make_block_plugins(\n                width, self.after_conv2_plugins)\n            self.after_conv3_plugin_names = self.make_block_plugins(\n                self.planes * self.expansion, self.after_conv3_plugins)\n\n    def _del_block_plugins(self, plugin_names):\n 
       \"\"\"delete plugins for block if exist.\n\n        Args:\n            plugin_names (list[str]): List of plugins name to delete.\n        \"\"\"\n        assert isinstance(plugin_names, list)\n        for plugin_name in plugin_names:\n            del self._modules[plugin_name]\n\n\n@BACKBONES.register_module()\nclass ResNeXt(ResNet):\n    \"\"\"ResNeXt backbone.\n\n    Args:\n        depth (int): Depth of resnet, from {18, 34, 50, 101, 152}.\n        in_channels (int): Number of input image channels. Default: 3.\n        num_stages (int): Resnet stages. Default: 4.\n        groups (int): Group of resnext.\n        base_width (int): Base width of resnext.\n        strides (Sequence[int]): Strides of the first block of each stage.\n        dilations (Sequence[int]): Dilation of each stage.\n        out_indices (Sequence[int]): Output from which stages.\n        style (str): `pytorch` or `caffe`. If set to \"pytorch\", the stride-two\n            layer is the 3x3 conv layer, otherwise the stride-two layer is\n            the first 1x1 conv layer.\n        frozen_stages (int): Stages to be frozen (all param fixed). -1 means\n            not freezing any parameters.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n        norm_eval (bool): Whether to set norm layers to eval mode, namely,\n            freeze running stats (mean and var). Note: Effect on Batch Norm\n            and its variants only.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        zero_init_residual (bool): whether to use zero init for last norm layer\n            in resblocks to let them behave as identity.\n    \"\"\"\n\n    arch_settings = {\n        50: (Bottleneck, (3, 4, 6, 3)),\n        101: (Bottleneck, (3, 4, 23, 3)),\n        152: (Bottleneck, (3, 8, 36, 3))\n    }\n\n    def __init__(self, groups=1, base_width=4, **kwargs):\n        self.groups = groups\n        self.base_width = base_width\n        super(ResNeXt, self).__init__(**kwargs)\n\n    def make_res_layer(self, **kwargs):\n        \"\"\"Pack all blocks in a stage into a ``ResLayer``\"\"\"\n        return ResLayer(\n            groups=self.groups,\n            base_width=self.base_width,\n            base_channels=self.base_channels,\n            **kwargs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/ssd_vgg.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch.nn as nn\nfrom mmcv.cnn import VGG\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import BACKBONES\nfrom ..necks import ssd_neck\n\n\n@BACKBONES.register_module()\nclass SSDVGG(VGG, BaseModule):\n    \"\"\"VGG Backbone network for single-shot-detection.\n\n    Args:\n        depth (int): Depth of vgg, from {11, 13, 16, 19}.\n        with_last_pool (bool): Whether to add a pooling layer at the last\n            of the model\n        ceil_mode (bool): When True, will use `ceil` instead of `floor`\n            to compute the output shape.\n        out_indices (Sequence[int]): Output from which stages.\n        out_feature_indices (Sequence[int]): Output from which feature map.\n        pretrained (str, optional): model pretrained path. Default: None\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n        input_size (int, optional): Deprecated argumment.\n            Width and height of input, from {300, 512}.\n        l2_norm_scale (float, optional) : Deprecated argumment.\n            L2 normalization layer init scale.\n\n    Example:\n        >>> self = SSDVGG(input_size=300, depth=11)\n        >>> self.eval()\n        >>> inputs = torch.rand(1, 3, 300, 300)\n        >>> level_outputs = self.forward(inputs)\n        >>> for level_out in level_outputs:\n        ...     print(tuple(level_out.shape))\n        (1, 1024, 19, 19)\n        (1, 512, 10, 10)\n        (1, 256, 5, 5)\n        (1, 256, 3, 3)\n        (1, 256, 1, 1)\n    \"\"\"\n    extra_setting = {\n        300: (256, 'S', 512, 128, 'S', 256, 128, 256, 128, 256),\n        512: (256, 'S', 512, 128, 'S', 256, 128, 'S', 256, 128, 'S', 256, 128),\n    }\n\n    def __init__(self,\n                 depth,\n                 with_last_pool=False,\n                 ceil_mode=True,\n                 out_indices=(3, 4),\n                 out_feature_indices=(22, 34),\n                 pretrained=None,\n                 init_cfg=None,\n                 input_size=None,\n                 l2_norm_scale=None):\n        # TODO: in_channels for mmcv.VGG\n        super(SSDVGG, self).__init__(\n            depth,\n            with_last_pool=with_last_pool,\n            ceil_mode=ceil_mode,\n            out_indices=out_indices)\n\n        self.features.add_module(\n            str(len(self.features)),\n            nn.MaxPool2d(kernel_size=3, stride=1, padding=1))\n        self.features.add_module(\n            str(len(self.features)),\n            nn.Conv2d(512, 1024, kernel_size=3, padding=6, dilation=6))\n        self.features.add_module(\n            str(len(self.features)), nn.ReLU(inplace=True))\n        self.features.add_module(\n            str(len(self.features)), nn.Conv2d(1024, 1024, kernel_size=1))\n        self.features.add_module(\n            str(len(self.features)), nn.ReLU(inplace=True))\n        self.out_feature_indices = out_feature_indices\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n\n        if init_cfg is not None:\n            self.init_cfg = init_cfg\n        elif isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            self.init_cfg = [\n                
dict(type='Kaiming', layer='Conv2d'),\n                dict(type='Constant', val=1, layer='BatchNorm2d'),\n                dict(type='Normal', std=0.01, layer='Linear'),\n            ]\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        if input_size is not None:\n            warnings.warn('DeprecationWarning: input_size is deprecated')\n        if l2_norm_scale is not None:\n            warnings.warn('DeprecationWarning: l2_norm_scale in VGG is '\n                          'deprecated, it has been moved to SSDNeck.')\n\n    def init_weights(self, pretrained=None):\n        super(VGG, self).init_weights()\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        outs = []\n        for i, layer in enumerate(self.features):\n            x = layer(x)\n            if i in self.out_feature_indices:\n                outs.append(x)\n\n        if len(outs) == 1:\n            return outs[0]\n        else:\n            return tuple(outs)\n\n\nclass L2Norm(ssd_neck.L2Norm):\n\n    def __init__(self, **kwargs):\n        super(L2Norm, self).__init__(**kwargs)\n        warnings.warn('DeprecationWarning: L2Norm in ssd_vgg.py '\n                      'is deprecated, please use L2Norm in '\n                      'mmdet/models/necks/ssd_neck.py instead')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/swin.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom collections import OrderedDict\nfrom copy import deepcopy\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn import build_norm_layer, constant_init, trunc_normal_init\nfrom mmcv.cnn.bricks.transformer import FFN, build_dropout\nfrom mmcv.cnn.utils.weight_init import trunc_normal_\nfrom mmcv.runner import BaseModule, ModuleList, _load_checkpoint\nfrom mmcv.utils import to_2tuple\n\nfrom ...utils import get_root_logger\nfrom ..builder import BACKBONES\nfrom ..utils.ckpt_convert import swin_converter\nfrom ..utils.transformer import PatchEmbed, PatchMerging\n\n\nclass WindowMSA(BaseModule):\n    \"\"\"Window based multi-head self-attention (W-MSA) module with relative\n    position bias.\n\n    Args:\n        embed_dims (int): Number of input channels.\n        num_heads (int): Number of attention heads.\n        window_size (tuple[int]): The height and width of the window.\n        qkv_bias (bool, optional):  If True, add a learnable bias to q, k, v.\n            Default: True.\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Default: None.\n        attn_drop_rate (float, optional): Dropout ratio of attention weight.\n            Default: 0.0\n        proj_drop_rate (float, optional): Dropout ratio of output. Default: 0.\n        init_cfg (dict | None, optional): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 window_size,\n                 qkv_bias=True,\n                 qk_scale=None,\n                 attn_drop_rate=0.,\n                 proj_drop_rate=0.,\n                 init_cfg=None):\n\n        super().__init__()\n        self.embed_dims = embed_dims\n        self.window_size = window_size  # Wh, Ww\n        self.num_heads = num_heads\n        head_embed_dims = embed_dims // num_heads\n        self.scale = qk_scale or head_embed_dims**-0.5\n        self.init_cfg = init_cfg\n\n        # define a parameter table of relative position bias\n        self.relative_position_bias_table = nn.Parameter(\n            torch.zeros((2 * window_size[0] - 1) * (2 * window_size[1] - 1),\n                        num_heads))  # 2*Wh-1 * 2*Ww-1, nH\n\n        # About 2x faster than original impl\n        Wh, Ww = self.window_size\n        rel_index_coords = self.double_step_seq(2 * Ww - 1, Wh, 1, Ww)\n        rel_position_index = rel_index_coords + rel_index_coords.T\n        rel_position_index = rel_position_index.flip(1).contiguous()\n        self.register_buffer('relative_position_index', rel_position_index)\n\n        self.qkv = nn.Linear(embed_dims, embed_dims * 3, bias=qkv_bias)\n        self.attn_drop = nn.Dropout(attn_drop_rate)\n        self.proj = nn.Linear(embed_dims, embed_dims)\n        self.proj_drop = nn.Dropout(proj_drop_rate)\n\n        self.softmax = nn.Softmax(dim=-1)\n\n    def init_weights(self):\n        trunc_normal_(self.relative_position_bias_table, std=0.02)\n\n    def forward(self, x, mask=None):\n        \"\"\"\n        Args:\n\n            x (tensor): input features with shape of (num_windows*B, N, C)\n            mask (tensor | None, Optional): mask with shape of (num_windows,\n                Wh*Ww, Wh*Ww), value should be between (-inf, 0].\n        \"\"\"\n        B, N, C = x.shape\n        qkv = self.qkv(x).reshape(B, N, 3, 
self.num_heads,\n                                  C // self.num_heads).permute(2, 0, 3, 1, 4)\n        # make torchscript happy (cannot use tensor as tuple)\n        q, k, v = qkv[0], qkv[1], qkv[2]\n\n        q = q * self.scale\n        attn = (q @ k.transpose(-2, -1))\n\n        relative_position_bias = self.relative_position_bias_table[\n            self.relative_position_index.view(-1)].view(\n                self.window_size[0] * self.window_size[1],\n                self.window_size[0] * self.window_size[1],\n                -1)  # Wh*Ww,Wh*Ww,nH\n        relative_position_bias = relative_position_bias.permute(\n            2, 0, 1).contiguous()  # nH, Wh*Ww, Wh*Ww\n        attn = attn + relative_position_bias.unsqueeze(0)\n\n        if mask is not None:\n            nW = mask.shape[0]\n            attn = attn.view(B // nW, nW, self.num_heads, N,\n                             N) + mask.unsqueeze(1).unsqueeze(0)\n            attn = attn.view(-1, self.num_heads, N, N)\n        attn = self.softmax(attn)\n\n        attn = self.attn_drop(attn)\n\n        x = (attn @ v).transpose(1, 2).reshape(B, N, C)\n        x = self.proj(x)\n        x = self.proj_drop(x)\n        return x\n\n    @staticmethod\n    def double_step_seq(step1, len1, step2, len2):\n        seq1 = torch.arange(0, step1 * len1, step1)\n        seq2 = torch.arange(0, step2 * len2, step2)\n        return (seq1[:, None] + seq2[None, :]).reshape(1, -1)\n\n\nclass ShiftWindowMSA(BaseModule):\n    \"\"\"Shifted Window Multihead Self-Attention Module.\n\n    Args:\n        embed_dims (int): Number of input channels.\n        num_heads (int): Number of attention heads.\n        window_size (int): The height and width of the window.\n        shift_size (int, optional): The shift step of each window towards\n            right-bottom. If zero, act as regular window-msa. Defaults to 0.\n        qkv_bias (bool, optional): If True, add a learnable bias to q, k, v.\n            Default: True\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. 
Defaults: None.\n        attn_drop_rate (float, optional): Dropout ratio of attention weight.\n            Defaults: 0.\n        proj_drop_rate (float, optional): Dropout ratio of output.\n            Defaults: 0.\n        dropout_layer (dict, optional): The dropout_layer used before output.\n            Defaults: dict(type='DropPath', drop_prob=0.).\n        init_cfg (dict, optional): The extra config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 window_size,\n                 shift_size=0,\n                 qkv_bias=True,\n                 qk_scale=None,\n                 attn_drop_rate=0,\n                 proj_drop_rate=0,\n                 dropout_layer=dict(type='DropPath', drop_prob=0.),\n                 init_cfg=None):\n        super().__init__(init_cfg)\n\n        self.window_size = window_size\n        self.shift_size = shift_size\n        assert 0 <= self.shift_size < self.window_size\n\n        self.w_msa = WindowMSA(\n            embed_dims=embed_dims,\n            num_heads=num_heads,\n            window_size=to_2tuple(window_size),\n            qkv_bias=qkv_bias,\n            qk_scale=qk_scale,\n            attn_drop_rate=attn_drop_rate,\n            proj_drop_rate=proj_drop_rate,\n            init_cfg=None)\n\n        self.drop = build_dropout(dropout_layer)\n\n    def forward(self, query, hw_shape):\n        B, L, C = query.shape\n        H, W = hw_shape\n        assert L == H * W, 'input feature has wrong size'\n        query = query.view(B, H, W, C)\n\n        # pad feature maps to multiples of window size\n        pad_r = (self.window_size - W % self.window_size) % self.window_size\n        pad_b = (self.window_size - H % self.window_size) % self.window_size\n        query = F.pad(query, (0, 0, 0, pad_r, 0, pad_b))\n        H_pad, W_pad = query.shape[1], query.shape[2]\n\n        # cyclic shift\n        if self.shift_size > 0:\n            shifted_query = torch.roll(\n                query,\n                shifts=(-self.shift_size, -self.shift_size),\n                dims=(1, 2))\n\n            # calculate attention mask for SW-MSA\n            img_mask = torch.zeros((1, H_pad, W_pad, 1), device=query.device)\n            h_slices = (slice(0, -self.window_size),\n                        slice(-self.window_size,\n                              -self.shift_size), slice(-self.shift_size, None))\n            w_slices = (slice(0, -self.window_size),\n                        slice(-self.window_size,\n                              -self.shift_size), slice(-self.shift_size, None))\n            cnt = 0\n            for h in h_slices:\n                for w in w_slices:\n                    img_mask[:, h, w, :] = cnt\n                    cnt += 1\n\n            # nW, window_size, window_size, 1\n            mask_windows = self.window_partition(img_mask)\n            mask_windows = mask_windows.view(\n                -1, self.window_size * self.window_size)\n            attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2)\n            attn_mask = attn_mask.masked_fill(attn_mask != 0,\n                                              float(-100.0)).masked_fill(\n                                                  attn_mask == 0, float(0.0))\n        else:\n            shifted_query = query\n            attn_mask = None\n\n        # nW*B, window_size, window_size, C\n        query_windows = self.window_partition(shifted_query)\n        # nW*B, 
window_size*window_size, C\n        query_windows = query_windows.view(-1, self.window_size**2, C)\n\n        # W-MSA/SW-MSA (nW*B, window_size*window_size, C)\n        attn_windows = self.w_msa(query_windows, mask=attn_mask)\n\n        # merge windows\n        attn_windows = attn_windows.view(-1, self.window_size,\n                                         self.window_size, C)\n\n        # B H' W' C\n        shifted_x = self.window_reverse(attn_windows, H_pad, W_pad)\n        # reverse cyclic shift\n        if self.shift_size > 0:\n            x = torch.roll(\n                shifted_x,\n                shifts=(self.shift_size, self.shift_size),\n                dims=(1, 2))\n        else:\n            x = shifted_x\n\n        if pad_r > 0 or pad_b:\n            x = x[:, :H, :W, :].contiguous()\n\n        x = x.view(B, H * W, C)\n\n        x = self.drop(x)\n        return x\n\n    def window_reverse(self, windows, H, W):\n        \"\"\"\n        Args:\n            windows: (num_windows*B, window_size, window_size, C)\n            H (int): Height of image\n            W (int): Width of image\n        Returns:\n            x: (B, H, W, C)\n        \"\"\"\n        window_size = self.window_size\n        B = int(windows.shape[0] / (H * W / window_size / window_size))\n        x = windows.view(B, H // window_size, W // window_size, window_size,\n                         window_size, -1)\n        x = x.permute(0, 1, 3, 2, 4, 5).contiguous().view(B, H, W, -1)\n        return x\n\n    def window_partition(self, x):\n        \"\"\"\n        Args:\n            x: (B, H, W, C)\n        Returns:\n            windows: (num_windows*B, window_size, window_size, C)\n        \"\"\"\n        B, H, W, C = x.shape\n        window_size = self.window_size\n        x = x.view(B, H // window_size, window_size, W // window_size,\n                   window_size, C)\n        windows = x.permute(0, 1, 3, 2, 4, 5).contiguous()\n        windows = windows.view(-1, window_size, window_size, C)\n        return windows\n\n\nclass SwinBlock(BaseModule):\n    \"\"\"\"\n    Args:\n        embed_dims (int): The feature dimension.\n        num_heads (int): Parallel attention heads.\n        feedforward_channels (int): The hidden dimension for FFNs.\n        window_size (int, optional): The local window scale. Default: 7.\n        shift (bool, optional): whether to shift window or not. Default False.\n        qkv_bias (bool, optional): enable bias for qkv if True. Default: True.\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Default: None.\n        drop_rate (float, optional): Dropout rate. Default: 0.\n        attn_drop_rate (float, optional): Attention dropout rate. Default: 0.\n        drop_path_rate (float, optional): Stochastic depth rate. Default: 0.\n        act_cfg (dict, optional): The config dict of activation function.\n            Default: dict(type='GELU').\n        norm_cfg (dict, optional): The config dict of normalization.\n            Default: dict(type='LN').\n        with_cp (bool, optional): Use checkpoint or not. 
Using checkpoint\n            will save some memory while slowing down the training speed.\n            Default: False.\n        init_cfg (dict | list | None, optional): The init config.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 feedforward_channels,\n                 window_size=7,\n                 shift=False,\n                 qkv_bias=True,\n                 qk_scale=None,\n                 drop_rate=0.,\n                 attn_drop_rate=0.,\n                 drop_path_rate=0.,\n                 act_cfg=dict(type='GELU'),\n                 norm_cfg=dict(type='LN'),\n                 with_cp=False,\n                 init_cfg=None):\n\n        super(SwinBlock, self).__init__()\n\n        self.init_cfg = init_cfg\n        self.with_cp = with_cp\n\n        self.norm1 = build_norm_layer(norm_cfg, embed_dims)[1]\n        self.attn = ShiftWindowMSA(\n            embed_dims=embed_dims,\n            num_heads=num_heads,\n            window_size=window_size,\n            shift_size=window_size // 2 if shift else 0,\n            qkv_bias=qkv_bias,\n            qk_scale=qk_scale,\n            attn_drop_rate=attn_drop_rate,\n            proj_drop_rate=drop_rate,\n            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),\n            init_cfg=None)\n\n        self.norm2 = build_norm_layer(norm_cfg, embed_dims)[1]\n        self.ffn = FFN(\n            embed_dims=embed_dims,\n            feedforward_channels=feedforward_channels,\n            num_fcs=2,\n            ffn_drop=drop_rate,\n            dropout_layer=dict(type='DropPath', drop_prob=drop_path_rate),\n            act_cfg=act_cfg,\n            add_identity=True,\n            init_cfg=None)\n\n    def forward(self, x, hw_shape):\n\n        def _inner_forward(x):\n            identity = x\n            x = self.norm1(x)\n            x = self.attn(x, hw_shape)\n\n            x = x + identity\n\n            identity = x\n            x = self.norm2(x)\n            x = self.ffn(x, identity=identity)\n\n            return x\n\n        if self.with_cp and x.requires_grad:\n            x = cp.checkpoint(_inner_forward, x)\n        else:\n            x = _inner_forward(x)\n\n        return x\n\n\nclass SwinBlockSequence(BaseModule):\n    \"\"\"Implements one stage in Swin Transformer.\n\n    Args:\n        embed_dims (int): The feature dimension.\n        num_heads (int): Parallel attention heads.\n        feedforward_channels (int): The hidden dimension for FFNs.\n        depth (int): The number of blocks in this stage.\n        window_size (int, optional): The local window scale. Default: 7.\n        qkv_bias (bool, optional): enable bias for qkv if True. Default: True.\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Default: None.\n        drop_rate (float, optional): Dropout rate. Default: 0.\n        attn_drop_rate (float, optional): Attention dropout rate. Default: 0.\n        drop_path_rate (float | list[float], optional): Stochastic depth\n            rate. Default: 0.\n        downsample (BaseModule | None, optional): The downsample operation\n            module. Default: None.\n        act_cfg (dict, optional): The config dict of activation function.\n            Default: dict(type='GELU').\n        norm_cfg (dict, optional): The config dict of normalization.\n            Default: dict(type='LN').\n        with_cp (bool, optional): Use checkpoint or not. 
Using checkpoint\n            will save some memory while slowing down the training speed.\n            Default: False.\n        init_cfg (dict | list | None, optional): The init config.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 embed_dims,\n                 num_heads,\n                 feedforward_channels,\n                 depth,\n                 window_size=7,\n                 qkv_bias=True,\n                 qk_scale=None,\n                 drop_rate=0.,\n                 attn_drop_rate=0.,\n                 drop_path_rate=0.,\n                 downsample=None,\n                 act_cfg=dict(type='GELU'),\n                 norm_cfg=dict(type='LN'),\n                 with_cp=False,\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n\n        if isinstance(drop_path_rate, list):\n            drop_path_rates = drop_path_rate\n            assert len(drop_path_rates) == depth\n        else:\n            drop_path_rates = [deepcopy(drop_path_rate) for _ in range(depth)]\n\n        self.blocks = ModuleList()\n        for i in range(depth):\n            block = SwinBlock(\n                embed_dims=embed_dims,\n                num_heads=num_heads,\n                feedforward_channels=feedforward_channels,\n                window_size=window_size,\n                shift=False if i % 2 == 0 else True,\n                qkv_bias=qkv_bias,\n                qk_scale=qk_scale,\n                drop_rate=drop_rate,\n                attn_drop_rate=attn_drop_rate,\n                drop_path_rate=drop_path_rates[i],\n                act_cfg=act_cfg,\n                norm_cfg=norm_cfg,\n                with_cp=with_cp,\n                init_cfg=None)\n            self.blocks.append(block)\n\n        self.downsample = downsample\n\n    def forward(self, x, hw_shape):\n        for block in self.blocks:\n            x = block(x, hw_shape)\n\n        if self.downsample:\n            x_down, down_hw_shape = self.downsample(x, hw_shape)\n            return x_down, down_hw_shape, x, hw_shape\n        else:\n            return x, hw_shape, x, hw_shape\n\n\n@BACKBONES.register_module()\nclass SwinTransformer(BaseModule):\n    \"\"\" Swin Transformer\n    A PyTorch implement of : `Swin Transformer:\n    Hierarchical Vision Transformer using Shifted Windows`  -\n        https://arxiv.org/abs/2103.14030\n\n    Inspiration from\n    https://github.com/microsoft/Swin-Transformer\n\n    Args:\n        pretrain_img_size (int | tuple[int]): The size of input image when\n            pretrain. Defaults: 224.\n        in_channels (int): The num of input channels.\n            Defaults: 3.\n        embed_dims (int): The feature dimension. Default: 96.\n        patch_size (int | tuple[int]): Patch size. Default: 4.\n        window_size (int): Window size. Default: 7.\n        mlp_ratio (int): Ratio of mlp hidden dim to embedding dim.\n            Default: 4.\n        depths (tuple[int]): Depths of each Swin Transformer stage.\n            Default: (2, 2, 6, 2).\n        num_heads (tuple[int]): Parallel attention heads of each Swin\n            Transformer stage. Default: (3, 6, 12, 24).\n        strides (tuple[int]): The patch merging or patch embedding stride of\n            each Swin Transformer stage. (In swin, we set kernel size equal to\n            stride.) 
Default: (4, 2, 2, 2).\n        out_indices (tuple[int]): Output from which stages.\n            Default: (0, 1, 2, 3).\n        qkv_bias (bool, optional): If True, add a learnable bias to query, key,\n            value. Default: True\n        qk_scale (float | None, optional): Override default qk scale of\n            head_dim ** -0.5 if set. Default: None.\n        patch_norm (bool): If add a norm layer for patch embed and patch\n            merging. Default: True.\n        drop_rate (float): Dropout rate. Defaults: 0.\n        attn_drop_rate (float): Attention dropout rate. Default: 0.\n        drop_path_rate (float): Stochastic depth rate. Defaults: 0.1.\n        use_abs_pos_embed (bool): If True, add absolute position embedding to\n            the patch embedding. Defaults: False.\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='GELU').\n        norm_cfg (dict): Config dict for normalization layer at\n            output of backone. Defaults: dict(type='LN').\n        with_cp (bool, optional): Use checkpoint or not. Using checkpoint\n            will save some memory while slowing down the training speed.\n            Default: False.\n        pretrained (str, optional): model pretrained path. Default: None.\n        convert_weights (bool): The flag indicates whether the\n            pre-trained model is from the original repo. We may need\n            to convert some keys to make it compatible.\n            Default: False.\n        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).\n            Default: -1 (-1 means not freezing any parameters).\n        init_cfg (dict, optional): The Config for initialization.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 pretrain_img_size=224,\n                 in_channels=3,\n                 embed_dims=96,\n                 patch_size=4,\n                 window_size=7,\n                 mlp_ratio=4,\n                 depths=(2, 2, 6, 2),\n                 num_heads=(3, 6, 12, 24),\n                 strides=(4, 2, 2, 2),\n                 out_indices=(0, 1, 2, 3),\n                 qkv_bias=True,\n                 qk_scale=None,\n                 patch_norm=True,\n                 drop_rate=0.,\n                 attn_drop_rate=0.,\n                 drop_path_rate=0.1,\n                 use_abs_pos_embed=False,\n                 act_cfg=dict(type='GELU'),\n                 norm_cfg=dict(type='LN'),\n                 with_cp=False,\n                 pretrained=None,\n                 convert_weights=False,\n                 frozen_stages=-1,\n                 init_cfg=None):\n        self.convert_weights = convert_weights\n        self.frozen_stages = frozen_stages\n        if isinstance(pretrain_img_size, int):\n            pretrain_img_size = to_2tuple(pretrain_img_size)\n        elif isinstance(pretrain_img_size, tuple):\n            if len(pretrain_img_size) == 1:\n                pretrain_img_size = to_2tuple(pretrain_img_size[0])\n            assert len(pretrain_img_size) == 2, \\\n                f'The size of image should have length 1 or 2, ' \\\n                f'but got {len(pretrain_img_size)}'\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = 
dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            self.init_cfg = init_cfg\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n        super(SwinTransformer, self).__init__(init_cfg=init_cfg)\n\n        num_layers = len(depths)\n        self.out_indices = out_indices\n        self.use_abs_pos_embed = use_abs_pos_embed\n\n        assert strides[0] == patch_size, 'Use non-overlapping patch embed.'\n\n        self.patch_embed = PatchEmbed(\n            in_channels=in_channels,\n            embed_dims=embed_dims,\n            conv_type='Conv2d',\n            kernel_size=patch_size,\n            stride=strides[0],\n            norm_cfg=norm_cfg if patch_norm else None,\n            init_cfg=None)\n\n        if self.use_abs_pos_embed:\n            patch_row = pretrain_img_size[0] // patch_size\n            patch_col = pretrain_img_size[1] // patch_size\n            self.absolute_pos_embed = nn.Parameter(\n                torch.zeros((1, embed_dims, patch_row, patch_col)))\n\n        self.drop_after_pos = nn.Dropout(p=drop_rate)\n\n        # set stochastic depth decay rule\n        total_depth = sum(depths)\n        dpr = [\n            x.item() for x in torch.linspace(0, drop_path_rate, total_depth)\n        ]\n\n        self.stages = ModuleList()\n        in_channels = embed_dims\n        for i in range(num_layers):\n            if i < num_layers - 1:\n                downsample = PatchMerging(\n                    in_channels=in_channels,\n                    out_channels=2 * in_channels,\n                    stride=strides[i + 1],\n                    norm_cfg=norm_cfg if patch_norm else None,\n                    init_cfg=None)\n            else:\n                downsample = None\n\n            stage = SwinBlockSequence(\n                embed_dims=in_channels,\n                num_heads=num_heads[i],\n                feedforward_channels=mlp_ratio * in_channels,\n                depth=depths[i],\n                window_size=window_size,\n                qkv_bias=qkv_bias,\n                qk_scale=qk_scale,\n                drop_rate=drop_rate,\n                attn_drop_rate=attn_drop_rate,\n                drop_path_rate=dpr[sum(depths[:i]):sum(depths[:i + 1])],\n                downsample=downsample,\n                act_cfg=act_cfg,\n                norm_cfg=norm_cfg,\n                with_cp=with_cp,\n                init_cfg=None)\n            self.stages.append(stage)\n            if downsample:\n                in_channels = downsample.out_channels\n\n        self.num_features = [int(embed_dims * 2**i) for i in range(num_layers)]\n        # Add a norm layer for each output\n        for i in out_indices:\n            layer = build_norm_layer(norm_cfg, self.num_features[i])[1]\n            layer_name = f'norm{i}'\n            self.add_module(layer_name, layer)\n\n    def train(self, mode=True):\n        \"\"\"Convert the model into training mode while keep layers freezed.\"\"\"\n        super(SwinTransformer, self).train(mode)\n        self._freeze_stages()\n\n    def _freeze_stages(self):\n        if self.frozen_stages >= 0:\n            self.patch_embed.eval()\n            for param in self.patch_embed.parameters():\n                param.requires_grad = False\n            if self.use_abs_pos_embed:\n                self.absolute_pos_embed.requires_grad = False\n            self.drop_after_pos.eval()\n\n        for i in range(1, self.frozen_stages + 1):\n\n            if (i - 1) in self.out_indices:\n        
        norm_layer = getattr(self, f'norm{i-1}')\n                norm_layer.eval()\n                for param in norm_layer.parameters():\n                    param.requires_grad = False\n\n            m = self.stages[i - 1]\n            m.eval()\n            for param in m.parameters():\n                param.requires_grad = False\n\n    def init_weights(self):\n        logger = get_root_logger()\n        if self.init_cfg is None:\n            logger.warn(f'No pre-trained weights for '\n                        f'{self.__class__.__name__}, '\n                        f'training start from scratch')\n            if self.use_abs_pos_embed:\n                trunc_normal_(self.absolute_pos_embed, std=0.02)\n            for m in self.modules():\n                if isinstance(m, nn.Linear):\n                    trunc_normal_init(m, std=.02, bias=0.)\n                elif isinstance(m, nn.LayerNorm):\n                    constant_init(m, 1.0)\n        else:\n            assert 'checkpoint' in self.init_cfg, f'Only support ' \\\n                                                  f'specify `Pretrained` in ' \\\n                                                  f'`init_cfg` in ' \\\n                                                  f'{self.__class__.__name__} '\n            ckpt = _load_checkpoint(\n                self.init_cfg.checkpoint, logger=logger, map_location='cpu')\n            if 'state_dict' in ckpt:\n                _state_dict = ckpt['state_dict']\n            elif 'model' in ckpt:\n                _state_dict = ckpt['model']\n            else:\n                _state_dict = ckpt\n            if self.convert_weights:\n                # supported loading weight from original repo,\n                _state_dict = swin_converter(_state_dict)\n\n            state_dict = OrderedDict()\n            for k, v in _state_dict.items():\n                if k.startswith('backbone.'):\n                    state_dict[k[9:]] = v\n\n            # strip prefix of state_dict\n            if list(state_dict.keys())[0].startswith('module.'):\n                state_dict = {k[7:]: v for k, v in state_dict.items()}\n\n            # reshape absolute position embedding\n            if state_dict.get('absolute_pos_embed') is not None:\n                absolute_pos_embed = state_dict['absolute_pos_embed']\n                N1, L, C1 = absolute_pos_embed.size()\n                N2, C2, H, W = self.absolute_pos_embed.size()\n                if N1 != N2 or C1 != C2 or L != H * W:\n                    logger.warning('Error in loading absolute_pos_embed, pass')\n                else:\n                    state_dict['absolute_pos_embed'] = absolute_pos_embed.view(\n                        N2, H, W, C2).permute(0, 3, 1, 2).contiguous()\n\n            # interpolate position bias table if needed\n            relative_position_bias_table_keys = [\n                k for k in state_dict.keys()\n                if 'relative_position_bias_table' in k\n            ]\n            for table_key in relative_position_bias_table_keys:\n                table_pretrained = state_dict[table_key]\n                table_current = self.state_dict()[table_key]\n                L1, nH1 = table_pretrained.size()\n                L2, nH2 = table_current.size()\n                if nH1 != nH2:\n                    logger.warning(f'Error in loading {table_key}, pass')\n                elif L1 != L2:\n                    S1 = int(L1**0.5)\n                    S2 = int(L2**0.5)\n                    table_pretrained_resized = F.interpolate(\n           
             table_pretrained.permute(1, 0).reshape(1, nH1, S1, S1),\n                        size=(S2, S2),\n                        mode='bicubic')\n                    state_dict[table_key] = table_pretrained_resized.view(\n                        nH2, L2).permute(1, 0).contiguous()\n\n            # load state_dict\n            self.load_state_dict(state_dict, False)\n\n    def forward(self, x):\n        x, hw_shape = self.patch_embed(x)\n\n        if self.use_abs_pos_embed:\n            h, w = self.absolute_pos_embed.shape[1:3]\n            if hw_shape[0] != h or hw_shape[1] != w:\n                absolute_pos_embed = F.interpolate(\n                    self.absolute_pos_embed,\n                    size=hw_shape,\n                    mode='bicubic',\n                    align_corners=False).flatten(2).transpose(1, 2)\n            else:\n                absolute_pos_embed = self.absolute_pos_embed.flatten(\n                    2).transpose(1, 2)\n            x = x + absolute_pos_embed\n        x = self.drop_after_pos(x)\n\n        outs = []\n        for i, stage in enumerate(self.stages):\n            x, hw_shape, out, out_hw_shape = stage(x, hw_shape)\n            if i in self.out_indices:\n                norm_layer = getattr(self, f'norm{i}')\n                out = norm_layer(out)\n                out = out.view(-1, *out_hw_shape,\n                               self.num_features[i]).permute(0, 3, 1,\n                                                             2).contiguous()\n                outs.append(out)\n\n        return outs\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/backbones/trident_resnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\nfrom mmcv.runner import BaseModule\nfrom torch.nn.modules.utils import _pair\n\nfrom mmdet.models.backbones.resnet import Bottleneck, ResNet\nfrom mmdet.models.builder import BACKBONES\n\n\nclass TridentConv(BaseModule):\n    \"\"\"Trident Convolution Module.\n\n    Args:\n        in_channels (int): Number of channels in input.\n        out_channels (int): Number of channels in output.\n        kernel_size (int): Size of convolution kernel.\n        stride (int, optional): Convolution stride. Default: 1.\n        trident_dilations (tuple[int, int, int], optional): Dilations of\n            different trident branch. Default: (1, 2, 3).\n        test_branch_idx (int, optional): In inference, all 3 branches will\n            be used if `test_branch_idx==-1`, otherwise only branch with\n            index `test_branch_idx` will be used. Default: 1.\n        bias (bool, optional): Whether to use bias in convolution or not.\n            Default: False.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size,\n                 stride=1,\n                 trident_dilations=(1, 2, 3),\n                 test_branch_idx=1,\n                 bias=False,\n                 init_cfg=None):\n        super(TridentConv, self).__init__(init_cfg)\n        self.num_branch = len(trident_dilations)\n        self.with_bias = bias\n        self.test_branch_idx = test_branch_idx\n        self.stride = _pair(stride)\n        self.kernel_size = _pair(kernel_size)\n        self.paddings = _pair(trident_dilations)\n        self.dilations = trident_dilations\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.bias = bias\n\n        self.weight = nn.Parameter(\n            torch.Tensor(out_channels, in_channels, *self.kernel_size))\n        if bias:\n            self.bias = nn.Parameter(torch.Tensor(out_channels))\n        else:\n            self.bias = None\n\n    def extra_repr(self):\n        tmpstr = f'in_channels={self.in_channels}'\n        tmpstr += f', out_channels={self.out_channels}'\n        tmpstr += f', kernel_size={self.kernel_size}'\n        tmpstr += f', num_branch={self.num_branch}'\n        tmpstr += f', test_branch_idx={self.test_branch_idx}'\n        tmpstr += f', stride={self.stride}'\n        tmpstr += f', paddings={self.paddings}'\n        tmpstr += f', dilations={self.dilations}'\n        tmpstr += f', bias={self.bias}'\n        return tmpstr\n\n    def forward(self, inputs):\n        if self.training or self.test_branch_idx == -1:\n            outputs = [\n                F.conv2d(input, self.weight, self.bias, self.stride, padding,\n                         dilation) for input, dilation, padding in zip(\n                             inputs, self.dilations, self.paddings)\n            ]\n        else:\n            assert len(inputs) == 1\n            outputs = [\n                F.conv2d(inputs[0], self.weight, self.bias, self.stride,\n                         self.paddings[self.test_branch_idx],\n                         self.dilations[self.test_branch_idx])\n            ]\n\n        return outputs\n\n\n# Since TridentNet is defined over ResNet50 and 
ResNet101, here we\n# only support TridentBottleneckBlock.\nclass TridentBottleneck(Bottleneck):\n    \"\"\"BottleBlock for TridentResNet.\n\n    Args:\n        trident_dilations (tuple[int, int, int]): Dilations of different\n            trident branch.\n        test_branch_idx (int): In inference, all 3 branches will be used\n            if `test_branch_idx==-1`, otherwise only branch with index\n            `test_branch_idx` will be used.\n        concat_output (bool): Whether to concat the output list to a Tensor.\n            `True` only in the last Block.\n    \"\"\"\n\n    def __init__(self, trident_dilations, test_branch_idx, concat_output,\n                 **kwargs):\n\n        super(TridentBottleneck, self).__init__(**kwargs)\n        self.trident_dilations = trident_dilations\n        self.num_branch = len(trident_dilations)\n        self.concat_output = concat_output\n        self.test_branch_idx = test_branch_idx\n        self.conv2 = TridentConv(\n            self.planes,\n            self.planes,\n            kernel_size=3,\n            stride=self.conv2_stride,\n            bias=False,\n            trident_dilations=self.trident_dilations,\n            test_branch_idx=test_branch_idx,\n            init_cfg=dict(\n                type='Kaiming',\n                distribution='uniform',\n                mode='fan_in',\n                override=dict(name='conv2')))\n\n    def forward(self, x):\n\n        def _inner_forward(x):\n            num_branch = (\n                self.num_branch\n                if self.training or self.test_branch_idx == -1 else 1)\n            identity = x\n            if not isinstance(x, list):\n                x = (x, ) * num_branch\n                identity = x\n                if self.downsample is not None:\n                    identity = [self.downsample(b) for b in x]\n\n            out = [self.conv1(b) for b in x]\n            out = [self.norm1(b) for b in out]\n            out = [self.relu(b) for b in out]\n\n            if self.with_plugins:\n                for k in range(len(out)):\n                    out[k] = self.forward_plugin(out[k],\n                                                 self.after_conv1_plugin_names)\n\n            out = self.conv2(out)\n            out = [self.norm2(b) for b in out]\n            out = [self.relu(b) for b in out]\n            if self.with_plugins:\n                for k in range(len(out)):\n                    out[k] = self.forward_plugin(out[k],\n                                                 self.after_conv2_plugin_names)\n\n            out = [self.conv3(b) for b in out]\n            out = [self.norm3(b) for b in out]\n\n            if self.with_plugins:\n                for k in range(len(out)):\n                    out[k] = self.forward_plugin(out[k],\n                                                 self.after_conv3_plugin_names)\n\n            out = [\n                out_b + identity_b for out_b, identity_b in zip(out, identity)\n            ]\n            return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        else:\n            out = _inner_forward(x)\n\n        out = [self.relu(b) for b in out]\n        if self.concat_output:\n            out = torch.cat(out, dim=0)\n        return out\n\n\ndef make_trident_res_layer(block,\n                           inplanes,\n                           planes,\n                           num_blocks,\n                           stride=1,\n                           trident_dilations=(1, 2, 
3),\n                           style='pytorch',\n                           with_cp=False,\n                           conv_cfg=None,\n                           norm_cfg=dict(type='BN'),\n                           dcn=None,\n                           plugins=None,\n                           test_branch_idx=-1):\n    \"\"\"Build Trident Res Layers.\"\"\"\n\n    downsample = None\n    if stride != 1 or inplanes != planes * block.expansion:\n        downsample = []\n        conv_stride = stride\n        downsample.extend([\n            build_conv_layer(\n                conv_cfg,\n                inplanes,\n                planes * block.expansion,\n                kernel_size=1,\n                stride=conv_stride,\n                bias=False),\n            build_norm_layer(norm_cfg, planes * block.expansion)[1]\n        ])\n        downsample = nn.Sequential(*downsample)\n\n    layers = []\n    for i in range(num_blocks):\n        layers.append(\n            block(\n                inplanes=inplanes,\n                planes=planes,\n                stride=stride if i == 0 else 1,\n                trident_dilations=trident_dilations,\n                downsample=downsample if i == 0 else None,\n                style=style,\n                with_cp=with_cp,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                dcn=dcn,\n                plugins=plugins,\n                test_branch_idx=test_branch_idx,\n                concat_output=True if i == num_blocks - 1 else False))\n        inplanes = planes * block.expansion\n    return nn.Sequential(*layers)\n\n\n@BACKBONES.register_module()\nclass TridentResNet(ResNet):\n    \"\"\"The stem layer, stage 1 and stage 2 in Trident ResNet are identical to\n    ResNet, while in stage 3, Trident BottleBlock is utilized to replace the\n    normal BottleBlock to yield trident output. 
Different branch shares the\n    convolution weight but uses different dilations to achieve multi-scale\n    output.\n\n                               / stage3(b0) \\\n    x - stem - stage1 - stage2 - stage3(b1) - output\n                               \\ stage3(b2) /\n\n    Args:\n        depth (int): Depth of resnet, from {50, 101, 152}.\n        num_branch (int): Number of branches in TridentNet.\n        test_branch_idx (int): In inference, all 3 branches will be used\n            if `test_branch_idx==-1`, otherwise only branch with index\n            `test_branch_idx` will be used.\n        trident_dilations (tuple[int]): Dilations of different trident branch.\n            len(trident_dilations) should be equal to num_branch.\n    \"\"\"  # noqa\n\n    def __init__(self, depth, num_branch, test_branch_idx, trident_dilations,\n                 **kwargs):\n\n        assert num_branch == len(trident_dilations)\n        assert depth in (50, 101, 152)\n        super(TridentResNet, self).__init__(depth, **kwargs)\n        assert self.num_stages == 3\n        self.test_branch_idx = test_branch_idx\n        self.num_branch = num_branch\n\n        last_stage_idx = self.num_stages - 1\n        stride = self.strides[last_stage_idx]\n        dilation = trident_dilations\n        dcn = self.dcn if self.stage_with_dcn[last_stage_idx] else None\n        if self.plugins is not None:\n            stage_plugins = self.make_stage_plugins(self.plugins,\n                                                    last_stage_idx)\n        else:\n            stage_plugins = None\n        planes = self.base_channels * 2**last_stage_idx\n        res_layer = make_trident_res_layer(\n            TridentBottleneck,\n            inplanes=(self.block.expansion * self.base_channels *\n                      2**(last_stage_idx - 1)),\n            planes=planes,\n            num_blocks=self.stage_blocks[last_stage_idx],\n            stride=stride,\n            trident_dilations=dilation,\n            style=self.style,\n            with_cp=self.with_cp,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg,\n            dcn=dcn,\n            plugins=stage_plugins,\n            test_branch_idx=self.test_branch_idx)\n\n        layer_name = f'layer{last_stage_idx + 1}'\n\n        self.__setattr__(layer_name, res_layer)\n        self.res_layers.pop(last_stage_idx)\n        self.res_layers.insert(last_stage_idx, layer_name)\n\n        self._freeze_stages()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nfrom mmcv.cnn import MODELS as MMCV_MODELS\nfrom mmcv.utils import Registry\n\nMODELS = Registry('models', parent=MMCV_MODELS)\n\nBACKBONES = MODELS\nNECKS = MODELS\nROI_EXTRACTORS = MODELS\nSHARED_HEADS = MODELS\nHEADS = MODELS\nLOSSES = MODELS\nDETECTORS = MODELS\n\n\ndef build_backbone(cfg):\n    \"\"\"Build backbone.\"\"\"\n    return BACKBONES.build(cfg)\n\n\ndef build_neck(cfg):\n    \"\"\"Build neck.\"\"\"\n    return NECKS.build(cfg)\n\n\ndef build_roi_extractor(cfg):\n    \"\"\"Build roi extractor.\"\"\"\n    return ROI_EXTRACTORS.build(cfg)\n\n\ndef build_shared_head(cfg):\n    \"\"\"Build shared head.\"\"\"\n    return SHARED_HEADS.build(cfg)\n\n\ndef build_head(cfg):\n    \"\"\"Build head.\"\"\"\n    return HEADS.build(cfg)\n\n\ndef build_loss(cfg):\n    \"\"\"Build loss.\"\"\"\n    return LOSSES.build(cfg)\n\n\ndef build_detector(cfg, train_cfg=None, test_cfg=None):\n    \"\"\"Build detector.\"\"\"\n    if train_cfg is not None or test_cfg is not None:\n        warnings.warn(\n            'train_cfg and test_cfg is deprecated, '\n            'please specify them in model', UserWarning)\n    assert cfg.get('train_cfg') is None or train_cfg is None, \\\n        'train_cfg specified in both outer field and model field '\n    assert cfg.get('test_cfg') is None or test_cfg is None, \\\n        'test_cfg specified in both outer field and model field '\n    return DETECTORS.build(\n        cfg, default_args=dict(train_cfg=train_cfg, test_cfg=test_cfg))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .anchor_free_head import AnchorFreeHead\nfrom .anchor_head import AnchorHead\nfrom .atss_head import ATSSHead\nfrom .autoassign_head import AutoAssignHead\nfrom .cascade_rpn_head import CascadeRPNHead, StageCascadeRPNHead\nfrom .centernet_head import CenterNetHead\nfrom .centripetal_head import CentripetalHead\nfrom .corner_head import CornerHead\nfrom .ddod_head import DDODHead\nfrom .deformable_detr_head import DeformableDETRHead\nfrom .detr_head import DETRHead\nfrom .embedding_rpn_head import EmbeddingRPNHead\nfrom .fcos_head import FCOSHead\nfrom .fovea_head import FoveaHead\nfrom .free_anchor_retina_head import FreeAnchorRetinaHead\nfrom .fsaf_head import FSAFHead\nfrom .ga_retina_head import GARetinaHead\nfrom .ga_rpn_head import GARPNHead\nfrom .gfl_head import GFLHead\nfrom .guided_anchor_head import FeatureAdaption, GuidedAnchorHead\nfrom .lad_head import LADHead\nfrom .ld_head import LDHead\nfrom .mask2former_head import Mask2FormerHead\nfrom .maskformer_head import MaskFormerHead\nfrom .nasfcos_head import NASFCOSHead\nfrom .paa_head import PAAHead\nfrom .pisa_retinanet_head import PISARetinaHead\nfrom .pisa_ssd_head import PISASSDHead\nfrom .reppoints_head import RepPointsHead\nfrom .retina_head import RetinaHead\nfrom .retina_sepbn_head import RetinaSepBNHead\nfrom .rpn_head import RPNHead\nfrom .sabl_retina_head import SABLRetinaHead\nfrom .solo_head import DecoupledSOLOHead, DecoupledSOLOLightHead, SOLOHead\nfrom .solov2_head import SOLOV2Head\nfrom .ssd_head import SSDHead\nfrom .tood_head import TOODHead\nfrom .vfnet_head import VFNetHead\nfrom .yolact_head import YOLACTHead, YOLACTProtonet, YOLACTSegmHead\nfrom .yolo_head import YOLOV3Head\nfrom .yolof_head import YOLOFHead\nfrom .yolox_head import YOLOXHead\n\n__all__ = [\n    'AnchorFreeHead', 'AnchorHead', 'GuidedAnchorHead', 'FeatureAdaption',\n    'RPNHead', 'GARPNHead', 'RetinaHead', 'RetinaSepBNHead', 'GARetinaHead',\n    'SSDHead', 'FCOSHead', 'RepPointsHead', 'FoveaHead',\n    'FreeAnchorRetinaHead', 'ATSSHead', 'FSAFHead', 'NASFCOSHead',\n    'PISARetinaHead', 'PISASSDHead', 'GFLHead', 'CornerHead', 'YOLACTHead',\n    'YOLACTSegmHead', 'YOLACTProtonet', 'YOLOV3Head', 'PAAHead',\n    'SABLRetinaHead', 'CentripetalHead', 'VFNetHead', 'StageCascadeRPNHead',\n    'CascadeRPNHead', 'EmbeddingRPNHead', 'LDHead', 'AutoAssignHead',\n    'DETRHead', 'YOLOFHead', 'DeformableDETRHead', 'SOLOHead',\n    'DecoupledSOLOHead', 'CenterNetHead', 'YOLOXHead',\n    'DecoupledSOLOLightHead', 'LADHead', 'TOODHead', 'MaskFormerHead',\n    'Mask2FormerHead', 'SOLOV2Head', 'DDODHead'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/anchor_free_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom abc import abstractmethod\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import build_bbox_coder, multi_apply\nfrom mmdet.core.anchor.point_generator import MlvlPointGenerator\nfrom ..builder import HEADS, build_loss\nfrom .base_dense_head import BaseDenseHead\nfrom .dense_test_mixins import BBoxTestMixin\n\n\n@HEADS.register_module()\nclass AnchorFreeHead(BaseDenseHead, BBoxTestMixin):\n    \"\"\"Anchor-free head (FCOS, Fovea, RepPoints, etc.).\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int): Number of hidden channels. Used in child classes.\n        stacked_convs (int): Number of stacking convs of the head.\n        strides (tuple): Downsample factor of each feature map.\n        dcn_on_last_conv (bool): If true, use dcn in the last layer of\n            towers. Default: False.\n        conv_bias (bool | str): If specified as `auto`, it will be decided by\n            the norm_cfg. Bias of conv will be set as True if `norm_cfg` is\n            None, otherwise False. Default: \"auto\".\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        bbox_coder (dict): Config of bbox coder. Defaults\n            'DistancePointBBoxCoder'.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Config dict for normalization layer. Default: None.\n        train_cfg (dict): Training config of anchor head.\n        test_cfg (dict): Testing config of anchor head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"  # noqa: W605\n\n    _version = 1\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 feat_channels=256,\n                 stacked_convs=4,\n                 strides=(4, 8, 16, 32, 64),\n                 dcn_on_last_conv=False,\n                 conv_bias='auto',\n                 loss_cls=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 loss_bbox=dict(type='IoULoss', loss_weight=1.0),\n                 bbox_coder=dict(type='DistancePointBBoxCoder'),\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='conv_cls',\n                         std=0.01,\n                         bias_prob=0.01))):\n        super(AnchorFreeHead, self).__init__(init_cfg)\n        self.num_classes = num_classes\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n        if self.use_sigmoid_cls:\n            self.cls_out_channels = num_classes\n        else:\n            self.cls_out_channels = num_classes + 1\n        self.in_channels = in_channels\n        self.feat_channels = feat_channels\n        self.stacked_convs = stacked_convs\n        self.strides = strides\n        self.dcn_on_last_conv 
= dcn_on_last_conv\n        assert conv_bias == 'auto' or isinstance(conv_bias, bool)\n        self.conv_bias = conv_bias\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n        self.prior_generator = MlvlPointGenerator(strides)\n\n        # In order to keep a more general interface and be consistent with\n        # anchor_head. We can think of point like one anchor\n        self.num_base_priors = self.prior_generator.num_base_priors[0]\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.fp16_enabled = False\n\n        self._init_layers()\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self._init_cls_convs()\n        self._init_reg_convs()\n        self._init_predictor()\n\n    def _init_cls_convs(self):\n        \"\"\"Initialize classification conv layers of the head.\"\"\"\n        self.cls_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            if self.dcn_on_last_conv and i == self.stacked_convs - 1:\n                conv_cfg = dict(type='DCNv2')\n            else:\n                conv_cfg = self.conv_cfg\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.conv_bias))\n\n    def _init_reg_convs(self):\n        \"\"\"Initialize bbox regression conv layers of the head.\"\"\"\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            if self.dcn_on_last_conv and i == self.stacked_convs - 1:\n                conv_cfg = dict(type='DCNv2')\n            else:\n                conv_cfg = self.conv_cfg\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.conv_bias))\n\n    def _init_predictor(self):\n        \"\"\"Initialize predictor layers of the head.\"\"\"\n        self.conv_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n        self.conv_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)\n\n    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,\n                              missing_keys, unexpected_keys, error_msgs):\n        \"\"\"Hack some keys of the model state dict so that can load checkpoints\n        of previous version.\"\"\"\n        version = local_metadata.get('version', None)\n        if version is None:\n            # the key is different in early versions\n            # for example, 'fcos_cls' become 'conv_cls' now\n            bbox_head_keys = [\n                k for k in state_dict.keys() if k.startswith(prefix)\n            ]\n            ori_predictor_keys = []\n            new_predictor_keys = []\n            # e.g. 
'fcos_cls' or 'fcos_reg'\n            for key in bbox_head_keys:\n                ori_predictor_keys.append(key)\n                key = key.split('.')\n                conv_name = None\n                if key[1].endswith('cls'):\n                    conv_name = 'conv_cls'\n                elif key[1].endswith('reg'):\n                    conv_name = 'conv_reg'\n                elif key[1].endswith('centerness'):\n                    conv_name = 'conv_centerness'\n                else:\n                    assert NotImplementedError\n                if conv_name is not None:\n                    key[1] = conv_name\n                    new_predictor_keys.append('.'.join(key))\n                else:\n                    ori_predictor_keys.pop(-1)\n            for i in range(len(new_predictor_keys)):\n                state_dict[new_predictor_keys[i]] = state_dict.pop(\n                    ori_predictor_keys[i])\n        super()._load_from_state_dict(state_dict, prefix, local_metadata,\n                                      strict, missing_keys, unexpected_keys,\n                                      error_msgs)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple: Usually contain classification scores and bbox predictions.\n                cls_scores (list[Tensor]): Box scores for each scale level,\n                    each is a 4D-tensor, the channel number is\n                    num_points * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * 4.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)[:2]\n\n    def forward_single(self, x):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): FPN feature maps of the specified stride.\n\n        Returns:\n            tuple: Scores for each class, bbox predictions, features\n                after classification and regression conv layers, some\n                models needs these features like FCOS.\n        \"\"\"\n        cls_feat = x\n        reg_feat = x\n\n        for cls_layer in self.cls_convs:\n            cls_feat = cls_layer(cls_feat)\n        cls_score = self.conv_cls(cls_feat)\n\n        for reg_layer in self.reg_convs:\n            reg_feat = reg_layer(reg_feat)\n        bbox_pred = self.conv_reg(reg_feat)\n        return cls_score, bbox_pred, cls_feat, reg_feat\n\n    @abstractmethod\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * 4.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices 
corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n        \"\"\"\n\n        raise NotImplementedError\n\n    @abstractmethod\n    def get_targets(self, points, gt_bboxes_list, gt_labels_list):\n        \"\"\"Compute regression, classification and centerness targets for points\n        in multiple images.\n\n        Args:\n            points (list[Tensor]): Points of each fpn level, each has shape\n                (num_points, 2).\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n            gt_labels_list (list[Tensor]): Ground truth labels of each box,\n                each has shape (num_gt,).\n        \"\"\"\n        raise NotImplementedError\n\n    def _get_points_single(self,\n                           featmap_size,\n                           stride,\n                           dtype,\n                           device,\n                           flatten=False):\n        \"\"\"Get points of a single scale level.\n\n        This function will be deprecated soon.\n        \"\"\"\n\n        warnings.warn(\n            '`_get_points_single` in `AnchorFreeHead` will be '\n            'deprecated soon, we support a multi level point generator now'\n            'you can get points of a single level feature map '\n            'with `self.prior_generator.single_level_grid_priors` ')\n\n        h, w = featmap_size\n        # First create Range with the default dtype, than convert to\n        # target `dtype` for onnx exporting.\n        x_range = torch.arange(w, device=device).to(dtype)\n        y_range = torch.arange(h, device=device).to(dtype)\n        y, x = torch.meshgrid(y_range, x_range)\n        if flatten:\n            y = y.flatten()\n            x = x.flatten()\n        return y, x\n\n    def get_points(self, featmap_sizes, dtype, device, flatten=False):\n        \"\"\"Get points according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            dtype (torch.dtype): Type of points.\n            device (torch.device): Device of points.\n\n        Returns:\n            tuple: points of each image.\n        \"\"\"\n        warnings.warn(\n            '`get_points` in `AnchorFreeHead` will be '\n            'deprecated soon, we support a multi level point generator now'\n            'you can get points of all levels '\n            'with `self.prior_generator.grid_priors` ')\n\n        mlvl_points = []\n        for i in range(len(featmap_sizes)):\n            mlvl_points.append(\n                self._get_points_single(featmap_sizes[i], self.strides[i],\n                                        dtype, device, flatten))\n        return mlvl_points\n\n    def aug_test(self, feats, img_metas, rescale=False):\n        \"\"\"Test function with test time augmentation.\n\n        Args:\n            feats (list[Tensor]): the outer list indicates test-time\n                augmentations and inner Tensor should have a shape NxCxHxW,\n                which contains features for all images in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch. 
each dict has image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[ndarray]: bbox results of each class\n        \"\"\"\n        return self.aug_test_bboxes(feats, img_metas, rescale=rescale)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/anchor_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (anchor_inside_flags, build_assigner, build_bbox_coder,\n                        build_prior_generator, build_sampler, images_to_levels,\n                        multi_apply, unmap)\nfrom ..builder import HEADS, build_loss\nfrom .base_dense_head import BaseDenseHead\nfrom .dense_test_mixins import BBoxTestMixin\n\n\n@HEADS.register_module()\nclass AnchorHead(BaseDenseHead, BBoxTestMixin):\n    \"\"\"Anchor-based head (RPN, RetinaNet, SSD, etc.).\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int): Number of hidden channels. Used in child classes.\n        anchor_generator (dict): Config dict for anchor generator\n        bbox_coder (dict): Config of bounding box coder.\n        reg_decoded_bbox (bool): If true, the regression loss would be\n            applied directly on decoded bounding boxes, converting both\n            the predicted boxes and regression targets to absolute\n            coordinates format. Default False. It should be `True` when\n            using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        train_cfg (dict): Training config of anchor head.\n        test_cfg (dict): Testing config of anchor head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 feat_channels=256,\n                 anchor_generator=dict(\n                     type='AnchorGenerator',\n                     scales=[8, 16, 32],\n                     ratios=[0.5, 1.0, 2.0],\n                     strides=[4, 8, 16, 32, 64]),\n                 bbox_coder=dict(\n                     type='DeltaXYWHBBoxCoder',\n                     clip_border=True,\n                     target_means=(.0, .0, .0, .0),\n                     target_stds=(1.0, 1.0, 1.0, 1.0)),\n                 reg_decoded_bbox=False,\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=dict(type='Normal', layer='Conv2d', std=0.01)):\n        super(AnchorHead, self).__init__(init_cfg)\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.feat_channels = feat_channels\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n        if self.use_sigmoid_cls:\n            self.cls_out_channels = num_classes\n        else:\n            self.cls_out_channels = num_classes + 1\n\n        if self.cls_out_channels <= 0:\n            raise ValueError(f'num_classes={num_classes} is too small')\n        self.reg_decoded_bbox = reg_decoded_bbox\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        if 
self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            if hasattr(self.train_cfg,\n                       'sampler') and self.train_cfg.sampler.type.split(\n                           '.')[-1] != 'PseudoSampler':\n                self.sampling = True\n                sampler_cfg = self.train_cfg.sampler\n                # avoid BC-breaking\n                if loss_cls['type'] in [\n                        'FocalLoss', 'GHMC', 'QualityFocalLoss'\n                ]:\n                    warnings.warn(\n                        'DeprecationWarning: Determining whether to sampling'\n                        'by loss type is deprecated, please delete sampler in'\n                        'your config when using `FocalLoss`, `GHMC`, '\n                        '`QualityFocalLoss` or other FocalLoss variant.')\n                    self.sampling = False\n                    sampler_cfg = dict(type='PseudoSampler')\n            else:\n                self.sampling = False\n                sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n        self.fp16_enabled = False\n\n        self.prior_generator = build_prior_generator(anchor_generator)\n\n        # Usually the numbers of anchors for each level are the same\n        # except SSD detectors. So it is an int in the most dense\n        # heads but a list of int in SSDHead\n        self.num_base_priors = self.prior_generator.num_base_priors[0]\n        self._init_layers()\n\n    @property\n    def num_anchors(self):\n        warnings.warn('DeprecationWarning: `num_anchors` is deprecated, '\n                      'for consistency or also use '\n                      '`num_base_priors` instead')\n        return self.prior_generator.num_base_priors[0]\n\n    @property\n    def anchor_generator(self):\n        warnings.warn('DeprecationWarning: anchor_generator is deprecated, '\n                      'please use \"prior_generator\" instead')\n        return self.prior_generator\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.conv_cls = nn.Conv2d(self.in_channels,\n                                  self.num_base_priors * self.cls_out_channels,\n                                  1)\n        self.conv_reg = nn.Conv2d(self.in_channels, self.num_base_priors * 4,\n                                  1)\n\n    def forward_single(self, x):\n        \"\"\"Forward feature of a single scale level.\n\n        Args:\n            x (Tensor): Features of a single scale level.\n\n        Returns:\n            tuple:\n                cls_score (Tensor): Cls scores for a single scale level \\\n                    the channels number is num_base_priors * num_classes.\n                bbox_pred (Tensor): Box energies / deltas for a single scale \\\n                    level, the channels number is num_base_priors * 4.\n        \"\"\"\n        cls_score = self.conv_cls(x)\n        bbox_pred = self.conv_reg(x)\n        return cls_score, bbox_pred\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple: A tuple of classification scores and bbox prediction.\n\n                - cls_scores (list[Tensor]): Classification scores for all \\\n                    scale levels, each is a 4D-tensor, the channels number \\\n                 
   is num_base_priors * num_classes.\n                - bbox_preds (list[Tensor]): Box energies / deltas for all \\\n                    scale levels, each is a 4D-tensor, the channels number \\\n                    is num_base_priors * 4.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)\n\n    def get_anchors(self, featmap_sizes, img_metas, device='cuda'):\n        \"\"\"Get anchors according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            img_metas (list[dict]): Image meta info.\n            device (torch.device | str): Device for returned tensors\n\n        Returns:\n            tuple:\n                anchor_list (list[Tensor]): Anchors of each image.\n                valid_flag_list (list[Tensor]): Valid flags of each image.\n        \"\"\"\n        num_imgs = len(img_metas)\n\n        # since feature map sizes of all images are the same, we only compute\n        # anchors for one time\n        multi_level_anchors = self.prior_generator.grid_priors(\n            featmap_sizes, device=device)\n        anchor_list = [multi_level_anchors for _ in range(num_imgs)]\n\n        # for each image, we compute valid flags of multi level anchors\n        valid_flag_list = []\n        for img_id, img_meta in enumerate(img_metas):\n            multi_level_flags = self.prior_generator.valid_flags(\n                featmap_sizes, img_meta['pad_shape'], device)\n            valid_flag_list.append(multi_level_flags)\n\n        return anchor_list, valid_flag_list\n\n    def _get_targets_single(self,\n                            flat_anchors,\n                            valid_flags,\n                            gt_bboxes,\n                            gt_bboxes_ignore,\n                            gt_labels,\n                            img_meta,\n                            label_channels=1,\n                            unmap_outputs=True):\n        \"\"\"Compute regression and classification targets for anchors in a\n        single image.\n\n        Args:\n            flat_anchors (Tensor): Multi-level anchors of the image, which are\n                concatenated into a single tensor of shape (num_anchors ,4)\n            valid_flags (Tensor): Multi level valid flags of the image,\n                which are concatenated into a single tensor of\n                    shape (num_anchors,).\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            img_meta (dict): Meta info of the image.\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple:\n                labels_list (list[Tensor]): Labels of each level\n                label_weights_list (list[Tensor]): Label weights of each level\n                bbox_targets_list (list[Tensor]): BBox targets of each level\n                bbox_weights_list (list[Tensor]): BBox weights of each level\n                num_total_pos (int): Number of positive samples in all images\n                num_total_neg (int): Number of negative samples in all images\n        \"\"\"\n        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,\n         
                                  img_meta['img_shape'][:2],\n                                           self.train_cfg.allowed_border)\n        if not inside_flags.any():\n            return (None, ) * 7\n        # assign gt and sample anchors\n        anchors = flat_anchors[inside_flags, :]\n\n        assign_result = self.assigner.assign(\n            anchors, gt_bboxes, gt_bboxes_ignore,\n            None if self.sampling else gt_labels)\n        sampling_result = self.sampler.sample(assign_result, anchors,\n                                              gt_bboxes)\n\n        num_valid_anchors = anchors.shape[0]\n        bbox_targets = torch.zeros_like(anchors)\n        bbox_weights = torch.zeros_like(anchors)\n        labels = anchors.new_full((num_valid_anchors, ),\n                                  self.num_classes,\n                                  dtype=torch.long)\n        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            if not self.reg_decoded_bbox:\n                pos_bbox_targets = self.bbox_coder.encode(\n                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)\n            else:\n                pos_bbox_targets = sampling_result.pos_gt_bboxes\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class since v2.5.0\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_anchors.size(0)\n            labels = unmap(\n                labels, num_total_anchors, inside_flags,\n                fill=self.num_classes)  # fill bg label\n            label_weights = unmap(label_weights, num_total_anchors,\n                                  inside_flags)\n            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)\n            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)\n\n        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,\n                neg_inds, sampling_result)\n\n    def get_targets(self,\n                    anchor_list,\n                    valid_flag_list,\n                    gt_bboxes_list,\n                    img_metas,\n                    gt_bboxes_ignore_list=None,\n                    gt_labels_list=None,\n                    label_channels=1,\n                    unmap_outputs=True,\n                    return_sampling_results=False):\n        \"\"\"Compute regression and classification targets for anchors in\n        multiple images.\n\n        Args:\n            anchor_list (list[list[Tensor]]): Multi level anchors of each\n                image. The outer list indicates images, and the inner list\n                corresponds to feature levels of the image. 
Each element of\n                the inner list is a tensor of shape (num_anchors, 4).\n            valid_flag_list (list[list[Tensor]]): Multi level valid flags of\n                each image. The outer list indicates images, and the inner list\n                corresponds to feature levels of the image. Each element of\n                the inner list is a tensor of shape (num_anchors, )\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be\n                ignored.\n            gt_labels_list (list[Tensor]): Ground truth labels of each box.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple: Usually returns a tuple containing learning targets.\n\n                - labels_list (list[Tensor]): Labels of each level.\n                - label_weights_list (list[Tensor]): Label weights of each\n                  level.\n                - bbox_targets_list (list[Tensor]): BBox targets of each level.\n                - bbox_weights_list (list[Tensor]): BBox weights of each level.\n                - num_total_pos (int): Number of positive samples in all\n                  images.\n                - num_total_neg (int): Number of negative samples in all\n                  images.\n\n            additional_returns: This function enables user-defined returns from\n                `self._get_targets_single`. These returns are currently refined\n                to properties at each feature map (i.e. having HxW dimension).\n                The results will be concatenated after the end\n        \"\"\"\n        num_imgs = len(img_metas)\n        assert len(anchor_list) == len(valid_flag_list) == num_imgs\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n        # concat all level anchors to a single tensor\n        concat_anchor_list = []\n        concat_valid_flag_list = []\n        for i in range(num_imgs):\n            assert len(anchor_list[i]) == len(valid_flag_list[i])\n            concat_anchor_list.append(torch.cat(anchor_list[i]))\n            concat_valid_flag_list.append(torch.cat(valid_flag_list[i]))\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        results = multi_apply(\n            self._get_targets_single,\n            concat_anchor_list,\n            concat_valid_flag_list,\n            gt_bboxes_list,\n            gt_bboxes_ignore_list,\n            gt_labels_list,\n            img_metas,\n            label_channels=label_channels,\n            unmap_outputs=unmap_outputs)\n        (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,\n         pos_inds_list, neg_inds_list, sampling_results_list) = results[:7]\n        rest_results = list(results[7:])  # user-added return values\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in 
neg_inds_list])\n        # split targets to a list w.r.t. multiple levels\n        labels_list = images_to_levels(all_labels, num_level_anchors)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors)\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors)\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_anchors)\n        res = (labels_list, label_weights_list, bbox_targets_list,\n               bbox_weights_list, num_total_pos, num_total_neg)\n        if return_sampling_results:\n            res = res + (sampling_results_list, )\n        for i, r in enumerate(rest_results):  # user-added return values\n            rest_results[i] = images_to_levels(r, num_level_anchors)\n\n        return res + tuple(rest_results)\n\n    def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights,\n                    bbox_targets, bbox_weights, num_total_samples):\n        \"\"\"Compute loss of a single scale level.\n\n        Args:\n            cls_score (Tensor): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W).\n            bbox_pred (Tensor): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W).\n            anchors (Tensor): Box reference for each scale level with shape\n                (N, num_total_anchors, 4).\n            labels (Tensor): Labels of each anchors with shape\n                (N, num_total_anchors).\n            label_weights (Tensor): Label weights of each anchor with shape\n                (N, num_total_anchors)\n            bbox_targets (Tensor): BBox regression targets of each anchor\n                weight shape (N, num_total_anchors, 4).\n            bbox_weights (Tensor): BBox regression loss weights of each anchor\n                with shape (N, num_total_anchors, 4).\n            num_total_samples (int): If sampling, num total samples equal to\n                the number of total anchors; Otherwise, it is the number of\n                positive anchors.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        # classification loss\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        cls_score = cls_score.permute(0, 2, 3,\n                                      1).reshape(-1, self.cls_out_channels)\n        loss_cls = self.loss_cls(\n            cls_score, labels, label_weights, avg_factor=num_total_samples)\n        # regression loss\n        bbox_targets = bbox_targets.reshape(-1, 4)\n        bbox_weights = bbox_weights.reshape(-1, 4)\n        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n        if self.reg_decoded_bbox:\n            # When the regression loss (e.g. 
`IouLoss`, `GIouLoss`)\n            # is applied directly on the decoded bounding boxes, it\n            # decodes the already encoded coordinates to absolute format.\n            anchors = anchors.reshape(-1, 4)\n            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)\n        loss_bbox = self.loss_bbox(\n            bbox_pred,\n            bbox_targets,\n            bbox_weights,\n            avg_factor=num_total_samples)\n        return loss_cls, loss_bbox\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss. Default: None\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n        # concat all level anchors and flags to a single tensor\n        concat_anchor_list = []\n        for i in range(len(anchor_list)):\n            concat_anchor_list.append(torch.cat(anchor_list[i]))\n        all_anchor_list = images_to_levels(concat_anchor_list,\n                                           num_level_anchors)\n\n        losses_cls, losses_bbox = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            all_anchor_list,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            num_total_samples=num_total_samples)\n        return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)\n\n    def aug_test(self, feats, img_metas, rescale=False):\n      
  \"\"\"Test function with test time augmentation.\n\n        Args:\n            feats (list[Tensor]): the outer list indicates test-time\n                augmentations and inner Tensor should have a shape NxCxHxW,\n                which contains features for all images in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch. each dict has image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is ``bboxes`` with shape (n, 5), where\n                5 represent (tl_x, tl_y, br_x, br_y, score).\n                The shape of the second tensor in the tuple is ``labels``\n                with shape (n,), The length of list should always be 1.\n        \"\"\"\n        return self.aug_test_bboxes(feats, img_metas, rescale=rescale)\n"
  },
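In `AnchorHead.loss_single` above, the `reg_decoded_bbox` branch decodes anchors and predicted deltas back to absolute (tl_x, tl_y, br_x, br_y) boxes before the regression loss, because IoU-style losses (`IouLoss`, `GIouLoss`) operate on decoded boxes rather than on encoded deltas. The snippet below is a minimal, self-contained sketch of that decode-then-loss flow; `decode_deltas` and `giou_loss` are simplified stand-ins for the configured `bbox_coder` and `loss_bbox` objects, not code from this repository.

# Simplified sketch (assumed helper names, not mmdet's API): decode deltas to
# absolute boxes, then apply a GIoU-style loss directly on the decoded boxes.
import torch


def decode_deltas(anchors: torch.Tensor, deltas: torch.Tensor) -> torch.Tensor:
    """Convert (dx, dy, dw, dh) offsets w.r.t. anchors into x1y1x2y2 boxes."""
    w = anchors[:, 2] - anchors[:, 0]
    h = anchors[:, 3] - anchors[:, 1]
    cx = anchors[:, 0] + 0.5 * w
    cy = anchors[:, 1] + 0.5 * h
    pcx = cx + deltas[:, 0] * w
    pcy = cy + deltas[:, 1] * h
    pw = w * deltas[:, 2].exp()
    ph = h * deltas[:, 3].exp()
    return torch.stack(
        [pcx - 0.5 * pw, pcy - 0.5 * ph, pcx + 0.5 * pw, pcy + 0.5 * ph], dim=-1)


def giou_loss(pred: torch.Tensor, target: torch.Tensor) -> torch.Tensor:
    """GIoU loss averaged over samples; expects already-decoded boxes."""
    lt = torch.max(pred[:, :2], target[:, :2])
    rb = torch.min(pred[:, 2:], target[:, 2:])
    inter = (rb - lt).clamp(min=0).prod(dim=-1)
    area_pred = (pred[:, 2:] - pred[:, :2]).clamp(min=0).prod(dim=-1)
    area_target = (target[:, 2:] - target[:, :2]).clamp(min=0).prod(dim=-1)
    union = area_pred + area_target - inter
    iou = inter / union.clamp(min=1e-6)
    # smallest enclosing box of each (pred, target) pair
    enclose = (torch.max(pred[:, 2:], target[:, 2:]) -
               torch.min(pred[:, :2], target[:, :2])).clamp(min=0).prod(dim=-1)
    giou = iou - (enclose - union) / enclose.clamp(min=1e-6)
    return (1.0 - giou).mean()


# toy stand-ins for one image's positive anchors, predictions and targets
anchors = torch.tensor([[10., 10., 50., 50.], [30., 30., 80., 90.]])
bbox_pred = torch.tensor([[0.10, -0.05, 0.20, 0.00], [0.00, 0.10, -0.10, 0.05]])
bbox_targets = torch.tensor([[12., 9., 55., 52.], [28., 35., 75., 95.]])
loss_bbox = giou_loss(decode_deltas(anchors, bbox_pred), bbox_targets)
print(float(loss_bbox))

When `reg_decoded_bbox` is False the loss would instead be computed on the encoded deltas themselves, which is why the decode step is guarded by that flag in the head.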
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/atss_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, Scale\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (anchor_inside_flags, build_assigner, build_sampler,\n                        images_to_levels, multi_apply, reduce_mean, unmap)\nfrom ..builder import HEADS, build_loss\nfrom .anchor_head import AnchorHead\n\n\n@HEADS.register_module()\nclass ATSSHead(AnchorHead):\n    \"\"\"Bridging the Gap Between Anchor-based and Anchor-free Detection via\n    Adaptive Training Sample Selection.\n\n    ATSS head structure is similar with FCOS, however ATSS use anchor boxes\n    and assign label by Adaptive Training Sample Selection instead max-iou.\n\n    https://arxiv.org/abs/1912.02424\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 pred_kernel_size=3,\n                 stacked_convs=4,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 reg_decoded_bbox=True,\n                 loss_centerness=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='atss_cls',\n                         std=0.01,\n                         bias_prob=0.01)),\n                 **kwargs):\n        self.pred_kernel_size = pred_kernel_size\n        self.stacked_convs = stacked_convs\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        super(ATSSHead, self).__init__(\n            num_classes,\n            in_channels,\n            reg_decoded_bbox=reg_decoded_bbox,\n            init_cfg=init_cfg,\n            **kwargs)\n\n        self.sampling = False\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            # SSD sampling=False so use PseudoSampler\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n        self.loss_centerness = build_loss(loss_centerness)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        pred_pad_size = self.pred_kernel_size // 2\n        self.atss_cls = nn.Conv2d(\n            self.feat_channels,\n            self.num_anchors * self.cls_out_channels,\n            self.pred_kernel_size,\n            
padding=pred_pad_size)\n        self.atss_reg = nn.Conv2d(\n            self.feat_channels,\n            self.num_base_priors * 4,\n            self.pred_kernel_size,\n            padding=pred_pad_size)\n        self.atss_centerness = nn.Conv2d(\n            self.feat_channels,\n            self.num_base_priors * 1,\n            self.pred_kernel_size,\n            padding=pred_pad_size)\n        self.scales = nn.ModuleList(\n            [Scale(1.0) for _ in self.prior_generator.strides])\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple: Usually a tuple of classification scores and bbox prediction\n                cls_scores (list[Tensor]): Classification scores for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_anchors * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_anchors * 4.\n        \"\"\"\n        return multi_apply(self.forward_single, feats, self.scales)\n\n    def forward_single(self, x, scale):\n        \"\"\"Forward feature of a single scale level.\n\n        Args:\n            x (Tensor): Features of a single scale level.\n            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize\n                the bbox prediction.\n\n        Returns:\n            tuple:\n                cls_score (Tensor): Cls scores for a single scale level\n                    the channels number is num_anchors * num_classes.\n                bbox_pred (Tensor): Box energies / deltas for a single scale\n                    level, the channels number is num_anchors * 4.\n                centerness (Tensor): Centerness for a single scale level, the\n                    channel number is (N, num_anchors * 1, H, W).\n        \"\"\"\n        cls_feat = x\n        reg_feat = x\n        for cls_conv in self.cls_convs:\n            cls_feat = cls_conv(cls_feat)\n        for reg_conv in self.reg_convs:\n            reg_feat = reg_conv(reg_feat)\n        cls_score = self.atss_cls(cls_feat)\n        # we just follow atss, not apply exp in bbox_pred\n        bbox_pred = scale(self.atss_reg(reg_feat)).float()\n        centerness = self.atss_centerness(reg_feat)\n        return cls_score, bbox_pred, centerness\n\n    def loss_single(self, anchors, cls_score, bbox_pred, centerness, labels,\n                    label_weights, bbox_targets, num_total_samples):\n        \"\"\"Compute loss of a single scale level.\n\n        Args:\n            cls_score (Tensor): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W).\n            bbox_pred (Tensor): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W).\n            anchors (Tensor): Box reference for each scale level with shape\n                (N, num_total_anchors, 4).\n            labels (Tensor): Labels of each anchors with shape\n                (N, num_total_anchors).\n            label_weights (Tensor): Label weights of each anchor with shape\n                (N, num_total_anchors)\n            bbox_targets (Tensor): BBox regression targets of each anchor\n                weight shape (N, num_total_anchors, 4).\n            num_total_samples (int): Number 
os positive samples that is\n                reduced over all GPUs.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n\n        anchors = anchors.reshape(-1, 4)\n        cls_score = cls_score.permute(0, 2, 3, 1).reshape(\n            -1, self.cls_out_channels).contiguous()\n        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n        centerness = centerness.permute(0, 2, 3, 1).reshape(-1)\n        bbox_targets = bbox_targets.reshape(-1, 4)\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n\n        # classification loss\n        loss_cls = self.loss_cls(\n            cls_score, labels, label_weights, avg_factor=num_total_samples)\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = ((labels >= 0)\n                    & (labels < bg_class_ind)).nonzero().squeeze(1)\n\n        if len(pos_inds) > 0:\n            pos_bbox_targets = bbox_targets[pos_inds]\n            pos_bbox_pred = bbox_pred[pos_inds]\n            pos_anchors = anchors[pos_inds]\n            pos_centerness = centerness[pos_inds]\n\n            centerness_targets = self.centerness_target(\n                pos_anchors, pos_bbox_targets)\n            pos_decode_bbox_pred = self.bbox_coder.decode(\n                pos_anchors, pos_bbox_pred)\n\n            # regression loss\n            loss_bbox = self.loss_bbox(\n                pos_decode_bbox_pred,\n                pos_bbox_targets,\n                weight=centerness_targets,\n                avg_factor=1.0)\n\n            # centerness loss\n            loss_centerness = self.loss_centerness(\n                pos_centerness,\n                centerness_targets,\n                avg_factor=num_total_samples)\n\n        else:\n            loss_bbox = bbox_pred.sum() * 0\n            loss_centerness = centerness.sum() * 0\n            centerness_targets = bbox_targets.new_tensor(0.)\n\n        return loss_cls, loss_bbox, loss_centerness, centerness_targets.sum()\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'centernesses'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             centernesses,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            centernesses (list[Tensor]): Centerness for each scale\n                level with shape (N, num_anchors * 1, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == 
self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if cls_reg_targets is None:\n            return None\n\n        (anchor_list, labels_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets\n\n        num_total_samples = reduce_mean(\n            torch.tensor(num_total_pos, dtype=torch.float,\n                         device=device)).item()\n        num_total_samples = max(num_total_samples, 1.0)\n\n        losses_cls, losses_bbox, loss_centerness,\\\n            bbox_avg_factor = multi_apply(\n                self.loss_single,\n                anchor_list,\n                cls_scores,\n                bbox_preds,\n                centernesses,\n                labels_list,\n                label_weights_list,\n                bbox_targets_list,\n                num_total_samples=num_total_samples)\n\n        bbox_avg_factor = sum(bbox_avg_factor)\n        bbox_avg_factor = reduce_mean(bbox_avg_factor).clamp_(min=1).item()\n        losses_bbox = list(map(lambda x: x / bbox_avg_factor, losses_bbox))\n        return dict(\n            loss_cls=losses_cls,\n            loss_bbox=losses_bbox,\n            loss_centerness=loss_centerness)\n\n    def centerness_target(self, anchors, gts):\n        # only calculate pos centerness targets, otherwise there may be nan\n        anchors_cx = (anchors[:, 2] + anchors[:, 0]) / 2\n        anchors_cy = (anchors[:, 3] + anchors[:, 1]) / 2\n        l_ = anchors_cx - gts[:, 0]\n        t_ = anchors_cy - gts[:, 1]\n        r_ = gts[:, 2] - anchors_cx\n        b_ = gts[:, 3] - anchors_cy\n\n        left_right = torch.stack([l_, r_], dim=1)\n        top_bottom = torch.stack([t_, b_], dim=1)\n        centerness = torch.sqrt(\n            (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) *\n            (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]))\n        assert not torch.isnan(centerness).any()\n        return centerness\n\n    def get_targets(self,\n                    anchor_list,\n                    valid_flag_list,\n                    gt_bboxes_list,\n                    img_metas,\n                    gt_bboxes_ignore_list=None,\n                    gt_labels_list=None,\n                    label_channels=1,\n                    unmap_outputs=True):\n        \"\"\"Get targets for ATSS head.\n\n        This method is almost the same as `AnchorHead.get_targets()`. 
Besides\n        returning the targets as the parent method does, it also returns the\n        anchors as the first element of the returned tuple.\n        \"\"\"\n        num_imgs = len(img_metas)\n        assert len(anchor_list) == len(valid_flag_list) == num_imgs\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n        num_level_anchors_list = [num_level_anchors] * num_imgs\n\n        # concat all level anchors and flags to a single tensor\n        for i in range(num_imgs):\n            assert len(anchor_list[i]) == len(valid_flag_list[i])\n            anchor_list[i] = torch.cat(anchor_list[i])\n            valid_flag_list[i] = torch.cat(valid_flag_list[i])\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        (all_anchors, all_labels, all_label_weights, all_bbox_targets,\n         all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(\n             self._get_target_single,\n             anchor_list,\n             valid_flag_list,\n             num_level_anchors_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             gt_labels_list,\n             img_metas,\n             label_channels=label_channels,\n             unmap_outputs=unmap_outputs)\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        # split targets to a list w.r.t. 
multiple levels\n        anchors_list = images_to_levels(all_anchors, num_level_anchors)\n        labels_list = images_to_levels(all_labels, num_level_anchors)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors)\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors)\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_anchors)\n        return (anchors_list, labels_list, label_weights_list,\n                bbox_targets_list, bbox_weights_list, num_total_pos,\n                num_total_neg)\n\n    def _get_target_single(self,\n                           flat_anchors,\n                           valid_flags,\n                           num_level_anchors,\n                           gt_bboxes,\n                           gt_bboxes_ignore,\n                           gt_labels,\n                           img_meta,\n                           label_channels=1,\n                           unmap_outputs=True):\n        \"\"\"Compute regression, classification targets for anchors in a single\n        image.\n\n        Args:\n            flat_anchors (Tensor): Multi-level anchors of the image, which are\n                concatenated into a single tensor of shape (num_anchors ,4)\n            valid_flags (Tensor): Multi level valid flags of the image,\n                which are concatenated into a single tensor of\n                    shape (num_anchors,).\n            num_level_anchors Tensor): Number of anchors of each scale level.\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            img_meta (dict): Meta info of the image.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple: N is the number of total anchors in the image.\n                labels (Tensor): Labels of all anchors in the image with shape\n                    (N,).\n                label_weights (Tensor): Label weights of all anchor in the\n                    image with shape (N,).\n                bbox_targets (Tensor): BBox targets of all anchors in the\n                    image with shape (N, 4).\n                bbox_weights (Tensor): BBox weights of all anchors in the\n                    image with shape (N, 4)\n                pos_inds (Tensor): Indices of positive anchor with shape\n                    (num_pos,).\n                neg_inds (Tensor): Indices of negative anchor with shape\n                    (num_neg,).\n        \"\"\"\n        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,\n                                           img_meta['img_shape'][:2],\n                                           self.train_cfg.allowed_border)\n        if not inside_flags.any():\n            return (None, ) * 7\n        # assign gt and sample anchors\n        anchors = flat_anchors[inside_flags, :]\n\n        num_level_anchors_inside = self.get_num_level_anchors_inside(\n            num_level_anchors, inside_flags)\n        assign_result = self.assigner.assign(anchors, 
num_level_anchors_inside,\n                                             gt_bboxes, gt_bboxes_ignore,\n                                             gt_labels)\n\n        sampling_result = self.sampler.sample(assign_result, anchors,\n                                              gt_bboxes)\n\n        num_valid_anchors = anchors.shape[0]\n        bbox_targets = torch.zeros_like(anchors)\n        bbox_weights = torch.zeros_like(anchors)\n        labels = anchors.new_full((num_valid_anchors, ),\n                                  self.num_classes,\n                                  dtype=torch.long)\n        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            if self.reg_decoded_bbox:\n                pos_bbox_targets = sampling_result.pos_gt_bboxes\n            else:\n                pos_bbox_targets = self.bbox_coder.encode(\n                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)\n\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class since v2.5.0\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_anchors.size(0)\n            anchors = unmap(anchors, num_total_anchors, inside_flags)\n            labels = unmap(\n                labels, num_total_anchors, inside_flags, fill=self.num_classes)\n            label_weights = unmap(label_weights, num_total_anchors,\n                                  inside_flags)\n            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)\n            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)\n\n        return (anchors, labels, label_weights, bbox_targets, bbox_weights,\n                pos_inds, neg_inds)\n\n    def get_num_level_anchors_inside(self, num_level_anchors, inside_flags):\n        split_inside_flags = torch.split(inside_flags, num_level_anchors)\n        num_level_anchors_inside = [\n            int(flags.sum()) for flags in split_inside_flags\n        ]\n        return num_level_anchors_inside\n"
  },
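`ATSSHead.loss_single` above weights the decoded-box regression loss by `centerness_targets` and also supervises the separate centerness branch with the same values. The standalone sketch below restates `centerness_target` with toy positive anchors and ground-truth boxes, purely to make the weighting visible; the tensors are illustrative, not repository data.

# Illustrative sketch of the FCOS-style centerness target used by ATSSHead:
# the ratio of the shorter to the longer distance from the anchor centre to
# each pair of gt-box sides, combined under a square root.
import torch


def centerness_target(anchors: torch.Tensor, gts: torch.Tensor) -> torch.Tensor:
    """Centerness of each (positive) anchor centre w.r.t. its assigned gt box."""
    anchors_cx = (anchors[:, 2] + anchors[:, 0]) / 2
    anchors_cy = (anchors[:, 3] + anchors[:, 1]) / 2
    l_ = anchors_cx - gts[:, 0]
    t_ = anchors_cy - gts[:, 1]
    r_ = gts[:, 2] - anchors_cx
    b_ = gts[:, 3] - anchors_cy
    left_right = torch.stack([l_, r_], dim=1)
    top_bottom = torch.stack([t_, b_], dim=1)
    return torch.sqrt(
        (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) *
        (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]))


# an anchor centred inside its gt scores 1.0; off-centre anchors score lower
pos_anchors = torch.tensor([[40., 40., 60., 60.], [10., 10., 30., 30.]])
pos_gt_boxes = torch.tensor([[30., 30., 70., 70.], [15., 12., 60., 48.]])
print(centerness_target(pos_anchors, pos_gt_boxes))  # tensor([1.0000, 0.1890])

Because the targets are only computed for positive anchors (whose centres lie inside their gt boxes), all four distances are positive and the ratios stay well defined, which is the reason the head restricts this computation to `pos_inds`.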
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/autoassign_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import bias_init_with_prob, normal_init\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import multi_apply\nfrom mmdet.core.anchor.point_generator import MlvlPointGenerator\nfrom mmdet.core.bbox import bbox_overlaps\nfrom mmdet.models import HEADS\nfrom mmdet.models.dense_heads.atss_head import reduce_mean\nfrom mmdet.models.dense_heads.fcos_head import FCOSHead\nfrom mmdet.models.dense_heads.paa_head import levels_to_images\n\nEPS = 1e-12\n\n\nclass CenterPrior(nn.Module):\n    \"\"\"Center Weighting module to adjust the category-specific prior\n    distributions.\n\n    Args:\n        force_topk (bool): When no point falls into gt_bbox, forcibly\n            select the k points closest to the center to calculate\n            the center prior. Defaults to False.\n        topk (int): The number of points used to calculate the\n            center prior when no point falls in gt_bbox. Only work when\n            force_topk if True. Defaults to 9.\n        num_classes (int): The class number of dataset. Defaults to 80.\n        strides (tuple[int]): The stride of each input feature map. Defaults\n            to (8, 16, 32, 64, 128).\n    \"\"\"\n\n    def __init__(self,\n                 force_topk=False,\n                 topk=9,\n                 num_classes=80,\n                 strides=(8, 16, 32, 64, 128)):\n        super(CenterPrior, self).__init__()\n        self.mean = nn.Parameter(torch.zeros(num_classes, 2))\n        self.sigma = nn.Parameter(torch.ones(num_classes, 2))\n        self.strides = strides\n        self.force_topk = force_topk\n        self.topk = topk\n\n    def forward(self, anchor_points_list, gt_bboxes, labels,\n                inside_gt_bbox_mask):\n        \"\"\"Get the center prior of each point on the feature map for each\n        instance.\n\n        Args:\n            anchor_points_list (list[Tensor]): list of coordinate\n                of points on feature map. Each with shape\n                (num_points, 2).\n            gt_bboxes (Tensor): The gt_bboxes with shape of\n                (num_gt, 4).\n            labels (Tensor): The gt_labels with shape of (num_gt).\n            inside_gt_bbox_mask (Tensor): Tensor of bool type,\n                with shape of (num_points, num_gt), each\n                value is used to mark whether this point falls\n                within a certain gt.\n\n        Returns:\n            tuple(Tensor):\n\n                - center_prior_weights(Tensor): Float tensor with shape \\\n                    of (num_points, num_gt). 
Each value represents \\\n                    the center weighting coefficient.\n                - inside_gt_bbox_mask (Tensor): Tensor of bool type, \\\n                    with shape of (num_points, num_gt), each \\\n                    value is used to mark whether this point falls \\\n                    within a certain gt or is the topk nearest points for \\\n                    a specific gt_bbox.\n        \"\"\"\n        inside_gt_bbox_mask = inside_gt_bbox_mask.clone()\n        num_gts = len(labels)\n        num_points = sum([len(item) for item in anchor_points_list])\n        if num_gts == 0:\n            return gt_bboxes.new_zeros(num_points,\n                                       num_gts), inside_gt_bbox_mask\n        center_prior_list = []\n        for slvl_points, stride in zip(anchor_points_list, self.strides):\n            # slvl_points: points from single level in FPN, has shape (h*w, 2)\n            # single_level_points has shape (h*w, num_gt, 2)\n            single_level_points = slvl_points[:, None, :].expand(\n                (slvl_points.size(0), len(gt_bboxes), 2))\n            gt_center_x = ((gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2)\n            gt_center_y = ((gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2)\n            gt_center = torch.stack((gt_center_x, gt_center_y), dim=1)\n            gt_center = gt_center[None]\n            # instance_center has shape (1, num_gt, 2)\n            instance_center = self.mean[labels][None]\n            # instance_sigma has shape (1, num_gt, 2)\n            instance_sigma = self.sigma[labels][None]\n            # distance has shape (num_points, num_gt, 2)\n            distance = (((single_level_points - gt_center) / float(stride) -\n                         instance_center)**2)\n            center_prior = torch.exp(-distance /\n                                     (2 * instance_sigma**2)).prod(dim=-1)\n            center_prior_list.append(center_prior)\n        center_prior_weights = torch.cat(center_prior_list, dim=0)\n\n        if self.force_topk:\n            gt_inds_no_points_inside = torch.nonzero(\n                inside_gt_bbox_mask.sum(0) == 0).reshape(-1)\n            if gt_inds_no_points_inside.numel():\n                topk_center_index = \\\n                    center_prior_weights[:, gt_inds_no_points_inside].topk(\n                                                             self.topk,\n                                                             dim=0)[1]\n                temp_mask = inside_gt_bbox_mask[:, gt_inds_no_points_inside]\n                inside_gt_bbox_mask[:, gt_inds_no_points_inside] = \\\n                    torch.scatter(temp_mask,\n                                  dim=0,\n                                  index=topk_center_index,\n                                  src=torch.ones_like(\n                                    topk_center_index,\n                                    dtype=torch.bool))\n\n        center_prior_weights[~inside_gt_bbox_mask] = 0\n        return center_prior_weights, inside_gt_bbox_mask\n\n\n@HEADS.register_module()\nclass AutoAssignHead(FCOSHead):\n    \"\"\"AutoAssignHead head used in AutoAssign.\n\n    More details can be found in the `paper\n    <https://arxiv.org/abs/2007.03496>`_ .\n\n    Args:\n        force_topk (bool): Used in center prior initialization to\n            handle extremely small gt. Default is False.\n        topk (int): The number of points used to calculate the\n            center prior when no point falls in gt_bbox. 
Only work when\n            force_topk if True. Defaults to 9.\n        pos_loss_weight (float): The loss weight of positive loss\n            and with default value 0.25.\n        neg_loss_weight (float): The loss weight of negative loss\n            and with default value 0.75.\n        center_loss_weight (float): The loss weight of center prior\n            loss and with default value 0.75.\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 force_topk=False,\n                 topk=9,\n                 pos_loss_weight=0.25,\n                 neg_loss_weight=0.75,\n                 center_loss_weight=0.75,\n                 **kwargs):\n        super().__init__(*args, conv_bias=True, **kwargs)\n        self.center_prior = CenterPrior(\n            force_topk=force_topk,\n            topk=topk,\n            num_classes=self.num_classes,\n            strides=self.strides)\n        self.pos_loss_weight = pos_loss_weight\n        self.neg_loss_weight = neg_loss_weight\n        self.center_loss_weight = center_loss_weight\n        self.prior_generator = MlvlPointGenerator(self.strides, offset=0)\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\n\n        In particular, we have special initialization for classified conv's and\n        regression conv's bias\n        \"\"\"\n\n        super(AutoAssignHead, self).init_weights()\n        bias_cls = bias_init_with_prob(0.02)\n        normal_init(self.conv_cls, std=0.01, bias=bias_cls)\n        normal_init(self.conv_reg, std=0.01, bias=4.0)\n\n    def forward_single(self, x, scale, stride):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): FPN feature maps of the specified stride.\n            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize\n                the bbox prediction.\n            stride (int): The corresponding stride for feature maps, only\n                used to normalize the bbox prediction when self.norm_on_bbox\n                is True.\n\n        Returns:\n            tuple: scores for each class, bbox predictions and centerness \\\n                predictions of input feature maps.\n        \"\"\"\n        cls_score, bbox_pred, cls_feat, reg_feat = super(\n            FCOSHead, self).forward_single(x)\n        centerness = self.conv_centerness(reg_feat)\n        # scale the bbox_pred of different level\n        # float to avoid overflow when enabling FP16\n        bbox_pred = scale(bbox_pred).float()\n        # bbox_pred needed for gradient computation has been modified\n        # by F.relu(bbox_pred) when run with PyTorch 1.10. So replace\n        # F.relu(bbox_pred) with bbox_pred.clamp(min=0)\n        bbox_pred = bbox_pred.clamp(min=0)\n        bbox_pred *= stride\n        return cls_score, bbox_pred, centerness\n\n    def get_pos_loss_single(self, cls_score, objectness, reg_loss, gt_labels,\n                            center_prior_weights):\n        \"\"\"Calculate the positive loss of all points in gt_bboxes.\n\n        Args:\n            cls_score (Tensor): All category scores for each point on\n                the feature map. 
The shape is (num_points, num_class).\n            objectness (Tensor): Foreground probability of all points,\n                has shape (num_points, 1).\n            reg_loss (Tensor): The regression loss of each gt_bbox and each\n                prediction box, has shape of (num_points, num_gt).\n            gt_labels (Tensor): The zeros based gt_labels of all gt\n                with shape of (num_gt,).\n            center_prior_weights (Tensor): Float tensor with shape\n                of (num_points, num_gt). Each value represents\n                the center weighting coefficient.\n\n        Returns:\n            tuple[Tensor]:\n\n                - pos_loss (Tensor): The positive loss of all points\n                  in the gt_bboxes.\n        \"\"\"\n        # p_loc: localization confidence\n        p_loc = torch.exp(-reg_loss)\n        # p_cls: classification confidence\n        p_cls = (cls_score * objectness)[:, gt_labels]\n        # p_pos: joint confidence indicator\n        p_pos = p_cls * p_loc\n\n        # 3 is a hyper-parameter to control the contributions of high and\n        # low confidence locations towards positive losses.\n        confidence_weight = torch.exp(p_pos * 3)\n        p_pos_weight = (confidence_weight * center_prior_weights) / (\n            (confidence_weight * center_prior_weights).sum(\n                0, keepdim=True)).clamp(min=EPS)\n        reweighted_p_pos = (p_pos * p_pos_weight).sum(0)\n        pos_loss = F.binary_cross_entropy(\n            reweighted_p_pos,\n            torch.ones_like(reweighted_p_pos),\n            reduction='none')\n        pos_loss = pos_loss.sum() * self.pos_loss_weight\n        return pos_loss,\n\n    def get_neg_loss_single(self, cls_score, objectness, gt_labels, ious,\n                            inside_gt_bbox_mask):\n        \"\"\"Calculate the negative loss of all points in feature map.\n\n        Args:\n            cls_score (Tensor): All category scores for each point on\n                the feature map. 
The shape is (num_points, num_class).\n            objectness (Tensor): Foreground probability of all points\n                and is shape of (num_points, 1).\n            gt_labels (Tensor): The zeros based label of all gt with shape of\n                (num_gt).\n            ious (Tensor): Float tensor with shape of (num_points, num_gt).\n                Each value represent the iou of pred_bbox and gt_bboxes.\n            inside_gt_bbox_mask (Tensor): Tensor of bool type,\n                with shape of (num_points, num_gt), each\n                value is used to mark whether this point falls\n                within a certain gt.\n\n        Returns:\n            tuple[Tensor]:\n\n                - neg_loss (Tensor): The negative loss of all points\n                  in the feature map.\n        \"\"\"\n        num_gts = len(gt_labels)\n        joint_conf = (cls_score * objectness)\n        p_neg_weight = torch.ones_like(joint_conf)\n        if num_gts > 0:\n            # the order of dinmension would affect the value of\n            # p_neg_weight, we strictly follow the original\n            # implementation.\n            inside_gt_bbox_mask = inside_gt_bbox_mask.permute(1, 0)\n            ious = ious.permute(1, 0)\n\n            foreground_idxs = torch.nonzero(inside_gt_bbox_mask, as_tuple=True)\n            temp_weight = (1 / (1 - ious[foreground_idxs]).clamp_(EPS))\n\n            def normalize(x):\n                return (x - x.min() + EPS) / (x.max() - x.min() + EPS)\n\n            for instance_idx in range(num_gts):\n                idxs = foreground_idxs[0] == instance_idx\n                if idxs.any():\n                    temp_weight[idxs] = normalize(temp_weight[idxs])\n\n            p_neg_weight[foreground_idxs[1],\n                         gt_labels[foreground_idxs[0]]] = 1 - temp_weight\n\n        logits = (joint_conf * p_neg_weight)\n        neg_loss = (\n            logits**2 * F.binary_cross_entropy(\n                logits, torch.zeros_like(logits), reduction='none'))\n        neg_loss = neg_loss.sum() * self.neg_loss_weight\n        return neg_loss,\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'objectnesses'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             objectnesses,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * 4.\n            objectnesses (list[Tensor]): objectness for each scale level, each\n                is a 4D-tensor, the channel number is num_points * 1.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n\n 
       assert len(cls_scores) == len(bbox_preds) == len(objectnesses)\n        all_num_gt = sum([len(item) for item in gt_bboxes])\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        all_level_points = self.prior_generator.grid_priors(\n            featmap_sizes,\n            dtype=bbox_preds[0].dtype,\n            device=bbox_preds[0].device)\n        inside_gt_bbox_mask_list, bbox_targets_list = self.get_targets(\n            all_level_points, gt_bboxes)\n\n        center_prior_weight_list = []\n        temp_inside_gt_bbox_mask_list = []\n        for gt_bboxe, gt_label, inside_gt_bbox_mask in zip(\n                gt_bboxes, gt_labels, inside_gt_bbox_mask_list):\n            center_prior_weight, inside_gt_bbox_mask = \\\n                self.center_prior(all_level_points, gt_bboxe, gt_label,\n                                  inside_gt_bbox_mask)\n            center_prior_weight_list.append(center_prior_weight)\n            temp_inside_gt_bbox_mask_list.append(inside_gt_bbox_mask)\n        inside_gt_bbox_mask_list = temp_inside_gt_bbox_mask_list\n        mlvl_points = torch.cat(all_level_points, dim=0)\n        bbox_preds = levels_to_images(bbox_preds)\n        cls_scores = levels_to_images(cls_scores)\n        objectnesses = levels_to_images(objectnesses)\n\n        reg_loss_list = []\n        ious_list = []\n        num_points = len(mlvl_points)\n\n        for bbox_pred, encoded_targets, inside_gt_bbox_mask in zip(\n                bbox_preds, bbox_targets_list, inside_gt_bbox_mask_list):\n            temp_num_gt = encoded_targets.size(1)\n            expand_mlvl_points = mlvl_points[:, None, :].expand(\n                num_points, temp_num_gt, 2).reshape(-1, 2)\n            encoded_targets = encoded_targets.reshape(-1, 4)\n            expand_bbox_pred = bbox_pred[:, None, :].expand(\n                num_points, temp_num_gt, 4).reshape(-1, 4)\n            decoded_bbox_preds = self.bbox_coder.decode(\n                expand_mlvl_points, expand_bbox_pred)\n            decoded_target_preds = self.bbox_coder.decode(\n                expand_mlvl_points, encoded_targets)\n            with torch.no_grad():\n                ious = bbox_overlaps(\n                    decoded_bbox_preds, decoded_target_preds, is_aligned=True)\n                ious = ious.reshape(num_points, temp_num_gt)\n                if temp_num_gt:\n                    ious = ious.max(\n                        dim=-1, keepdim=True).values.repeat(1, temp_num_gt)\n                else:\n                    ious = ious.new_zeros(num_points, temp_num_gt)\n                ious[~inside_gt_bbox_mask] = 0\n                ious_list.append(ious)\n            loss_bbox = self.loss_bbox(\n                decoded_bbox_preds,\n                decoded_target_preds,\n                weight=None,\n                reduction_override='none')\n            reg_loss_list.append(loss_bbox.reshape(num_points, temp_num_gt))\n\n        cls_scores = [item.sigmoid() for item in cls_scores]\n        objectnesses = [item.sigmoid() for item in objectnesses]\n        pos_loss_list, = multi_apply(self.get_pos_loss_single, cls_scores,\n                                     objectnesses, reg_loss_list, gt_labels,\n                                     center_prior_weight_list)\n        pos_avg_factor = reduce_mean(\n            bbox_pred.new_tensor(all_num_gt)).clamp_(min=1)\n        pos_loss = sum(pos_loss_list) / pos_avg_factor\n\n        neg_loss_list, = multi_apply(self.get_neg_loss_single, cls_scores,\n                        
             objectnesses, gt_labels, ious_list,\n                                     inside_gt_bbox_mask_list)\n        neg_avg_factor = sum(item.data.sum()\n                             for item in center_prior_weight_list)\n        neg_avg_factor = reduce_mean(neg_avg_factor).clamp_(min=1)\n        neg_loss = sum(neg_loss_list) / neg_avg_factor\n\n        center_loss = []\n        for i in range(len(img_metas)):\n\n            if inside_gt_bbox_mask_list[i].any():\n                center_loss.append(\n                    len(gt_bboxes[i]) /\n                    center_prior_weight_list[i].sum().clamp_(min=EPS))\n            # when width or height of gt_bbox is smaller than stride of p3\n            else:\n                center_loss.append(center_prior_weight_list[i].sum() * 0)\n\n        center_loss = torch.stack(center_loss).mean() * self.center_loss_weight\n\n        # avoid dead lock in DDP\n        if all_num_gt == 0:\n            pos_loss = bbox_preds[0].sum() * 0\n            dummy_center_prior_loss = self.center_prior.mean.sum(\n            ) * 0 + self.center_prior.sigma.sum() * 0\n            center_loss = objectnesses[0].sum() * 0 + dummy_center_prior_loss\n\n        loss = dict(\n            loss_pos=pos_loss, loss_neg=neg_loss, loss_center=center_loss)\n\n        return loss\n\n    def get_targets(self, points, gt_bboxes_list):\n        \"\"\"Compute regression targets and each point inside or outside gt_bbox\n        in multiple images.\n\n        Args:\n            points (list[Tensor]): Points of all fpn level, each has shape\n                (num_points, 2).\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n\n        Returns:\n            tuple(list[Tensor]):\n\n                - inside_gt_bbox_mask_list (list[Tensor]): Each\n                  Tensor is with bool type and shape of\n                  (num_points, num_gt), each value\n                  is used to mark whether this point falls\n                  within a certain gt.\n                - concat_lvl_bbox_targets (list[Tensor]): BBox\n                  targets of each level. 
Each tensor has shape\n                  (num_points, num_gt, 4).\n        \"\"\"\n\n        concat_points = torch.cat(points, dim=0)\n        # the number of points per img, per lvl\n        inside_gt_bbox_mask_list, bbox_targets_list = multi_apply(\n            self._get_target_single, gt_bboxes_list, points=concat_points)\n        return inside_gt_bbox_mask_list, bbox_targets_list\n\n    def _get_target_single(self, gt_bboxes, points):\n        \"\"\"Compute regression targets and each point inside or outside gt_bbox\n        for a single image.\n\n        Args:\n            gt_bboxes (Tensor): gt_bbox of single image, has shape\n                (num_gt, 4).\n            points (Tensor): Points of all fpn level, has shape\n                (num_points, 2).\n\n        Returns:\n            tuple[Tensor]: Containing the following Tensors:\n\n                - inside_gt_bbox_mask (Tensor): Bool tensor with shape\n                  (num_points, num_gt), each value is used to mark\n                  whether this point falls within a certain gt.\n                - bbox_targets (Tensor): BBox targets of each points with\n                  each gt_bboxes, has shape (num_points, num_gt, 4).\n        \"\"\"\n        num_points = points.size(0)\n        num_gts = gt_bboxes.size(0)\n        gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)\n        xs, ys = points[:, 0], points[:, 1]\n        xs = xs[:, None]\n        ys = ys[:, None]\n        left = xs - gt_bboxes[..., 0]\n        right = gt_bboxes[..., 2] - xs\n        top = ys - gt_bboxes[..., 1]\n        bottom = gt_bboxes[..., 3] - ys\n        bbox_targets = torch.stack((left, top, right, bottom), -1)\n        if num_gts:\n            inside_gt_bbox_mask = bbox_targets.min(-1)[0] > 0\n        else:\n            inside_gt_bbox_mask = bbox_targets.new_zeros((num_points, num_gts),\n                                                         dtype=torch.bool)\n\n        return inside_gt_bbox_mask, bbox_targets\n\n    def _get_points_single(self,\n                           featmap_size,\n                           stride,\n                           dtype,\n                           device,\n                           flatten=False):\n        \"\"\"Almost the same as the implementation in fcos, we remove half stride\n        offset to align with the original implementation.\n\n        This function will be deprecated soon.\n        \"\"\"\n        warnings.warn(\n            '`_get_points_single` in `AutoAssignHead` will be '\n            'deprecated soon, we support a multi level point generator now'\n            'you can get points of a single level feature map '\n            'with `self.prior_generator.single_level_grid_priors` ')\n        y, x = super(FCOSHead,\n                     self)._get_points_single(featmap_size, stride, dtype,\n                                              device)\n        points = torch.stack((x.reshape(-1) * stride, y.reshape(-1) * stride),\n                             dim=-1)\n        return points\n"
  },
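`CenterPrior.forward` above turns each gt box into a learnable, class-specific 2-D Gaussian over the points of every FPN level, and those weights later rescale the positive loss in `AutoAssignHead.loss`. Below is a hedged, single-level re-statement of that weighting with toy numbers; `mean` and `sigma` stand in for the module's learnable parameters, and nothing here is repository code.

# Illustrative single-level sketch of the centre-prior weighting: a Gaussian
# around each gt centre, with distances measured in units of the level stride.
import torch


def center_prior_single_level(points, gt_bboxes, labels, mean, sigma, stride):
    """Gaussian weight of every point for every gt on one FPN level
    (cf. CenterPrior.forward)."""
    num_points, num_gts = points.size(0), gt_bboxes.size(0)
    expanded_points = points[:, None, :].expand(num_points, num_gts, 2)
    gt_center = torch.stack(
        ((gt_bboxes[:, 0] + gt_bboxes[:, 2]) / 2,
         (gt_bboxes[:, 1] + gt_bboxes[:, 3]) / 2), dim=1)[None]
    instance_mean = mean[labels][None]    # (1, num_gt, 2)
    instance_sigma = sigma[labels][None]  # (1, num_gt, 2)
    distance = ((expanded_points - gt_center) / float(stride) - instance_mean) ** 2
    # independent Gaussians in x and y multiplied into one weight per (point, gt)
    return torch.exp(-distance / (2 * instance_sigma ** 2)).prod(dim=-1)


points = torch.tensor([[4., 4.], [12., 4.], [20., 20.]])  # grid points of one level
gt_bboxes = torch.tensor([[0., 0., 16., 16.]])            # one toy gt box
labels = torch.tensor([3])
mean = torch.zeros(80, 2)   # stand-in for the learnable CenterPrior.mean
sigma = torch.ones(80, 2)   # stand-in for the learnable CenterPrior.sigma
print(center_prior_single_level(points, gt_bboxes, labels, mean, sigma, stride=8))

Points far from a gt centre receive weights close to zero, and the head additionally zeroes the weight of any point outside the gt box (or outside the top-k nearest points when `force_topk` is enabled), as done at the end of `CenterPrior.forward`.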
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/base_dense_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nimport torch\nfrom mmcv.cnn.utils.weight_init import constant_init\nfrom mmcv.ops import batched_nms\nfrom mmcv.runner import BaseModule, force_fp32\n\nfrom mmdet.core.utils import filter_scores_and_topk, select_single_mlvl\n\n\nclass BaseDenseHead(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for DenseHeads.\"\"\"\n\n    def __init__(self, init_cfg=None):\n        super(BaseDenseHead, self).__init__(init_cfg)\n\n    def init_weights(self):\n        super(BaseDenseHead, self).init_weights()\n        # avoid init_cfg overwrite the initialization of `conv_offset`\n        for m in self.modules():\n            # DeformConv2dPack, ModulatedDeformConv2dPack\n            if hasattr(m, 'conv_offset'):\n                constant_init(m.conv_offset, 0)\n\n    @abstractmethod\n    def loss(self, **kwargs):\n        \"\"\"Compute losses of the head.\"\"\"\n        pass\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   score_factors=None,\n                   img_metas=None,\n                   cfg=None,\n                   rescale=False,\n                   with_nms=True,\n                   **kwargs):\n        \"\"\"Transform network outputs of a batch into bbox results.\n\n        Note: When score_factors is not None, the cls_scores are\n        usually multiplied by it then obtain the real score used in NMS,\n        such as CenterNess in FCOS, IoU branch in ATSS.\n\n        Args:\n            cls_scores (list[Tensor]): Classification scores for all\n                scale levels, each is a 4D-tensor, has shape\n                (batch_size, num_priors * num_classes, H, W).\n            bbox_preds (list[Tensor]): Box energies / deltas for all\n                scale levels, each is a 4D-tensor, has shape\n                (batch_size, num_priors * 4, H, W).\n            score_factors (list[Tensor], Optional): Score factor for\n                all scale level, each is a 4D-tensor, has shape\n                (batch_size, num_priors * 1, H, W). Default None.\n            img_metas (list[dict], Optional): Image meta info. Default None.\n            cfg (mmcv.Config, Optional): Test / postprocessing configuration,\n                if None, test_cfg would be used.  Default None.\n            rescale (bool): If True, return boxes in original image space.\n                Default False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default True.\n\n        Returns:\n            list[list[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is an (n, 5) tensor, where the first 4 columns\n                are bounding box positions (tl_x, tl_y, br_x, br_y) and the\n                5-th column is a score between 0 and 1. The second item is a\n                (n,) tensor where each item is the predicted class label of\n                the corresponding box.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds)\n\n        if score_factors is None:\n            # e.g. Retina, FreeAnchor, Foveabox, etc.\n            with_score_factors = False\n        else:\n            # e.g. 
FCOS, PAA, ATSS, AutoAssign, etc.\n            with_score_factors = True\n            assert len(cls_scores) == len(score_factors)\n\n        num_levels = len(cls_scores)\n\n        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]\n        mlvl_priors = self.prior_generator.grid_priors(\n            featmap_sizes,\n            dtype=cls_scores[0].dtype,\n            device=cls_scores[0].device)\n\n        result_list = []\n\n        for img_id in range(len(img_metas)):\n            img_meta = img_metas[img_id]\n            cls_score_list = select_single_mlvl(cls_scores, img_id)\n            bbox_pred_list = select_single_mlvl(bbox_preds, img_id)\n            if with_score_factors:\n                score_factor_list = select_single_mlvl(score_factors, img_id)\n            else:\n                score_factor_list = [None for _ in range(num_levels)]\n\n            results = self._get_bboxes_single(cls_score_list, bbox_pred_list,\n                                              score_factor_list, mlvl_priors,\n                                              img_meta, cfg, rescale, with_nms,\n                                              **kwargs)\n            result_list.append(results)\n        return result_list\n\n    def _get_bboxes_single(self,\n                           cls_score_list,\n                           bbox_pred_list,\n                           score_factor_list,\n                           mlvl_priors,\n                           img_meta,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           **kwargs):\n        \"\"\"Transform outputs of a single image into bbox predictions.\n\n        Args:\n            cls_score_list (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_priors * num_classes, H, W).\n            bbox_pred_list (list[Tensor]): Box energies / deltas from\n                all scale levels of a single image, each item has shape\n                (num_priors * 4, H, W).\n            score_factor_list (list[Tensor]): Score factor from all scale\n                levels of a single image, each item has shape\n                (num_priors * 1, H, W).\n            mlvl_priors (list[Tensor]): Each element in the list is\n                the priors of a single level in feature pyramid. In all\n                anchor-based methods, it has shape (num_priors, 4). In\n                all anchor-free methods, it has shape (num_priors, 2)\n                when `with_stride=True`, otherwise it still has shape\n                (num_priors, 4).\n            img_meta (dict): Image meta info.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels. If with_nms\n                is False and mlvl_score_factor is None, return mlvl_bboxes and\n                mlvl_scores, else return mlvl_bboxes, mlvl_scores and\n                mlvl_score_factor. Usually with_nms is False is used for aug\n                test. 
If with_nms is True, then return the following format\n\n                - det_bboxes (Tensor): Predicted bboxes with shape \\\n                    [num_bboxes, 5], where the first 4 columns are bounding \\\n                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \\\n                    column are scores between 0 and 1.\n                - det_labels (Tensor): Predicted labels of the corresponding \\\n                    box with shape [num_bboxes].\n        \"\"\"\n        if score_factor_list[0] is None:\n            # e.g. Retina, FreeAnchor, etc.\n            with_score_factors = False\n        else:\n            # e.g. FCOS, PAA, ATSS, etc.\n            with_score_factors = True\n\n        cfg = self.test_cfg if cfg is None else cfg\n        img_shape = img_meta['img_shape']\n        nms_pre = cfg.get('nms_pre', -1)\n\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_labels = []\n        if with_score_factors:\n            mlvl_score_factors = []\n        else:\n            mlvl_score_factors = None\n        for level_idx, (cls_score, bbox_pred, score_factor, priors) in \\\n                enumerate(zip(cls_score_list, bbox_pred_list,\n                              score_factor_list, mlvl_priors)):\n\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n            if with_score_factors:\n                score_factor = score_factor.permute(1, 2,\n                                                    0).reshape(-1).sigmoid()\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.cls_out_channels)\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                # remind that we set FG labels to [0, num_class-1]\n                # since mmdet v2.0\n                # BG cat_id: num_class\n                scores = cls_score.softmax(-1)[:, :-1]\n\n            # After https://github.com/open-mmlab/mmdetection/pull/6268/,\n            # this operation keeps fewer bboxes under the same `nms_pre`.\n            # There is no difference in performance for most models. 
If you\n            # find a slight drop in performance, you can set a larger\n            # `nms_pre` than before.\n            results = filter_scores_and_topk(\n                scores, cfg.score_thr, nms_pre,\n                dict(bbox_pred=bbox_pred, priors=priors))\n            scores, labels, keep_idxs, filtered_results = results\n\n            bbox_pred = filtered_results['bbox_pred']\n            priors = filtered_results['priors']\n\n            if with_score_factors:\n                score_factor = score_factor[keep_idxs]\n\n            bboxes = self.bbox_coder.decode(\n                priors, bbox_pred, max_shape=img_shape)\n\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_labels.append(labels)\n            if with_score_factors:\n                mlvl_score_factors.append(score_factor)\n\n        return self._bbox_post_process(mlvl_scores, mlvl_labels, mlvl_bboxes,\n                                       img_meta['scale_factor'], cfg, rescale,\n                                       with_nms, mlvl_score_factors, **kwargs)\n\n    def _bbox_post_process(self,\n                           mlvl_scores,\n                           mlvl_labels,\n                           mlvl_bboxes,\n                           scale_factor,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           mlvl_score_factors=None,\n                           **kwargs):\n        \"\"\"bbox post-processing method.\n\n        The boxes would be rescaled to the original image scale and do\n        the nms operation. Usually `with_nms` is False is used for aug test.\n\n        Args:\n            mlvl_scores (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_bboxes, ).\n            mlvl_labels (list[Tensor]): Box class labels from all scale\n                levels of a single image, each item has shape\n                (num_bboxes, ).\n            mlvl_bboxes (list[Tensor]): Decoded bboxes from all scale\n                levels of a single image, each item has shape (num_bboxes, 4).\n            scale_factor (ndarray, optional): Scale factor of the image arange\n                as (w_scale, h_scale, w_scale, h_scale).\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n            mlvl_score_factors (list[Tensor], optional): Score factor from\n                all scale levels of a single image, each item has shape\n                (num_bboxes, ). Default: None.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels. If with_nms\n                is False and mlvl_score_factor is None, return mlvl_bboxes and\n                mlvl_scores, else return mlvl_bboxes, mlvl_scores and\n                mlvl_score_factor. Usually with_nms is False is used for aug\n                test. 
If with_nms is True, then return the following format\n\n                - det_bboxes (Tensor): Predicted bboxes with shape \\\n                    [num_bboxes, 5], where the first 4 columns are bounding \\\n                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \\\n                    column are scores between 0 and 1.\n                - det_labels (Tensor): Predicted labels of the corresponding \\\n                    box with shape [num_bboxes].\n        \"\"\"\n        assert len(mlvl_scores) == len(mlvl_bboxes) == len(mlvl_labels)\n\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        if rescale:\n            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)\n        mlvl_scores = torch.cat(mlvl_scores)\n        mlvl_labels = torch.cat(mlvl_labels)\n\n        if mlvl_score_factors is not None:\n            # TODO： Add sqrt operation in order to be consistent with\n            #  the paper.\n            mlvl_score_factors = torch.cat(mlvl_score_factors)\n            mlvl_scores = mlvl_scores * mlvl_score_factors\n\n        if with_nms:\n            if mlvl_bboxes.numel() == 0:\n                det_bboxes = torch.cat([mlvl_bboxes, mlvl_scores[:, None]], -1)\n                return det_bboxes, mlvl_labels\n\n            det_bboxes, keep_idxs = batched_nms(mlvl_bboxes, mlvl_scores,\n                                                mlvl_labels, cfg.nms)\n            det_bboxes = det_bboxes[:cfg.max_per_img]\n            det_labels = mlvl_labels[keep_idxs][:cfg.max_per_img]\n            return det_bboxes, det_labels\n        else:\n            return mlvl_bboxes, mlvl_scores, mlvl_labels\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels=None,\n                      gt_bboxes_ignore=None,\n                      proposal_cfg=None,\n                      **kwargs):\n        \"\"\"\n        Args:\n            x (list[Tensor]): Features from FPN.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            proposal_cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used\n\n        Returns:\n            tuple:\n                losses: (dict[str, Tensor]): A dictionary of loss components.\n                proposal_list (list[Tensor]): Proposals of each image.\n        \"\"\"\n        outs = self(x)\n        if gt_labels is None:\n            loss_inputs = outs + (gt_bboxes, img_metas)\n        else:\n            loss_inputs = outs + (gt_bboxes, gt_labels, img_metas)\n        losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        if proposal_cfg is None:\n            return losses\n        else:\n            proposal_list = self.get_bboxes(\n                *outs, img_metas=img_metas, cfg=proposal_cfg)\n            return losses, proposal_list\n\n    def simple_test(self, feats, img_metas, rescale=False):\n        \"\"\"Test function without test-time augmentation.\n\n        Args:\n            feats (tuple[torch.Tensor]): Multi-level features from the\n                upstream network, each is a 4D-tensor.\n 
           img_metas (list[dict]): List of image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is ``bboxes`` with shape (n, 5),\n                where 5 represent (tl_x, tl_y, br_x, br_y, score).\n                The shape of the second tensor in the tuple is ``labels``\n                with shape (n, ).\n        \"\"\"\n        return self.simple_test_bboxes(feats, img_metas, rescale=rescale)\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def onnx_export(self,\n                    cls_scores,\n                    bbox_preds,\n                    score_factors=None,\n                    img_metas=None,\n                    with_nms=True):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                with shape (N, num_points * num_classes, H, W).\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_points * 4, H, W).\n            score_factors (list[Tensor]): score_factors for each s\n                cale level with shape (N, num_points * 1, H, W).\n                Default: None.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc. Default: None.\n            with_nms (bool): Whether apply nms to the bboxes. Default: True.\n\n        Returns:\n            tuple[Tensor, Tensor] | list[tuple]: When `with_nms` is True,\n            it is tuple[Tensor, Tensor], first tensor bboxes with shape\n            [N, num_det, 5], 5 arrange as (x1, y1, x2, y2, score)\n            and second element is class labels of shape [N, num_det].\n            When `with_nms` is False, first tensor is bboxes with\n            shape [N, num_det, 4], second tensor is raw score has\n            shape  [N, num_det, num_classes].\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds)\n\n        num_levels = len(cls_scores)\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        mlvl_priors = self.prior_generator.grid_priors(\n            featmap_sizes,\n            dtype=bbox_preds[0].dtype,\n            device=bbox_preds[0].device)\n\n        mlvl_cls_scores = [cls_scores[i].detach() for i in range(num_levels)]\n        mlvl_bbox_preds = [bbox_preds[i].detach() for i in range(num_levels)]\n\n        assert len(\n            img_metas\n        ) == 1, 'Only support one input image while in exporting to ONNX'\n        img_shape = img_metas[0]['img_shape_for_onnx']\n\n        cfg = self.test_cfg\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_priors)\n        device = cls_scores[0].device\n        batch_size = cls_scores[0].shape[0]\n        # convert to tensor to keep tracing\n        nms_pre_tensor = torch.tensor(\n            cfg.get('nms_pre', -1), device=device, dtype=torch.long)\n\n        # e.g. Retina, FreeAnchor, etc.\n        if score_factors is None:\n            with_score_factors = False\n            mlvl_score_factor = [None for _ in range(num_levels)]\n        else:\n            # e.g. 
FCOS, PAA, ATSS, etc.\n            with_score_factors = True\n            mlvl_score_factor = [\n                score_factors[i].detach() for i in range(num_levels)\n            ]\n            mlvl_score_factors = []\n\n        mlvl_batch_bboxes = []\n        mlvl_scores = []\n\n        for cls_score, bbox_pred, score_factors, priors in zip(\n                mlvl_cls_scores, mlvl_bbox_preds, mlvl_score_factor,\n                mlvl_priors):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n\n            scores = cls_score.permute(0, 2, 3,\n                                       1).reshape(batch_size, -1,\n                                                  self.cls_out_channels)\n            if self.use_sigmoid_cls:\n                scores = scores.sigmoid()\n                nms_pre_score = scores\n            else:\n                scores = scores.softmax(-1)\n                nms_pre_score = scores\n\n            if with_score_factors:\n                score_factors = score_factors.permute(0, 2, 3, 1).reshape(\n                    batch_size, -1).sigmoid()\n            bbox_pred = bbox_pred.permute(0, 2, 3,\n                                          1).reshape(batch_size, -1, 4)\n            priors = priors.expand(batch_size, -1, priors.size(-1))\n            # Get top-k predictions\n            from mmdet.core.export import get_k_for_topk\n            nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])\n            if nms_pre > 0:\n\n                if with_score_factors:\n                    nms_pre_score = (nms_pre_score * score_factors[..., None])\n                else:\n                    nms_pre_score = nms_pre_score\n\n                # Get maximum scores for foreground classes.\n                if self.use_sigmoid_cls:\n                    max_scores, _ = nms_pre_score.max(-1)\n                else:\n                    # remind that we set FG labels to [0, num_class-1]\n                    # since mmdet v2.0\n                    # BG cat_id: num_class\n                    max_scores, _ = nms_pre_score[..., :-1].max(-1)\n                _, topk_inds = max_scores.topk(nms_pre)\n\n                batch_inds = torch.arange(\n                    batch_size, device=bbox_pred.device).view(\n                        -1, 1).expand_as(topk_inds).long()\n                # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501\n                transformed_inds = bbox_pred.shape[1] * batch_inds + topk_inds\n                priors = priors.reshape(\n                    -1, priors.size(-1))[transformed_inds, :].reshape(\n                        batch_size, -1, priors.size(-1))\n                bbox_pred = bbox_pred.reshape(-1,\n                                              4)[transformed_inds, :].reshape(\n                                                  batch_size, -1, 4)\n                scores = scores.reshape(\n                    -1, self.cls_out_channels)[transformed_inds, :].reshape(\n                        batch_size, -1, self.cls_out_channels)\n                if with_score_factors:\n                    score_factors = score_factors.reshape(\n                        -1, 1)[transformed_inds].reshape(batch_size, -1)\n\n            bboxes = self.bbox_coder.decode(\n                priors, bbox_pred, max_shape=img_shape)\n\n            mlvl_batch_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            if with_score_factors:\n                mlvl_score_factors.append(score_factors)\n\n        batch_bboxes = 
torch.cat(mlvl_batch_bboxes, dim=1)\n        batch_scores = torch.cat(mlvl_scores, dim=1)\n        if with_score_factors:\n            batch_score_factors = torch.cat(mlvl_score_factors, dim=1)\n\n        # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment\n\n        from mmdet.core.export import add_dummy_nms_for_onnx\n\n        if not self.use_sigmoid_cls:\n            batch_scores = batch_scores[..., :self.num_classes]\n\n        if with_score_factors:\n            batch_scores = batch_scores * (batch_score_factors.unsqueeze(2))\n\n        if with_nms:\n            max_output_boxes_per_class = cfg.nms.get(\n                'max_output_boxes_per_class', 200)\n            iou_threshold = cfg.nms.get('iou_threshold', 0.5)\n            score_threshold = cfg.score_thr\n            nms_pre = cfg.get('deploy_nms_pre', -1)\n            return add_dummy_nms_for_onnx(batch_bboxes, batch_scores,\n                                          max_output_boxes_per_class,\n                                          iou_threshold, score_threshold,\n                                          nms_pre, cfg.max_per_img)\n        else:\n            return batch_bboxes, batch_scores\n"
  },
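  {
    "path": "EXAMPLE_SKETCHES/bbox_post_process_sketch.py",
    "content": "# NOTE: Illustrative sketch only. This file (and its path) is a hypothetical addition\n# for documentation purposes; it is NOT part of DLTA_AI or the vendored mmdetection.\n# It mirrors the post-processing steps described in the docstring of\n# base_dense_head.py::_bbox_post_process (rescale, optional score factors, class-aware\n# NMS, top-k), assuming torchvision.ops.batched_nms in place of mmcv.ops.batched_nms.\n# All function and variable names below are made up for this sketch.\nimport torch\nfrom torchvision.ops import batched_nms\n\n\ndef post_process_single_image(mlvl_bboxes, mlvl_scores, mlvl_labels, scale_factor,\n                              score_factors=None, iou_threshold=0.6,\n                              max_per_img=100, rescale=True):\n    # Concatenate per-level predictions of one image into flat tensors.\n    bboxes = torch.cat(mlvl_bboxes)   # (num_bboxes, 4) as (tl_x, tl_y, br_x, br_y)\n    scores = torch.cat(mlvl_scores)   # (num_bboxes,)\n    labels = torch.cat(mlvl_labels)   # (num_bboxes,)\n\n    # Map boxes back to the original image scale, as done when rescale=True.\n    if rescale:\n        bboxes = bboxes / bboxes.new_tensor(scale_factor)\n\n    # Centerness / IoU-branch score factors (FCOS, ATSS, ...) rescale the scores.\n    if score_factors is not None:\n        scores = scores * torch.cat(score_factors)\n\n    # Degenerate case: no candidate boxes survived filtering.\n    if bboxes.numel() == 0:\n        return torch.cat([bboxes, scores[:, None]], dim=-1), labels\n\n    # Class-aware NMS: boxes with different labels never suppress each other.\n    keep = batched_nms(bboxes, scores, labels, iou_threshold)[:max_per_img]\n    dets = torch.cat([bboxes[keep], scores[keep, None]], dim=-1)  # (n, 5)\n    return dets, labels[keep]\n\n\nif __name__ == '__main__':\n    # Tiny smoke test with two fake feature levels.\n    lvl1 = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.]])\n    lvl2 = torch.tensor([[50., 50., 80., 90.]])\n    dets, labels = post_process_single_image(\n        [lvl1, lvl2],\n        [torch.tensor([0.9, 0.8]), torch.tensor([0.7])],\n        [torch.tensor([0, 0]), torch.tensor([1])],\n        scale_factor=[2.0, 2.0, 2.0, 2.0])\n    print(dets)\n    print(labels)\n"
  },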
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/base_mask_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nfrom mmcv.runner import BaseModule\n\n\nclass BaseMaskHead(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for mask heads used in One-Stage Instance Segmentation.\"\"\"\n\n    def __init__(self, init_cfg):\n        super(BaseMaskHead, self).__init__(init_cfg)\n\n    @abstractmethod\n    def loss(self, **kwargs):\n        pass\n\n    @abstractmethod\n    def get_results(self, **kwargs):\n        \"\"\"Get precessed :obj:`InstanceData` of multiple images.\"\"\"\n        pass\n\n    def forward_train(self,\n                      x,\n                      gt_labels,\n                      gt_masks,\n                      img_metas,\n                      gt_bboxes=None,\n                      gt_bboxes_ignore=None,\n                      positive_infos=None,\n                      **kwargs):\n        \"\"\"\n        Args:\n            x (list[Tensor] | tuple[Tensor]): Features from FPN.\n                Each has a shape (B, C, H, W).\n            gt_labels (list[Tensor]): Ground truth labels of all images.\n                each has a shape (num_gts,).\n            gt_masks (list[Tensor]) : Masks for each bbox, has a shape\n                (num_gts, h , w).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes (list[Tensor]): Ground truth bboxes of the image,\n                each item has a shape (num_gts, 4).\n            gt_bboxes_ignore (list[Tensor], None): Ground truth bboxes to be\n                ignored, each item has a shape (num_ignored_gts, 4).\n            positive_infos (list[:obj:`InstanceData`], optional): Information\n                of positive samples. Used when the label assignment is\n                done outside the MaskHead, e.g., in BboxHead in\n                YOLACT or CondInst, etc. When the label assignment is done in\n                MaskHead, it would be None, like SOLO. All values\n                in it should have shape (num_positive_samples, *).\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        if positive_infos is None:\n            outs = self(x)\n        else:\n            outs = self(x, positive_infos)\n\n        assert isinstance(outs, tuple), 'Forward results should be a tuple, ' \\\n                                        'even if only one item is returned'\n        loss = self.loss(\n            *outs,\n            gt_labels=gt_labels,\n            gt_masks=gt_masks,\n            img_metas=img_metas,\n            gt_bboxes=gt_bboxes,\n            gt_bboxes_ignore=gt_bboxes_ignore,\n            positive_infos=positive_infos,\n            **kwargs)\n        return loss\n\n    def simple_test(self,\n                    feats,\n                    img_metas,\n                    rescale=False,\n                    instances_list=None,\n                    **kwargs):\n        \"\"\"Test function without test-time augmentation.\n\n        Args:\n            feats (tuple[torch.Tensor]): Multi-level features from the\n                upstream network, each is a 4D-tensor.\n            img_metas (list[dict]): List of image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n            instances_list (list[obj:`InstanceData`], optional): Detection\n                results of each image after the post process. 
Only exist\n                if there is a `bbox_head`, like `YOLACT`, `CondInst`, etc.\n\n        Returns:\n            list[obj:`InstanceData`]: Instance segmentation \\\n                results of each image after the post process. \\\n                Each item usually contains following keys. \\\n\n                - scores (Tensor): Classification scores, has a shape\n                  (num_instance,)\n                - labels (Tensor): Has a shape (num_instances,).\n                - masks (Tensor): Processed mask results, has a\n                  shape (num_instances, h, w).\n        \"\"\"\n        if instances_list is None:\n            outs = self(feats)\n        else:\n            outs = self(feats, instances_list=instances_list)\n        mask_inputs = outs + (img_metas, )\n        results_list = self.get_results(\n            *mask_inputs,\n            rescale=rescale,\n            instances_list=instances_list,\n            **kwargs)\n        return results_list\n\n    def onnx_export(self, img, img_metas):\n        raise NotImplementedError(f'{self.__class__.__name__} does '\n                                  f'not support ONNX EXPORT')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/cascade_rpn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom __future__ import division\nimport copy\nimport warnings\n\nimport torch\nimport torch.nn as nn\nfrom mmcv import ConfigDict\nfrom mmcv.ops import DeformConv2d, batched_nms\nfrom mmcv.runner import BaseModule, ModuleList\n\nfrom mmdet.core import (RegionAssigner, build_assigner, build_sampler,\n                        images_to_levels, multi_apply)\nfrom mmdet.core.utils import select_single_mlvl\nfrom ..builder import HEADS, build_head\nfrom .base_dense_head import BaseDenseHead\nfrom .rpn_head import RPNHead\n\n\nclass AdaptiveConv(BaseModule):\n    \"\"\"AdaptiveConv used to adapt the sampling location with the anchors.\n\n    Args:\n        in_channels (int): Number of channels in the input image\n        out_channels (int): Number of channels produced by the convolution\n        kernel_size (int or tuple): Size of the conv kernel. Default: 3\n        stride (int or tuple, optional): Stride of the convolution. Default: 1\n        padding (int or tuple, optional): Zero-padding added to both sides of\n            the input. Default: 1\n        dilation (int or tuple, optional): Spacing between kernel elements.\n            Default: 3\n        groups (int, optional): Number of blocked connections from input\n            channels to output channels. Default: 1\n        bias (bool, optional): If set True, adds a learnable bias to the\n            output. Default: False.\n        type (str, optional): Type of adaptive conv, can be either 'offset'\n            (arbitrary anchors) or 'dilation' (uniform anchor).\n            Default: 'dilation'.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size=3,\n                 stride=1,\n                 padding=1,\n                 dilation=3,\n                 groups=1,\n                 bias=False,\n                 type='dilation',\n                 init_cfg=dict(\n                     type='Normal', std=0.01, override=dict(name='conv'))):\n        super(AdaptiveConv, self).__init__(init_cfg)\n        assert type in ['offset', 'dilation']\n        self.adapt_type = type\n\n        assert kernel_size == 3, 'Adaptive conv only supports kernels 3'\n        if self.adapt_type == 'offset':\n            assert stride == 1 and padding == 1 and groups == 1, \\\n                'Adaptive conv offset mode only supports padding: {1}, ' \\\n                f'stride: {1}, groups: {1}'\n            self.conv = DeformConv2d(\n                in_channels,\n                out_channels,\n                kernel_size,\n                padding=padding,\n                stride=stride,\n                groups=groups,\n                bias=bias)\n        else:\n            self.conv = nn.Conv2d(\n                in_channels,\n                out_channels,\n                kernel_size,\n                padding=dilation,\n                dilation=dilation)\n\n    def forward(self, x, offset):\n        \"\"\"Forward function.\"\"\"\n        if self.adapt_type == 'offset':\n            N, _, H, W = x.shape\n            assert offset is not None\n            assert H * W == offset.shape[1]\n            # reshape [N, NA, 18] to (N, 18, H, W)\n            offset = offset.permute(0, 2, 1).reshape(N, -1, H, W)\n            offset = offset.contiguous()\n            x = self.conv(x, offset)\n        else:\n            assert offset is None\n            x = 
self.conv(x)\n        return x\n\n\n@HEADS.register_module()\nclass StageCascadeRPNHead(RPNHead):\n    \"\"\"Stage of CascadeRPNHead.\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        anchor_generator (dict): anchor generator config.\n        adapt_cfg (dict): adaptation config.\n        bridged_feature (bool, optional): whether update rpn feature.\n            Default: False.\n        with_cls (bool, optional): whether use classification branch.\n            Default: True.\n        sampling (bool, optional): whether use sampling. Default: True.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 anchor_generator=dict(\n                     type='AnchorGenerator',\n                     scales=[8],\n                     ratios=[1.0],\n                     strides=[4, 8, 16, 32, 64]),\n                 adapt_cfg=dict(type='dilation', dilation=3),\n                 bridged_feature=False,\n                 with_cls=True,\n                 sampling=True,\n                 init_cfg=None,\n                 **kwargs):\n        self.with_cls = with_cls\n        self.anchor_strides = anchor_generator['strides']\n        self.anchor_scales = anchor_generator['scales']\n        self.bridged_feature = bridged_feature\n        self.adapt_cfg = adapt_cfg\n        super(StageCascadeRPNHead, self).__init__(\n            in_channels,\n            anchor_generator=anchor_generator,\n            init_cfg=init_cfg,\n            **kwargs)\n\n        # override sampling and sampler\n        self.sampling = sampling\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            # use PseudoSampler when sampling is False\n            if self.sampling and hasattr(self.train_cfg, 'sampler'):\n                sampler_cfg = self.train_cfg.sampler\n            else:\n                sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n\n        if init_cfg is None:\n            self.init_cfg = dict(\n                type='Normal', std=0.01, override=[dict(name='rpn_reg')])\n            if self.with_cls:\n                self.init_cfg['override'].append(dict(name='rpn_cls'))\n\n    def _init_layers(self):\n        \"\"\"Init layers of a CascadeRPN stage.\"\"\"\n        self.rpn_conv = AdaptiveConv(self.in_channels, self.feat_channels,\n                                     **self.adapt_cfg)\n        if self.with_cls:\n            self.rpn_cls = nn.Conv2d(self.feat_channels,\n                                     self.num_anchors * self.cls_out_channels,\n                                     1)\n        self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward_single(self, x, offset):\n        \"\"\"Forward function of single scale.\"\"\"\n        bridged_x = x\n        x = self.relu(self.rpn_conv(x, offset))\n        if self.bridged_feature:\n            bridged_x = x  # update feature\n        cls_score = self.rpn_cls(x) if self.with_cls else None\n        bbox_pred = self.rpn_reg(x)\n        return bridged_x, cls_score, bbox_pred\n\n    def forward(self, feats, offset_list=None):\n        \"\"\"Forward function.\"\"\"\n        if offset_list is None:\n            offset_list = [None for _ in range(len(feats))]\n        return multi_apply(self.forward_single, 
feats, offset_list)\n\n    def _region_targets_single(self,\n                               anchors,\n                               valid_flags,\n                               gt_bboxes,\n                               gt_bboxes_ignore,\n                               gt_labels,\n                               img_meta,\n                               featmap_sizes,\n                               label_channels=1):\n        \"\"\"Get anchor targets based on region for single level.\"\"\"\n        assign_result = self.assigner.assign(\n            anchors,\n            valid_flags,\n            gt_bboxes,\n            img_meta,\n            featmap_sizes,\n            self.anchor_scales[0],\n            self.anchor_strides,\n            gt_bboxes_ignore=gt_bboxes_ignore,\n            gt_labels=None,\n            allowed_border=self.train_cfg.allowed_border)\n        flat_anchors = torch.cat(anchors)\n        sampling_result = self.sampler.sample(assign_result, flat_anchors,\n                                              gt_bboxes)\n\n        num_anchors = flat_anchors.shape[0]\n        bbox_targets = torch.zeros_like(flat_anchors)\n        bbox_weights = torch.zeros_like(flat_anchors)\n        labels = flat_anchors.new_zeros(num_anchors, dtype=torch.long)\n        label_weights = flat_anchors.new_zeros(num_anchors, dtype=torch.float)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            if not self.reg_decoded_bbox:\n                pos_bbox_targets = self.bbox_coder.encode(\n                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)\n            else:\n                pos_bbox_targets = sampling_result.pos_gt_bboxes\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n            if gt_labels is None:\n                labels[pos_inds] = 1\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,\n                neg_inds)\n\n    def region_targets(self,\n                       anchor_list,\n                       valid_flag_list,\n                       gt_bboxes_list,\n                       img_metas,\n                       featmap_sizes,\n                       gt_bboxes_ignore_list=None,\n                       gt_labels_list=None,\n                       label_channels=1,\n                       unmap_outputs=True):\n        \"\"\"See :func:`StageCascadeRPNHead.get_targets`.\"\"\"\n        num_imgs = len(img_metas)\n        assert len(anchor_list) == len(valid_flag_list) == num_imgs\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        (all_labels, all_label_weights, all_bbox_targets, all_bbox_weights,\n         pos_inds_list, neg_inds_list) = multi_apply(\n             self._region_targets_single,\n             
anchor_list,\n             valid_flag_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             gt_labels_list,\n             img_metas,\n             featmap_sizes=featmap_sizes,\n             label_channels=label_channels)\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        # split targets to a list w.r.t. multiple levels\n        labels_list = images_to_levels(all_labels, num_level_anchors)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors)\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors)\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_anchors)\n        return (labels_list, label_weights_list, bbox_targets_list,\n                bbox_weights_list, num_total_pos, num_total_neg)\n\n    def get_targets(self,\n                    anchor_list,\n                    valid_flag_list,\n                    gt_bboxes,\n                    img_metas,\n                    featmap_sizes,\n                    gt_bboxes_ignore=None,\n                    label_channels=1):\n        \"\"\"Compute regression and classification targets for anchors.\n\n        Args:\n            anchor_list (list[list]): Multi level anchors of each image.\n            valid_flag_list (list[list]): Multi level valid flags of each\n                image.\n            gt_bboxes (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta info of each image.\n            featmap_sizes (list[Tensor]): Feature mapsize each level\n            gt_bboxes_ignore (list[Tensor]): Ignore bboxes of each images\n            label_channels (int): Channel of label.\n\n        Returns:\n            cls_reg_targets (tuple)\n        \"\"\"\n        if isinstance(self.assigner, RegionAssigner):\n            cls_reg_targets = self.region_targets(\n                anchor_list,\n                valid_flag_list,\n                gt_bboxes,\n                img_metas,\n                featmap_sizes,\n                gt_bboxes_ignore_list=gt_bboxes_ignore,\n                label_channels=label_channels)\n        else:\n            cls_reg_targets = super(StageCascadeRPNHead, self).get_targets(\n                anchor_list,\n                valid_flag_list,\n                gt_bboxes,\n                img_metas,\n                gt_bboxes_ignore_list=gt_bboxes_ignore,\n                label_channels=label_channels)\n        return cls_reg_targets\n\n    def anchor_offset(self, anchor_list, anchor_strides, featmap_sizes):\n        \"\"\" Get offset for deformable conv based on anchor shape\n        NOTE: currently support deformable kernel_size=3 and dilation=1\n\n        Args:\n            anchor_list (list[list[tensor])): [NI, NLVL, NA, 4] list of\n                multi-level anchors\n            anchor_strides (list[int]): anchor stride of each level\n\n        Returns:\n            offset_list (list[tensor]): [NLVL, NA, 2, 18]: offset of DeformConv\n                kernel.\n        \"\"\"\n\n        def _shape_offset(anchors, stride, ks=3, dilation=1):\n            # currently support 
kernel_size=3 and dilation=1\n            assert ks == 3 and dilation == 1\n            pad = (ks - 1) // 2\n            idx = torch.arange(-pad, pad + 1, dtype=dtype, device=device)\n            yy, xx = torch.meshgrid(idx, idx)  # return order matters\n            xx = xx.reshape(-1)\n            yy = yy.reshape(-1)\n            w = (anchors[:, 2] - anchors[:, 0]) / stride\n            h = (anchors[:, 3] - anchors[:, 1]) / stride\n            w = w / (ks - 1) - dilation\n            h = h / (ks - 1) - dilation\n            offset_x = w[:, None] * xx  # (NA, ks**2)\n            offset_y = h[:, None] * yy  # (NA, ks**2)\n            return offset_x, offset_y\n\n        def _ctr_offset(anchors, stride, featmap_size):\n            feat_h, feat_w = featmap_size\n            assert len(anchors) == feat_h * feat_w\n\n            x = (anchors[:, 0] + anchors[:, 2]) * 0.5\n            y = (anchors[:, 1] + anchors[:, 3]) * 0.5\n            # compute centers on feature map\n            x = x / stride\n            y = y / stride\n            # compute predefine centers\n            xx = torch.arange(0, feat_w, device=anchors.device)\n            yy = torch.arange(0, feat_h, device=anchors.device)\n            yy, xx = torch.meshgrid(yy, xx)\n            xx = xx.reshape(-1).type_as(x)\n            yy = yy.reshape(-1).type_as(y)\n\n            offset_x = x - xx  # (NA, )\n            offset_y = y - yy  # (NA, )\n            return offset_x, offset_y\n\n        num_imgs = len(anchor_list)\n        num_lvls = len(anchor_list[0])\n        dtype = anchor_list[0][0].dtype\n        device = anchor_list[0][0].device\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n\n        offset_list = []\n        for i in range(num_imgs):\n            mlvl_offset = []\n            for lvl in range(num_lvls):\n                c_offset_x, c_offset_y = _ctr_offset(anchor_list[i][lvl],\n                                                     anchor_strides[lvl],\n                                                     featmap_sizes[lvl])\n                s_offset_x, s_offset_y = _shape_offset(anchor_list[i][lvl],\n                                                       anchor_strides[lvl])\n\n                # offset = ctr_offset + shape_offset\n                offset_x = s_offset_x + c_offset_x[:, None]\n                offset_y = s_offset_y + c_offset_y[:, None]\n\n                # offset order (y0, x0, y1, x2, .., y8, x8, y9, x9)\n                offset = torch.stack([offset_y, offset_x], dim=-1)\n                offset = offset.reshape(offset.size(0), -1)  # [NA, 2*ks**2]\n                mlvl_offset.append(offset)\n            offset_list.append(torch.cat(mlvl_offset))  # [totalNA, 2*ks**2]\n        offset_list = images_to_levels(offset_list, num_level_anchors)\n        return offset_list\n\n    def loss_single(self, cls_score, bbox_pred, anchors, labels, label_weights,\n                    bbox_targets, bbox_weights, num_total_samples):\n        \"\"\"Loss function on single scale.\"\"\"\n        # classification loss\n        if self.with_cls:\n            labels = labels.reshape(-1)\n            label_weights = label_weights.reshape(-1)\n            cls_score = cls_score.permute(0, 2, 3,\n                                          1).reshape(-1, self.cls_out_channels)\n            loss_cls = self.loss_cls(\n                cls_score, labels, label_weights, avg_factor=num_total_samples)\n        # regression loss\n        bbox_targets = bbox_targets.reshape(-1, 4)\n        bbox_weights = 
bbox_weights.reshape(-1, 4)\n        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n        if self.reg_decoded_bbox:\n            # When the regression loss (e.g. `IouLoss`, `GIouLoss`)\n            # is applied directly on the decoded bounding boxes, it\n            # decodes the already encoded coordinates to absolute format.\n            anchors = anchors.reshape(-1, 4)\n            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)\n        loss_reg = self.loss_bbox(\n            bbox_pred,\n            bbox_targets,\n            bbox_weights,\n            avg_factor=num_total_samples)\n        if self.with_cls:\n            return loss_cls, loss_reg\n        return None, loss_reg\n\n    def loss(self,\n             anchor_list,\n             valid_flag_list,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            anchor_list (list[list]): Multi level anchors of each image.\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss. Default: None\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in bbox_preds]\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            featmap_sizes,\n            gt_bboxes_ignore=gt_bboxes_ignore,\n            label_channels=label_channels)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n        if self.sampling:\n            num_total_samples = num_total_pos + num_total_neg\n        else:\n            # 200 is hard-coded average factor,\n            # which follows guided anchoring.\n            num_total_samples = sum([label.numel()\n                                     for label in labels_list]) / 200.0\n\n        # change per image, per level anchor_list to per_level, per_image\n        mlvl_anchor_list = list(zip(*anchor_list))\n        # concat mlvl_anchor_list\n        mlvl_anchor_list = [\n            torch.cat(anchors, dim=0) for anchors in mlvl_anchor_list\n        ]\n\n        losses = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            mlvl_anchor_list,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            num_total_samples=num_total_samples)\n        if self.with_cls:\n            return dict(loss_rpn_cls=losses[0], loss_rpn_reg=losses[1])\n        return 
dict(loss_rpn_reg=losses[1])\n\n    def get_bboxes(self,\n                   anchor_list,\n                   cls_scores,\n                   bbox_preds,\n                   img_metas,\n                   cfg,\n                   rescale=False):\n        \"\"\"Get proposal predict.\n\n        Args:\n            anchor_list (list[list]): Multi level anchors of each image.\n            cls_scores (list[Tensor]): Classification scores for all\n                scale levels, each is a 4D-tensor, has shape\n                (batch_size, num_priors * num_classes, H, W).\n            bbox_preds (list[Tensor]): Box energies / deltas for all\n                scale levels, each is a 4D-tensor, has shape\n                (batch_size, num_priors * 4, H, W).\n            img_metas (list[dict], Optional): Image meta info. Default None.\n            cfg (mmcv.Config, Optional): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n\n        Returns:\n            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns\n                are bounding box positions (tl_x, tl_y, br_x, br_y) and the\n                5-th column is a score between 0 and 1.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds)\n\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score_list = select_single_mlvl(cls_scores, img_id)\n            bbox_pred_list = select_single_mlvl(bbox_preds, img_id)\n            img_shape = img_metas[img_id]['img_shape']\n            scale_factor = img_metas[img_id]['scale_factor']\n            proposals = self._get_bboxes_single(cls_score_list, bbox_pred_list,\n                                                anchor_list[img_id], img_shape,\n                                                scale_factor, cfg, rescale)\n            result_list.append(proposals)\n        return result_list\n\n    def _get_bboxes_single(self,\n                           cls_scores,\n                           bbox_preds,\n                           mlvl_anchors,\n                           img_shape,\n                           scale_factor,\n                           cfg,\n                           rescale=False):\n        \"\"\"Transform outputs of a single image into bbox predictions.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_anchors * num_classes, H, W).\n            bbox_preds (list[Tensor]): Box energies / deltas from\n                all scale levels of a single image, each item has\n                shape (num_anchors * 4, H, W).\n            mlvl_anchors (list[Tensor]): Box reference from all scale\n                levels of a single image, each item has shape\n                (num_total_anchors, 4).\n            img_shape (tuple[int]): Shape of the input image,\n                (height, width, 3).\n            scale_factor (ndarray): Scale factor of the image arange as\n                (w_scale, h_scale, w_scale, h_scale).\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default False.\n\n        Returns:\n            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns\n                are bounding box positions 
(tl_x, tl_y, br_x, br_y) and the\n                5-th column is a score between 0 and 1.\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        cfg = copy.deepcopy(cfg)\n        # bboxes from different level should be independent during NMS,\n        # level_ids are used as labels for batched NMS to separate them\n        level_ids = []\n        mlvl_scores = []\n        mlvl_bbox_preds = []\n        mlvl_valid_anchors = []\n        nms_pre = cfg.get('nms_pre', -1)\n        for idx in range(len(cls_scores)):\n            rpn_cls_score = cls_scores[idx]\n            rpn_bbox_pred = bbox_preds[idx]\n            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]\n            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)\n            if self.use_sigmoid_cls:\n                rpn_cls_score = rpn_cls_score.reshape(-1)\n                scores = rpn_cls_score.sigmoid()\n            else:\n                rpn_cls_score = rpn_cls_score.reshape(-1, 2)\n                # We set FG labels to [0, num_class-1] and BG label to\n                # num_class in RPN head since mmdet v2.5, which is unified to\n                # be consistent with other head since mmdet v2.0. In mmdet v2.0\n                # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.\n                scores = rpn_cls_score.softmax(dim=1)[:, 0]\n            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n            anchors = mlvl_anchors[idx]\n\n            if 0 < nms_pre < scores.shape[0]:\n                # sort is faster than topk\n                # _, topk_inds = scores.topk(cfg.nms_pre)\n                ranked_scores, rank_inds = scores.sort(descending=True)\n                topk_inds = rank_inds[:nms_pre]\n                scores = ranked_scores[:nms_pre]\n                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]\n                anchors = anchors[topk_inds, :]\n            mlvl_scores.append(scores)\n            mlvl_bbox_preds.append(rpn_bbox_pred)\n            mlvl_valid_anchors.append(anchors)\n            level_ids.append(\n                scores.new_full((scores.size(0), ), idx, dtype=torch.long))\n\n        scores = torch.cat(mlvl_scores)\n        anchors = torch.cat(mlvl_valid_anchors)\n        rpn_bbox_pred = torch.cat(mlvl_bbox_preds)\n        proposals = self.bbox_coder.decode(\n            anchors, rpn_bbox_pred, max_shape=img_shape)\n        ids = torch.cat(level_ids)\n\n        if cfg.min_bbox_size >= 0:\n            w = proposals[:, 2] - proposals[:, 0]\n            h = proposals[:, 3] - proposals[:, 1]\n            valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)\n            if not valid_mask.all():\n                proposals = proposals[valid_mask]\n                scores = scores[valid_mask]\n                ids = ids[valid_mask]\n\n        # deprecate arguments warning\n        if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:\n            warnings.warn(\n                'In rpn_proposal or test_cfg, '\n                'nms_thr has been moved to a dict named nms as '\n                'iou_threshold, max_num has been renamed as max_per_img, '\n                'name of original arguments and the way to specify '\n                'iou_threshold of NMS will be deprecated.')\n        if 'nms' not in cfg:\n            cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))\n        if 'max_num' in cfg:\n            if 'max_per_img' in cfg:\n                assert cfg.max_num == cfg.max_per_img, f'You ' \\\n                
    f'set max_num and ' \\\n                    f'max_per_img at the same time, but get {cfg.max_num} ' \\\n                    f'and {cfg.max_per_img} respectively' \\\n                    'Please delete max_num which will be deprecated.'\n            else:\n                cfg.max_per_img = cfg.max_num\n        if 'nms_thr' in cfg:\n            assert cfg.nms.iou_threshold == cfg.nms_thr, f'You set' \\\n                f' iou_threshold in nms and ' \\\n                f'nms_thr at the same time, but get' \\\n                f' {cfg.nms.iou_threshold} and {cfg.nms_thr}' \\\n                f' respectively. Please delete the nms_thr ' \\\n                f'which will be deprecated.'\n\n        if proposals.numel() > 0:\n            dets, _ = batched_nms(proposals, scores, ids, cfg.nms)\n        else:\n            return proposals.new_zeros(0, 5)\n\n        return dets[:cfg.max_per_img]\n\n    def refine_bboxes(self, anchor_list, bbox_preds, img_metas):\n        \"\"\"Refine bboxes through stages.\"\"\"\n        num_levels = len(bbox_preds)\n        new_anchor_list = []\n        for img_id in range(len(img_metas)):\n            mlvl_anchors = []\n            for i in range(num_levels):\n                bbox_pred = bbox_preds[i][img_id].detach()\n                bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n                img_shape = img_metas[img_id]['img_shape']\n                bboxes = self.bbox_coder.decode(anchor_list[img_id][i],\n                                                bbox_pred, img_shape)\n                mlvl_anchors.append(bboxes)\n            new_anchor_list.append(mlvl_anchors)\n        return new_anchor_list\n\n\n@HEADS.register_module()\nclass CascadeRPNHead(BaseDenseHead):\n    \"\"\"The CascadeRPNHead will predict more accurate region proposals, which is\n    required for two-stage detectors (such as Fast/Faster R-CNN). CascadeRPN\n    consists of a sequence of RPNStage to progressively improve the accuracy of\n    the detected proposals.\n\n    More details can be found in ``https://arxiv.org/abs/1909.06720``.\n\n    Args:\n        num_stages (int): number of CascadeRPN stages.\n        stages (list[dict]): list of configs to build the stages.\n        train_cfg (list[dict]): list of configs at training time each stage.\n        test_cfg (dict): config at testing time.\n    \"\"\"\n\n    def __init__(self, num_stages, stages, train_cfg, test_cfg, init_cfg=None):\n        super(CascadeRPNHead, self).__init__(init_cfg)\n        assert num_stages == len(stages)\n        self.num_stages = num_stages\n        # Be careful! 
Pretrained weights cannot be loaded when use\n        # nn.ModuleList\n        self.stages = ModuleList()\n        for i in range(len(stages)):\n            train_cfg_i = train_cfg[i] if train_cfg is not None else None\n            stages[i].update(train_cfg=train_cfg_i)\n            stages[i].update(test_cfg=test_cfg)\n            self.stages.append(build_head(stages[i]))\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n    def loss(self):\n        \"\"\"loss() is implemented in StageCascadeRPNHead.\"\"\"\n        pass\n\n    def get_bboxes(self):\n        \"\"\"get_bboxes() is implemented in StageCascadeRPNHead.\"\"\"\n        pass\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels=None,\n                      gt_bboxes_ignore=None,\n                      proposal_cfg=None):\n        \"\"\"Forward train function.\"\"\"\n        assert gt_labels is None, 'RPN does not require gt_labels'\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in x]\n        device = x[0].device\n        anchor_list, valid_flag_list = self.stages[0].get_anchors(\n            featmap_sizes, img_metas, device=device)\n\n        losses = dict()\n\n        for i in range(self.num_stages):\n            stage = self.stages[i]\n\n            if stage.adapt_cfg['type'] == 'offset':\n                offset_list = stage.anchor_offset(anchor_list,\n                                                  stage.anchor_strides,\n                                                  featmap_sizes)\n            else:\n                offset_list = None\n            x, cls_score, bbox_pred = stage(x, offset_list)\n            rpn_loss_inputs = (anchor_list, valid_flag_list, cls_score,\n                               bbox_pred, gt_bboxes, img_metas)\n            stage_loss = stage.loss(*rpn_loss_inputs)\n            for name, value in stage_loss.items():\n                losses['s{}.{}'.format(i, name)] = value\n\n            # refine boxes\n            if i < self.num_stages - 1:\n                anchor_list = stage.refine_bboxes(anchor_list, bbox_pred,\n                                                  img_metas)\n        if proposal_cfg is None:\n            return losses\n        else:\n            proposal_list = self.stages[-1].get_bboxes(anchor_list, cls_score,\n                                                       bbox_pred, img_metas,\n                                                       self.test_cfg)\n            return losses, proposal_list\n\n    def simple_test_rpn(self, x, img_metas):\n        \"\"\"Simple forward test function.\"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in x]\n        device = x[0].device\n        anchor_list, _ = self.stages[0].get_anchors(\n            featmap_sizes, img_metas, device=device)\n\n        for i in range(self.num_stages):\n            stage = self.stages[i]\n            if stage.adapt_cfg['type'] == 'offset':\n                offset_list = stage.anchor_offset(anchor_list,\n                                                  stage.anchor_strides,\n                                                  featmap_sizes)\n            else:\n                offset_list = None\n            x, cls_score, bbox_pred = stage(x, offset_list)\n            if i < self.num_stages - 1:\n                anchor_list = stage.refine_bboxes(anchor_list, bbox_pred,\n                                                  img_metas)\n\n        proposal_list = 
self.stages[-1].get_bboxes(anchor_list, cls_score,\n                                                   bbox_pred, img_metas,\n                                                   self.test_cfg)\n        return proposal_list\n\n    def aug_test_rpn(self, x, img_metas):\n        \"\"\"Augmented forward test function.\"\"\"\n        raise NotImplementedError(\n            'CascadeRPNHead does not support test-time augmentation')\n"
  },
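  {
    "path": "EXAMPLE_SKETCHES/cascade_rpn_anchor_offset_sketch.py",
    "content": "# NOTE: Illustrative sketch only. This file (and its path) is a hypothetical addition\n# for documentation purposes; it is NOT part of DLTA_AI or the vendored mmdetection.\n# It re-derives, for a single feature level of a single image, the deformable-conv\n# offsets that cascade_rpn_head.py::StageCascadeRPNHead.anchor_offset builds\n# (kernel_size=3, dilation=1): a per-location center offset plus a per-anchor shape\n# offset. The function name anchor_offset_single_level is made up for this sketch.\nimport torch\n\n\ndef anchor_offset_single_level(anchors, stride, featmap_size, ks=3, dilation=1):\n    # anchors: (H*W, 4) boxes in image coordinates, one per feature-map location,\n    # stored in row-major order.\n    assert ks == 3 and dilation == 1\n    feat_h, feat_w = featmap_size\n    assert anchors.size(0) == feat_h * feat_w\n    dtype, device = anchors.dtype, anchors.device\n\n    # Center offset: anchor center (in feature-map units) minus its grid position.\n    cx = (anchors[:, 0] + anchors[:, 2]) * 0.5 / stride\n    cy = (anchors[:, 1] + anchors[:, 3]) * 0.5 / stride\n    grid_y, grid_x = torch.meshgrid(torch.arange(feat_h, device=device),\n                                    torch.arange(feat_w, device=device), indexing='ij')\n    ctr_x = cx - grid_x.reshape(-1).to(dtype)\n    ctr_y = cy - grid_y.reshape(-1).to(dtype)\n\n    # Shape offset: spread the 3x3 sampling points over the anchor extent.\n    pad = (ks - 1) // 2\n    idx = torch.arange(-pad, pad + 1, dtype=dtype, device=device)\n    ky, kx = torch.meshgrid(idx, idx, indexing='ij')\n    w = (anchors[:, 2] - anchors[:, 0]) / stride / (ks - 1) - dilation\n    h = (anchors[:, 3] - anchors[:, 1]) / stride / (ks - 1) - dilation\n    shape_x = w[:, None] * kx.reshape(-1)   # (N, ks*ks)\n    shape_y = h[:, None] * ky.reshape(-1)\n\n    # One (y, x) pair per sampling point, matching the order used in the original\n    # code, flattened to (N, 2*ks*ks).\n    offset = torch.stack([shape_y + ctr_y[:, None], shape_x + ctr_x[:, None]], dim=-1)\n    return offset.reshape(offset.size(0), -1)\n\n\nif __name__ == '__main__':\n    # 2x3 feature map at stride 8, with one 16x16 anchor centered on every cell.\n    feat_h, feat_w, stride = 2, 3, 8\n    grid_y, grid_x = torch.meshgrid(torch.arange(feat_h), torch.arange(feat_w),\n                                    indexing='ij')\n    cx = (grid_x.reshape(-1).float() + 0.5) * stride\n    cy = (grid_y.reshape(-1).float() + 0.5) * stride\n    anchors = torch.stack([cx - 8, cy - 8, cx + 8, cy + 8], dim=1)\n    print(anchor_offset_single_level(anchors, stride, (feat_h, feat_w)).shape)\n"
  },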
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/centernet_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import bias_init_with_prob, normal_init\nfrom mmcv.ops import batched_nms\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import multi_apply\nfrom mmdet.models import HEADS, build_loss\nfrom mmdet.models.utils import gaussian_radius, gen_gaussian_target\nfrom ..utils.gaussian_target import (get_local_maximum, get_topk_from_heatmap,\n                                     transpose_and_gather_feat)\nfrom .base_dense_head import BaseDenseHead\nfrom .dense_test_mixins import BBoxTestMixin\n\n\n@HEADS.register_module()\nclass CenterNetHead(BaseDenseHead, BBoxTestMixin):\n    \"\"\"Objects as Points Head. CenterHead use center_point to indicate object's\n    position. Paper link <https://arxiv.org/abs/1904.07850>\n\n    Args:\n        in_channel (int): Number of channel in the input feature map.\n        feat_channel (int): Number of channel in the intermediate feature map.\n        num_classes (int): Number of categories excluding the background\n            category.\n        loss_center_heatmap (dict | None): Config of center heatmap loss.\n            Default: GaussianFocalLoss.\n        loss_wh (dict | None): Config of wh loss. Default: L1Loss.\n        loss_offset (dict | None): Config of offset loss. Default: L1Loss.\n        train_cfg (dict | None): Training config. Useless in CenterNet,\n            but we keep this variable for SingleStageDetector. Default: None.\n        test_cfg (dict | None): Testing config of CenterNet. Default: None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channel,\n                 feat_channel,\n                 num_classes,\n                 loss_center_heatmap=dict(\n                     type='GaussianFocalLoss', loss_weight=1.0),\n                 loss_wh=dict(type='L1Loss', loss_weight=0.1),\n                 loss_offset=dict(type='L1Loss', loss_weight=1.0),\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None):\n        super(CenterNetHead, self).__init__(init_cfg)\n        self.num_classes = num_classes\n        self.heatmap_head = self._build_head(in_channel, feat_channel,\n                                             num_classes)\n        self.wh_head = self._build_head(in_channel, feat_channel, 2)\n        self.offset_head = self._build_head(in_channel, feat_channel, 2)\n\n        self.loss_center_heatmap = build_loss(loss_center_heatmap)\n        self.loss_wh = build_loss(loss_wh)\n        self.loss_offset = build_loss(loss_offset)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.fp16_enabled = False\n\n    def _build_head(self, in_channel, feat_channel, out_channel):\n        \"\"\"Build head for each branch.\"\"\"\n        layer = nn.Sequential(\n            nn.Conv2d(in_channel, feat_channel, kernel_size=3, padding=1),\n            nn.ReLU(inplace=True),\n            nn.Conv2d(feat_channel, out_channel, kernel_size=1))\n        return layer\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\"\"\"\n        bias_init = bias_init_with_prob(0.1)\n        self.heatmap_head[-1].bias.data.fill_(bias_init)\n        for head in [self.wh_head, self.offset_head]:\n            for m in head.modules():\n                if isinstance(m, nn.Conv2d):\n                    normal_init(m, std=0.001)\n\n    def 
forward(self, feats):\n        \"\"\"Forward features. Notice CenterNet head does not use FPN.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            center_heatmap_preds (List[Tensor]): center predict heatmaps for\n                all levels, the channels number is num_classes.\n            wh_preds (List[Tensor]): wh predicts for all levels, the channels\n                number is 2.\n            offset_preds (List[Tensor]): offset predicts for all levels, the\n               channels number is 2.\n        \"\"\"\n        return multi_apply(self.forward_single, feats)\n\n    def forward_single(self, feat):\n        \"\"\"Forward feature of a single level.\n\n        Args:\n            feat (Tensor): Feature of a single level.\n\n        Returns:\n            center_heatmap_pred (Tensor): center predict heatmaps, the\n               channels number is num_classes.\n            wh_pred (Tensor): wh predicts, the channels number is 2.\n            offset_pred (Tensor): offset predicts, the channels number is 2.\n        \"\"\"\n        center_heatmap_pred = self.heatmap_head(feat).sigmoid()\n        wh_pred = self.wh_head(feat)\n        offset_pred = self.offset_head(feat)\n        return center_heatmap_pred, wh_pred, offset_pred\n\n    @force_fp32(apply_to=('center_heatmap_preds', 'wh_preds', 'offset_preds'))\n    def loss(self,\n             center_heatmap_preds,\n             wh_preds,\n             offset_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            center_heatmap_preds (list[Tensor]): center predict heatmaps for\n               all levels with shape (B, num_classes, H, W).\n            wh_preds (list[Tensor]): wh predicts for all levels with\n               shape (B, 2, H, W).\n            offset_preds (list[Tensor]): offset predicts for all levels\n               with shape (B, 2, H, W).\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss. 
Default: None\n\n        Returns:\n            dict[str, Tensor]: which has components below:\n                - loss_center_heatmap (Tensor): loss of center heatmap.\n                - loss_wh (Tensor): loss of hw heatmap\n                - loss_offset (Tensor): loss of offset heatmap.\n        \"\"\"\n        assert len(center_heatmap_preds) == len(wh_preds) == len(\n            offset_preds) == 1\n        center_heatmap_pred = center_heatmap_preds[0]\n        wh_pred = wh_preds[0]\n        offset_pred = offset_preds[0]\n\n        target_result, avg_factor = self.get_targets(gt_bboxes, gt_labels,\n                                                     center_heatmap_pred.shape,\n                                                     img_metas[0]['pad_shape'])\n\n        center_heatmap_target = target_result['center_heatmap_target']\n        wh_target = target_result['wh_target']\n        offset_target = target_result['offset_target']\n        wh_offset_target_weight = target_result['wh_offset_target_weight']\n\n        # Since the channel of wh_target and offset_target is 2, the avg_factor\n        # of loss_center_heatmap is always 1/2 of loss_wh and loss_offset.\n        loss_center_heatmap = self.loss_center_heatmap(\n            center_heatmap_pred, center_heatmap_target, avg_factor=avg_factor)\n        loss_wh = self.loss_wh(\n            wh_pred,\n            wh_target,\n            wh_offset_target_weight,\n            avg_factor=avg_factor * 2)\n        loss_offset = self.loss_offset(\n            offset_pred,\n            offset_target,\n            wh_offset_target_weight,\n            avg_factor=avg_factor * 2)\n        return dict(\n            loss_center_heatmap=loss_center_heatmap,\n            loss_wh=loss_wh,\n            loss_offset=loss_offset)\n\n    def get_targets(self, gt_bboxes, gt_labels, feat_shape, img_shape):\n        \"\"\"Compute regression and classification targets in multiple images.\n\n        Args:\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box.\n            feat_shape (list[int]): feature map shape with value [B, _, H, W]\n            img_shape (list[int]): image shape in [h, w] format.\n\n        Returns:\n            tuple[dict,float]: The float value is mean avg_factor, the dict has\n               components below:\n               - center_heatmap_target (Tensor): targets of center heatmap, \\\n                   shape (B, num_classes, H, W).\n               - wh_target (Tensor): targets of wh predict, shape \\\n                   (B, 2, H, W).\n               - offset_target (Tensor): targets of offset predict, shape \\\n                   (B, 2, H, W).\n               - wh_offset_target_weight (Tensor): weights of wh and offset \\\n                   predict, shape (B, 2, H, W).\n        \"\"\"\n        img_h, img_w = img_shape[:2]\n        bs, _, feat_h, feat_w = feat_shape\n\n        width_ratio = float(feat_w / img_w)\n        height_ratio = float(feat_h / img_h)\n\n        center_heatmap_target = gt_bboxes[-1].new_zeros(\n            [bs, self.num_classes, feat_h, feat_w])\n        wh_target = gt_bboxes[-1].new_zeros([bs, 2, feat_h, feat_w])\n        offset_target = gt_bboxes[-1].new_zeros([bs, 2, feat_h, feat_w])\n        wh_offset_target_weight = gt_bboxes[-1].new_zeros(\n            [bs, 2, feat_h, feat_w])\n\n        for batch_id in range(bs):\n            
gt_bbox = gt_bboxes[batch_id]\n            gt_label = gt_labels[batch_id]\n            center_x = (gt_bbox[:, [0]] + gt_bbox[:, [2]]) * width_ratio / 2\n            center_y = (gt_bbox[:, [1]] + gt_bbox[:, [3]]) * height_ratio / 2\n            gt_centers = torch.cat((center_x, center_y), dim=1)\n\n            for j, ct in enumerate(gt_centers):\n                ctx_int, cty_int = ct.int()\n                ctx, cty = ct\n                scale_box_h = (gt_bbox[j][3] - gt_bbox[j][1]) * height_ratio\n                scale_box_w = (gt_bbox[j][2] - gt_bbox[j][0]) * width_ratio\n                radius = gaussian_radius([scale_box_h, scale_box_w],\n                                         min_overlap=0.3)\n                radius = max(0, int(radius))\n                ind = gt_label[j]\n                gen_gaussian_target(center_heatmap_target[batch_id, ind],\n                                    [ctx_int, cty_int], radius)\n\n                wh_target[batch_id, 0, cty_int, ctx_int] = scale_box_w\n                wh_target[batch_id, 1, cty_int, ctx_int] = scale_box_h\n\n                offset_target[batch_id, 0, cty_int, ctx_int] = ctx - ctx_int\n                offset_target[batch_id, 1, cty_int, ctx_int] = cty - cty_int\n\n                wh_offset_target_weight[batch_id, :, cty_int, ctx_int] = 1\n\n        avg_factor = max(1, center_heatmap_target.eq(1).sum())\n        target_result = dict(\n            center_heatmap_target=center_heatmap_target,\n            wh_target=wh_target,\n            offset_target=offset_target,\n            wh_offset_target_weight=wh_offset_target_weight)\n        return target_result, avg_factor\n\n    @force_fp32(apply_to=('center_heatmap_preds', 'wh_preds', 'offset_preds'))\n    def get_bboxes(self,\n                   center_heatmap_preds,\n                   wh_preds,\n                   offset_preds,\n                   img_metas,\n                   rescale=True,\n                   with_nms=False):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            center_heatmap_preds (list[Tensor]): Center predict heatmaps for\n                all levels with shape (B, num_classes, H, W).\n            wh_preds (list[Tensor]): WH predicts for all levels with\n                shape (B, 2, H, W).\n            offset_preds (list[Tensor]): Offset predicts for all levels\n                with shape (B, 2, H, W).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n                Default: True.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: False.\n\n        Returns:\n            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is an (n, 5) tensor, where 5 represent\n                (tl_x, tl_y, br_x, br_y, score) and the score between 0 and 1.\n                The shape of the second tensor in the tuple is (n,), and\n                each element represents the class label of the corresponding\n                box.\n        \"\"\"\n        assert len(center_heatmap_preds) == len(wh_preds) == len(\n            offset_preds) == 1\n        result_list = []\n        for img_id in range(len(img_metas)):\n            result_list.append(\n                self._get_bboxes_single(\n                    center_heatmap_preds[0][img_id:img_id + 1, ...],\n                    wh_preds[0][img_id:img_id + 
1, ...],\n                    offset_preds[0][img_id:img_id + 1, ...],\n                    img_metas[img_id],\n                    rescale=rescale,\n                    with_nms=with_nms))\n        return result_list\n\n    def _get_bboxes_single(self,\n                           center_heatmap_pred,\n                           wh_pred,\n                           offset_pred,\n                           img_meta,\n                           rescale=False,\n                           with_nms=True):\n        \"\"\"Transform outputs of a single image into bbox results.\n\n        Args:\n            center_heatmap_pred (Tensor): Center heatmap for current level with\n                shape (1, num_classes, H, W).\n            wh_pred (Tensor): WH heatmap for current level with shape\n                (1, num_classes, H, W).\n            offset_pred (Tensor): Offset for current level with shape\n                (1, corner_offset_channels, H, W).\n            img_meta (dict): Meta information of current image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            tuple[Tensor, Tensor]: The first item is an (n, 5) tensor, where\n                5 represent (tl_x, tl_y, br_x, br_y, score) and the score\n                between 0 and 1. The shape of the second tensor in the tuple\n                is (n,), and each element represents the class label of the\n                corresponding box.\n        \"\"\"\n        batch_det_bboxes, batch_labels = self.decode_heatmap(\n            center_heatmap_pred,\n            wh_pred,\n            offset_pred,\n            img_meta['batch_input_shape'],\n            k=self.test_cfg.topk,\n            kernel=self.test_cfg.local_maximum_kernel)\n\n        det_bboxes = batch_det_bboxes.view([-1, 5])\n        det_labels = batch_labels.view(-1)\n\n        batch_border = det_bboxes.new_tensor(img_meta['border'])[...,\n                                                                 [2, 0, 2, 0]]\n        det_bboxes[..., :4] -= batch_border\n\n        if rescale:\n            det_bboxes[..., :4] /= det_bboxes.new_tensor(\n                img_meta['scale_factor'])\n\n        if with_nms:\n            det_bboxes, det_labels = self._bboxes_nms(det_bboxes, det_labels,\n                                                      self.test_cfg)\n        return det_bboxes, det_labels\n\n    def decode_heatmap(self,\n                       center_heatmap_pred,\n                       wh_pred,\n                       offset_pred,\n                       img_shape,\n                       k=100,\n                       kernel=3):\n        \"\"\"Transform outputs into detections raw bbox prediction.\n\n        Args:\n            center_heatmap_pred (Tensor): center predict heatmap,\n               shape (B, num_classes, H, W).\n            wh_pred (Tensor): wh predict, shape (B, 2, H, W).\n            offset_pred (Tensor): offset predict, shape (B, 2, H, W).\n            img_shape (list[int]): image shape in [h, w] format.\n            k (int): Get top k center keypoints from heatmap. 
Default 100.\n            kernel (int): Max pooling kernel for extract local maximum pixels.\n               Default 3.\n\n        Returns:\n            tuple[torch.Tensor]: Decoded output of CenterNetHead, containing\n               the following Tensors:\n\n              - batch_bboxes (Tensor): Coords of each box with shape (B, k, 5)\n              - batch_topk_labels (Tensor): Categories of each box with \\\n                  shape (B, k)\n        \"\"\"\n        height, width = center_heatmap_pred.shape[2:]\n        inp_h, inp_w = img_shape\n\n        center_heatmap_pred = get_local_maximum(\n            center_heatmap_pred, kernel=kernel)\n\n        *batch_dets, topk_ys, topk_xs = get_topk_from_heatmap(\n            center_heatmap_pred, k=k)\n        batch_scores, batch_index, batch_topk_labels = batch_dets\n\n        wh = transpose_and_gather_feat(wh_pred, batch_index)\n        offset = transpose_and_gather_feat(offset_pred, batch_index)\n        topk_xs = topk_xs + offset[..., 0]\n        topk_ys = topk_ys + offset[..., 1]\n        tl_x = (topk_xs - wh[..., 0] / 2) * (inp_w / width)\n        tl_y = (topk_ys - wh[..., 1] / 2) * (inp_h / height)\n        br_x = (topk_xs + wh[..., 0] / 2) * (inp_w / width)\n        br_y = (topk_ys + wh[..., 1] / 2) * (inp_h / height)\n\n        batch_bboxes = torch.stack([tl_x, tl_y, br_x, br_y], dim=2)\n        batch_bboxes = torch.cat((batch_bboxes, batch_scores[..., None]),\n                                 dim=-1)\n        return batch_bboxes, batch_topk_labels\n\n    def _bboxes_nms(self, bboxes, labels, cfg):\n        if labels.numel() > 0:\n            max_num = cfg.max_per_img\n            bboxes, keep = batched_nms(bboxes[:, :4], bboxes[:,\n                                                             -1].contiguous(),\n                                       labels, cfg.nms)\n            if max_num > 0:\n                bboxes = bboxes[:max_num]\n                labels = labels[keep][:max_num]\n\n        return bboxes, labels\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/centripetal_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, normal_init\nfrom mmcv.ops import DeformConv2d\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import multi_apply\nfrom ..builder import HEADS, build_loss\nfrom .corner_head import CornerHead\n\n\n@HEADS.register_module()\nclass CentripetalHead(CornerHead):\n    \"\"\"Head of CentripetalNet: Pursuing High-quality Keypoint Pairs for Object\n    Detection.\n\n    CentripetalHead inherits from :class:`CornerHead`. It removes the\n    embedding branch and adds guiding shift and centripetal shift branches.\n    More details can be found in the `paper\n    <https://arxiv.org/abs/2003.09119>`_ .\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        num_feat_levels (int): Levels of feature from the previous module. 2\n            for HourglassNet-104 and 1 for HourglassNet-52. HourglassNet-104\n            outputs the final feature and intermediate supervision feature and\n            HourglassNet-52 only outputs the final feature. Default: 2.\n        corner_emb_channels (int): Channel of embedding vector. Default: 1.\n        train_cfg (dict | None): Training config. Useless in CornerHead,\n            but we keep this variable for SingleStageDetector. Default: None.\n        test_cfg (dict | None): Testing config of CornerHead. Default: None.\n        loss_heatmap (dict | None): Config of corner heatmap loss. Default:\n            GaussianFocalLoss.\n        loss_embedding (dict | None): Config of corner embedding loss. Default:\n            AssociativeEmbeddingLoss.\n        loss_offset (dict | None): Config of corner offset loss. Default:\n            SmoothL1Loss.\n        loss_guiding_shift (dict): Config of guiding shift loss. 
Default:\n            SmoothL1Loss.\n        loss_centripetal_shift (dict): Config of centripetal shift loss.\n            Default: SmoothL1Loss.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 centripetal_shift_channels=2,\n                 guiding_shift_channels=2,\n                 feat_adaption_conv_kernel=3,\n                 loss_guiding_shift=dict(\n                     type='SmoothL1Loss', beta=1.0, loss_weight=0.05),\n                 loss_centripetal_shift=dict(\n                     type='SmoothL1Loss', beta=1.0, loss_weight=1),\n                 init_cfg=None,\n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        assert centripetal_shift_channels == 2, (\n            'CentripetalHead only support centripetal_shift_channels == 2')\n        self.centripetal_shift_channels = centripetal_shift_channels\n        assert guiding_shift_channels == 2, (\n            'CentripetalHead only support guiding_shift_channels == 2')\n        self.guiding_shift_channels = guiding_shift_channels\n        self.feat_adaption_conv_kernel = feat_adaption_conv_kernel\n        super(CentripetalHead, self).__init__(\n            *args, init_cfg=init_cfg, **kwargs)\n        self.loss_guiding_shift = build_loss(loss_guiding_shift)\n        self.loss_centripetal_shift = build_loss(loss_centripetal_shift)\n\n    def _init_centripetal_layers(self):\n        \"\"\"Initialize centripetal layers.\n\n        Including feature adaption deform convs (feat_adaption), deform offset\n        prediction convs (dcn_off), guiding shift (guiding_shift) and\n        centripetal shift ( centripetal_shift). 
Each branch has two parts:\n        prefix `tl_` for top-left and `br_` for bottom-right.\n        \"\"\"\n        self.tl_feat_adaption = nn.ModuleList()\n        self.br_feat_adaption = nn.ModuleList()\n        self.tl_dcn_offset = nn.ModuleList()\n        self.br_dcn_offset = nn.ModuleList()\n        self.tl_guiding_shift = nn.ModuleList()\n        self.br_guiding_shift = nn.ModuleList()\n        self.tl_centripetal_shift = nn.ModuleList()\n        self.br_centripetal_shift = nn.ModuleList()\n\n        for _ in range(self.num_feat_levels):\n            self.tl_feat_adaption.append(\n                DeformConv2d(self.in_channels, self.in_channels,\n                             self.feat_adaption_conv_kernel, 1, 1))\n            self.br_feat_adaption.append(\n                DeformConv2d(self.in_channels, self.in_channels,\n                             self.feat_adaption_conv_kernel, 1, 1))\n\n            self.tl_guiding_shift.append(\n                self._make_layers(\n                    out_channels=self.guiding_shift_channels,\n                    in_channels=self.in_channels))\n            self.br_guiding_shift.append(\n                self._make_layers(\n                    out_channels=self.guiding_shift_channels,\n                    in_channels=self.in_channels))\n\n            self.tl_dcn_offset.append(\n                ConvModule(\n                    self.guiding_shift_channels,\n                    self.feat_adaption_conv_kernel**2 *\n                    self.guiding_shift_channels,\n                    1,\n                    bias=False,\n                    act_cfg=None))\n            self.br_dcn_offset.append(\n                ConvModule(\n                    self.guiding_shift_channels,\n                    self.feat_adaption_conv_kernel**2 *\n                    self.guiding_shift_channels,\n                    1,\n                    bias=False,\n                    act_cfg=None))\n\n            self.tl_centripetal_shift.append(\n                self._make_layers(\n                    out_channels=self.centripetal_shift_channels,\n                    in_channels=self.in_channels))\n            self.br_centripetal_shift.append(\n                self._make_layers(\n                    out_channels=self.centripetal_shift_channels,\n                    in_channels=self.in_channels))\n\n    def _init_layers(self):\n        \"\"\"Initialize layers for CentripetalHead.\n\n        Including two parts: CornerHead layers and CentripetalHead layers\n        \"\"\"\n        super()._init_layers()  # using _init_layers in CornerHead\n        self._init_centripetal_layers()\n\n    def init_weights(self):\n        super(CentripetalHead, self).init_weights()\n        for i in range(self.num_feat_levels):\n            normal_init(self.tl_feat_adaption[i], std=0.01)\n            normal_init(self.br_feat_adaption[i], std=0.01)\n            normal_init(self.tl_dcn_offset[i].conv, std=0.1)\n            normal_init(self.br_dcn_offset[i].conv, std=0.1)\n            _ = [x.conv.reset_parameters() for x in self.tl_guiding_shift[i]]\n            _ = [x.conv.reset_parameters() for x in self.br_guiding_shift[i]]\n            _ = [\n                x.conv.reset_parameters() for x in self.tl_centripetal_shift[i]\n            ]\n            _ = [\n                x.conv.reset_parameters() for x in self.br_centripetal_shift[i]\n            ]\n\n    def forward_single(self, x, lvl_ind):\n        \"\"\"Forward feature of a single level.\n\n        Args:\n            x (Tensor): Feature of a single 
level.\n            lvl_ind (int): Level index of current feature.\n\n        Returns:\n            tuple[Tensor]: A tuple of CentripetalHead's output for current\n            feature level. Containing the following Tensors:\n\n                - tl_heat (Tensor): Predicted top-left corner heatmap.\n                - br_heat (Tensor): Predicted bottom-right corner heatmap.\n                - tl_off (Tensor): Predicted top-left offset heatmap.\n                - br_off (Tensor): Predicted bottom-right offset heatmap.\n                - tl_guiding_shift (Tensor): Predicted top-left guiding shift\n                  heatmap.\n                - br_guiding_shift (Tensor): Predicted bottom-right guiding\n                  shift heatmap.\n                - tl_centripetal_shift (Tensor): Predicted top-left centripetal\n                  shift heatmap.\n                - br_centripetal_shift (Tensor): Predicted bottom-right\n                  centripetal shift heatmap.\n        \"\"\"\n        tl_heat, br_heat, _, _, tl_off, br_off, tl_pool, br_pool = super(\n        ).forward_single(\n            x, lvl_ind, return_pool=True)\n\n        tl_guiding_shift = self.tl_guiding_shift[lvl_ind](tl_pool)\n        br_guiding_shift = self.br_guiding_shift[lvl_ind](br_pool)\n\n        tl_dcn_offset = self.tl_dcn_offset[lvl_ind](tl_guiding_shift.detach())\n        br_dcn_offset = self.br_dcn_offset[lvl_ind](br_guiding_shift.detach())\n\n        tl_feat_adaption = self.tl_feat_adaption[lvl_ind](tl_pool,\n                                                          tl_dcn_offset)\n        br_feat_adaption = self.br_feat_adaption[lvl_ind](br_pool,\n                                                          br_dcn_offset)\n\n        tl_centripetal_shift = self.tl_centripetal_shift[lvl_ind](\n            tl_feat_adaption)\n        br_centripetal_shift = self.br_centripetal_shift[lvl_ind](\n            br_feat_adaption)\n\n        result_list = [\n            tl_heat, br_heat, tl_off, br_off, tl_guiding_shift,\n            br_guiding_shift, tl_centripetal_shift, br_centripetal_shift\n        ]\n        return result_list\n\n    @force_fp32()\n    def loss(self,\n             tl_heats,\n             br_heats,\n             tl_offs,\n             br_offs,\n             tl_guiding_shifts,\n             br_guiding_shifts,\n             tl_centripetal_shifts,\n             br_centripetal_shifts,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            tl_heats (list[Tensor]): Top-left corner heatmaps for each level\n                with shape (N, num_classes, H, W).\n            br_heats (list[Tensor]): Bottom-right corner heatmaps for each\n                level with shape (N, num_classes, H, W).\n            tl_offs (list[Tensor]): Top-left corner offsets for each level\n                with shape (N, corner_offset_channels, H, W).\n            br_offs (list[Tensor]): Bottom-right corner offsets for each level\n                with shape (N, corner_offset_channels, H, W).\n            tl_guiding_shifts (list[Tensor]): Top-left guiding shifts for each\n                level with shape (N, guiding_shift_channels, H, W).\n            br_guiding_shifts (list[Tensor]): Bottom-right guiding shifts for\n                each level with shape (N, guiding_shift_channels, H, W).\n            tl_centripetal_shifts (list[Tensor]): Top-left centripetal shifts\n                for each level with shape (N, 
centripetal_shift_channels, H,\n                W).\n            br_centripetal_shifts (list[Tensor]): Bottom-right centripetal\n                shifts for each level with shape (N,\n                centripetal_shift_channels, H, W).\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [left, top, right, bottom] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): Specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components. Containing the\n            following losses:\n\n                - det_loss (list[Tensor]): Corner keypoint losses of all\n                  feature levels.\n                - off_loss (list[Tensor]): Corner offset losses of all feature\n                  levels.\n                - guiding_loss (list[Tensor]): Guiding shift losses of all\n                  feature levels.\n                - centripetal_loss (list[Tensor]): Centripetal shift losses of\n                  all feature levels.\n        \"\"\"\n        targets = self.get_targets(\n            gt_bboxes,\n            gt_labels,\n            tl_heats[-1].shape,\n            img_metas[0]['pad_shape'],\n            with_corner_emb=self.with_corner_emb,\n            with_guiding_shift=True,\n            with_centripetal_shift=True)\n        mlvl_targets = [targets for _ in range(self.num_feat_levels)]\n        [det_losses, off_losses, guiding_losses, centripetal_losses\n         ] = multi_apply(self.loss_single, tl_heats, br_heats, tl_offs,\n                         br_offs, tl_guiding_shifts, br_guiding_shifts,\n                         tl_centripetal_shifts, br_centripetal_shifts,\n                         mlvl_targets)\n        loss_dict = dict(\n            det_loss=det_losses,\n            off_loss=off_losses,\n            guiding_loss=guiding_losses,\n            centripetal_loss=centripetal_losses)\n        return loss_dict\n\n    def loss_single(self, tl_hmp, br_hmp, tl_off, br_off, tl_guiding_shift,\n                    br_guiding_shift, tl_centripetal_shift,\n                    br_centripetal_shift, targets):\n        \"\"\"Compute losses for single level.\n\n        Args:\n            tl_hmp (Tensor): Top-left corner heatmap for current level with\n                shape (N, num_classes, H, W).\n            br_hmp (Tensor): Bottom-right corner heatmap for current level with\n                shape (N, num_classes, H, W).\n            tl_off (Tensor): Top-left corner offset for current level with\n                shape (N, corner_offset_channels, H, W).\n            br_off (Tensor): Bottom-right corner offset for current level with\n                shape (N, corner_offset_channels, H, W).\n            tl_guiding_shift (Tensor): Top-left guiding shift for current level\n                with shape (N, guiding_shift_channels, H, W).\n            br_guiding_shift (Tensor): Bottom-right guiding shift for current\n                level with shape (N, guiding_shift_channels, H, W).\n            tl_centripetal_shift (Tensor): Top-left centripetal shift for\n                current level with shape (N, centripetal_shift_channels, H, W).\n            br_centripetal_shift (Tensor): Bottom-right centripetal shift for\n                
current level with shape (N, centripetal_shift_channels, H, W).\n            targets (dict): Corner target generated by `get_targets`.\n\n        Returns:\n            tuple[torch.Tensor]: Losses of the head's different branches\n            containing the following losses:\n\n                - det_loss (Tensor): Corner keypoint loss.\n                - off_loss (Tensor): Corner offset loss.\n                - guiding_loss (Tensor): Guiding shift loss.\n                - centripetal_loss (Tensor): Centripetal shift loss.\n        \"\"\"\n        targets['corner_embedding'] = None\n\n        det_loss, _, _, off_loss = super().loss_single(tl_hmp, br_hmp, None,\n                                                       None, tl_off, br_off,\n                                                       targets)\n\n        gt_tl_guiding_shift = targets['topleft_guiding_shift']\n        gt_br_guiding_shift = targets['bottomright_guiding_shift']\n        gt_tl_centripetal_shift = targets['topleft_centripetal_shift']\n        gt_br_centripetal_shift = targets['bottomright_centripetal_shift']\n\n        gt_tl_heatmap = targets['topleft_heatmap']\n        gt_br_heatmap = targets['bottomright_heatmap']\n        # We only compute the offset loss at the real corner position.\n        # The value of real corner would be 1 in heatmap ground truth.\n        # The mask is computed in class agnostic mode and its shape is\n        # batch * 1 * width * height.\n        tl_mask = gt_tl_heatmap.eq(1).sum(1).gt(0).unsqueeze(1).type_as(\n            gt_tl_heatmap)\n        br_mask = gt_br_heatmap.eq(1).sum(1).gt(0).unsqueeze(1).type_as(\n            gt_br_heatmap)\n\n        # Guiding shift loss\n        tl_guiding_loss = self.loss_guiding_shift(\n            tl_guiding_shift,\n            gt_tl_guiding_shift,\n            tl_mask,\n            avg_factor=tl_mask.sum())\n        br_guiding_loss = self.loss_guiding_shift(\n            br_guiding_shift,\n            gt_br_guiding_shift,\n            br_mask,\n            avg_factor=br_mask.sum())\n        guiding_loss = (tl_guiding_loss + br_guiding_loss) / 2.0\n        # Centripetal shift loss\n        tl_centripetal_loss = self.loss_centripetal_shift(\n            tl_centripetal_shift,\n            gt_tl_centripetal_shift,\n            tl_mask,\n            avg_factor=tl_mask.sum())\n        br_centripetal_loss = self.loss_centripetal_shift(\n            br_centripetal_shift,\n            gt_br_centripetal_shift,\n            br_mask,\n            avg_factor=br_mask.sum())\n        centripetal_loss = (tl_centripetal_loss + br_centripetal_loss) / 2.0\n\n        return det_loss, off_loss, guiding_loss, centripetal_loss\n\n    @force_fp32()\n    def get_bboxes(self,\n                   tl_heats,\n                   br_heats,\n                   tl_offs,\n                   br_offs,\n                   tl_guiding_shifts,\n                   br_guiding_shifts,\n                   tl_centripetal_shifts,\n                   br_centripetal_shifts,\n                   img_metas,\n                   rescale=False,\n                   with_nms=True):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            tl_heats (list[Tensor]): Top-left corner heatmaps for each level\n                with shape (N, num_classes, H, W).\n            br_heats (list[Tensor]): Bottom-right corner heatmaps for each\n                level with shape (N, num_classes, H, W).\n            tl_offs (list[Tensor]): Top-left corner offsets for each level\n       
         with shape (N, corner_offset_channels, H, W).\n            br_offs (list[Tensor]): Bottom-right corner offsets for each level\n                with shape (N, corner_offset_channels, H, W).\n            tl_guiding_shifts (list[Tensor]): Top-left guiding shifts for each\n                level with shape (N, guiding_shift_channels, H, W). Useless in\n                this function, we keep this arg because it's the raw output\n                from CentripetalHead.\n            br_guiding_shifts (list[Tensor]): Bottom-right guiding shifts for\n                each level with shape (N, guiding_shift_channels, H, W).\n                Useless in this function, we keep this arg because it's the\n                raw output from CentripetalHead.\n            tl_centripetal_shifts (list[Tensor]): Top-left centripetal shifts\n                for each level with shape (N, centripetal_shift_channels, H,\n                W).\n            br_centripetal_shifts (list[Tensor]): Bottom-right centripetal\n                shifts for each level with shape (N,\n                centripetal_shift_channels, H, W).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n        \"\"\"\n        assert tl_heats[-1].shape[0] == br_heats[-1].shape[0] == len(img_metas)\n        result_list = []\n        for img_id in range(len(img_metas)):\n            result_list.append(\n                self._get_bboxes_single(\n                    tl_heats[-1][img_id:img_id + 1, :],\n                    br_heats[-1][img_id:img_id + 1, :],\n                    tl_offs[-1][img_id:img_id + 1, :],\n                    br_offs[-1][img_id:img_id + 1, :],\n                    img_metas[img_id],\n                    tl_emb=None,\n                    br_emb=None,\n                    tl_centripetal_shift=tl_centripetal_shifts[-1][\n                        img_id:img_id + 1, :],\n                    br_centripetal_shift=br_centripetal_shifts[-1][\n                        img_id:img_id + 1, :],\n                    rescale=rescale,\n                    with_nms=with_nms))\n\n        return result_list\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/corner_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom logging import warning\nfrom math import ceil, log\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, bias_init_with_prob\nfrom mmcv.ops import CornerPool, batched_nms\nfrom mmcv.runner import BaseModule, force_fp32\n\nfrom mmdet.core import multi_apply\nfrom ..builder import HEADS, build_loss\nfrom ..utils import gaussian_radius, gen_gaussian_target\nfrom ..utils.gaussian_target import (gather_feat, get_local_maximum,\n                                     get_topk_from_heatmap,\n                                     transpose_and_gather_feat)\nfrom .base_dense_head import BaseDenseHead\nfrom .dense_test_mixins import BBoxTestMixin\n\n\nclass BiCornerPool(BaseModule):\n    \"\"\"Bidirectional Corner Pooling Module (TopLeft, BottomRight, etc.)\n\n    Args:\n        in_channels (int): Input channels of module.\n        out_channels (int): Output channels of module.\n        feat_channels (int): Feature channels of module.\n        directions (list[str]): Directions of two CornerPools.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 directions,\n                 feat_channels=128,\n                 out_channels=128,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 init_cfg=None):\n        super(BiCornerPool, self).__init__(init_cfg)\n        self.direction1_conv = ConvModule(\n            in_channels, feat_channels, 3, padding=1, norm_cfg=norm_cfg)\n        self.direction2_conv = ConvModule(\n            in_channels, feat_channels, 3, padding=1, norm_cfg=norm_cfg)\n\n        self.aftpool_conv = ConvModule(\n            feat_channels,\n            out_channels,\n            3,\n            padding=1,\n            norm_cfg=norm_cfg,\n            act_cfg=None)\n\n        self.conv1 = ConvModule(\n            in_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=None)\n        self.conv2 = ConvModule(\n            in_channels, out_channels, 3, padding=1, norm_cfg=norm_cfg)\n\n        self.direction1_pool = CornerPool(directions[0])\n        self.direction2_pool = CornerPool(directions[1])\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            x (tensor): Input feature of BiCornerPool.\n\n        Returns:\n            conv2 (tensor): Output feature of BiCornerPool.\n        \"\"\"\n        direction1_conv = self.direction1_conv(x)\n        direction2_conv = self.direction2_conv(x)\n        direction1_feat = self.direction1_pool(direction1_conv)\n        direction2_feat = self.direction2_pool(direction2_conv)\n        aftpool_conv = self.aftpool_conv(direction1_feat + direction2_feat)\n        conv1 = self.conv1(x)\n        relu = self.relu(aftpool_conv + conv1)\n        conv2 = self.conv2(relu)\n        return conv2\n\n\n@HEADS.register_module()\nclass CornerHead(BaseDenseHead, BBoxTestMixin):\n    \"\"\"Head of CornerNet: Detecting Objects as Paired Keypoints.\n\n    Code is modified from the `official github repo\n    <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/\n    kp.py#L73>`_ .\n\n    More details can be found in the `paper\n    <https://arxiv.org/abs/1808.01244>`_ .\n\n    Args:\n        num_classes (int): Number of 
categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        num_feat_levels (int): Levels of feature from the previous module. 2\n            for HourglassNet-104 and 1 for HourglassNet-52. Because\n            HourglassNet-104 outputs the final feature and intermediate\n            supervision feature and HourglassNet-52 only outputs the final\n            feature. Default: 2.\n        corner_emb_channels (int): Channel of embedding vector. Default: 1.\n        train_cfg (dict | None): Training config. Useless in CornerHead,\n            but we keep this variable for SingleStageDetector. Default: None.\n        test_cfg (dict | None): Testing config of CornerHead. Default: None.\n        loss_heatmap (dict | None): Config of corner heatmap loss. Default:\n            GaussianFocalLoss.\n        loss_embedding (dict | None): Config of corner embedding loss. Default:\n            AssociativeEmbeddingLoss.\n        loss_offset (dict | None): Config of corner offset loss. Default:\n            SmoothL1Loss.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 num_feat_levels=2,\n                 corner_emb_channels=1,\n                 train_cfg=None,\n                 test_cfg=None,\n                 loss_heatmap=dict(\n                     type='GaussianFocalLoss',\n                     alpha=2.0,\n                     gamma=4.0,\n                     loss_weight=1),\n                 loss_embedding=dict(\n                     type='AssociativeEmbeddingLoss',\n                     pull_weight=0.25,\n                     push_weight=0.25),\n                 loss_offset=dict(\n                     type='SmoothL1Loss', beta=1.0, loss_weight=1),\n                 init_cfg=None):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super(CornerHead, self).__init__(init_cfg)\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.corner_emb_channels = corner_emb_channels\n        self.with_corner_emb = self.corner_emb_channels > 0\n        self.corner_offset_channels = 2\n        self.num_feat_levels = num_feat_levels\n        self.loss_heatmap = build_loss(\n            loss_heatmap) if loss_heatmap is not None else None\n        self.loss_embedding = build_loss(\n            loss_embedding) if loss_embedding is not None else None\n        self.loss_offset = build_loss(\n            loss_offset) if loss_offset is not None else None\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        self.fp16_enabled = False\n        self._init_layers()\n\n    def _make_layers(self, out_channels, in_channels=256, feat_channels=256):\n        \"\"\"Initialize conv sequential for CornerHead.\"\"\"\n        return nn.Sequential(\n            ConvModule(in_channels, feat_channels, 3, padding=1),\n            ConvModule(\n                feat_channels, out_channels, 1, norm_cfg=None, act_cfg=None))\n\n    def _init_corner_kpt_layers(self):\n        \"\"\"Initialize corner keypoint layers.\n\n        Including corner heatmap branch and corner offset branch. 
Each branch\n        has two parts: prefix `tl_` for top-left and `br_` for bottom-right.\n        \"\"\"\n        self.tl_pool, self.br_pool = nn.ModuleList(), nn.ModuleList()\n        self.tl_heat, self.br_heat = nn.ModuleList(), nn.ModuleList()\n        self.tl_off, self.br_off = nn.ModuleList(), nn.ModuleList()\n\n        for _ in range(self.num_feat_levels):\n            self.tl_pool.append(\n                BiCornerPool(\n                    self.in_channels, ['top', 'left'],\n                    out_channels=self.in_channels))\n            self.br_pool.append(\n                BiCornerPool(\n                    self.in_channels, ['bottom', 'right'],\n                    out_channels=self.in_channels))\n\n            self.tl_heat.append(\n                self._make_layers(\n                    out_channels=self.num_classes,\n                    in_channels=self.in_channels))\n            self.br_heat.append(\n                self._make_layers(\n                    out_channels=self.num_classes,\n                    in_channels=self.in_channels))\n\n            self.tl_off.append(\n                self._make_layers(\n                    out_channels=self.corner_offset_channels,\n                    in_channels=self.in_channels))\n            self.br_off.append(\n                self._make_layers(\n                    out_channels=self.corner_offset_channels,\n                    in_channels=self.in_channels))\n\n    def _init_corner_emb_layers(self):\n        \"\"\"Initialize corner embedding layers.\n\n        Only include corner embedding branch with two parts: prefix `tl_` for\n        top-left and `br_` for bottom-right.\n        \"\"\"\n        self.tl_emb, self.br_emb = nn.ModuleList(), nn.ModuleList()\n\n        for _ in range(self.num_feat_levels):\n            self.tl_emb.append(\n                self._make_layers(\n                    out_channels=self.corner_emb_channels,\n                    in_channels=self.in_channels))\n            self.br_emb.append(\n                self._make_layers(\n                    out_channels=self.corner_emb_channels,\n                    in_channels=self.in_channels))\n\n    def _init_layers(self):\n        \"\"\"Initialize layers for CornerHead.\n\n        Including two parts: corner keypoint layers and corner embedding layers\n        \"\"\"\n        self._init_corner_kpt_layers()\n        if self.with_corner_emb:\n            self._init_corner_emb_layers()\n\n    def init_weights(self):\n        super(CornerHead, self).init_weights()\n        bias_init = bias_init_with_prob(0.1)\n        for i in range(self.num_feat_levels):\n            # The initialization of parameters are different between\n            # nn.Conv2d and ConvModule. 
Our experiments show that\n            # using the original initialization of nn.Conv2d increases\n            # the final mAP by about 0.2%\n            self.tl_heat[i][-1].conv.reset_parameters()\n            self.tl_heat[i][-1].conv.bias.data.fill_(bias_init)\n            self.br_heat[i][-1].conv.reset_parameters()\n            self.br_heat[i][-1].conv.bias.data.fill_(bias_init)\n            self.tl_off[i][-1].conv.reset_parameters()\n            self.br_off[i][-1].conv.reset_parameters()\n            if self.with_corner_emb:\n                self.tl_emb[i][-1].conv.reset_parameters()\n                self.br_emb[i][-1].conv.reset_parameters()\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple: Usually a tuple of corner heatmaps, offset heatmaps and\n            embedding heatmaps.\n                - tl_heats (list[Tensor]): Top-left corner heatmaps for all\n                  levels, each is a 4D-tensor, the channels number is\n                  num_classes.\n                - br_heats (list[Tensor]): Bottom-right corner heatmaps for all\n                  levels, each is a 4D-tensor, the channels number is\n                  num_classes.\n                - tl_embs (list[Tensor] | list[None]): Top-left embedding\n                  heatmaps for all levels, each is a 4D-tensor or None.\n                  If not None, the channels number is corner_emb_channels.\n                - br_embs (list[Tensor] | list[None]): Bottom-right embedding\n                  heatmaps for all levels, each is a 4D-tensor or None.\n                  If not None, the channels number is corner_emb_channels.\n                - tl_offs (list[Tensor]): Top-left offset heatmaps for all\n                  levels, each is a 4D-tensor. The channels number is\n                  corner_offset_channels.\n                - br_offs (list[Tensor]): Bottom-right offset heatmaps for all\n                  levels, each is a 4D-tensor. The channels number is\n                  corner_offset_channels.\n        \"\"\"\n        lvl_ind = list(range(self.num_feat_levels))\n        return multi_apply(self.forward_single, feats, lvl_ind)\n\n    def forward_single(self, x, lvl_ind, return_pool=False):\n        \"\"\"Forward feature of a single level.\n\n        Args:\n            x (Tensor): Feature of a single level.\n            lvl_ind (int): Level index of current feature.\n            return_pool (bool): Return corner pool feature or not.\n\n        Returns:\n            tuple[Tensor]: A tuple of CornerHead's output for current feature\n            level. Containing the following Tensors:\n\n                - tl_heat (Tensor): Predicted top-left corner heatmap.\n                - br_heat (Tensor): Predicted bottom-right corner heatmap.\n                - tl_emb (Tensor | None): Predicted top-left embedding heatmap.\n                  None for `self.with_corner_emb == False`.\n                - br_emb (Tensor | None): Predicted bottom-right embedding\n                  heatmap. None for `self.with_corner_emb == False`.\n                - tl_off (Tensor): Predicted top-left offset heatmap.\n                - br_off (Tensor): Predicted bottom-right offset heatmap.\n                - tl_pool (Tensor): Top-left corner pool feature. 
Not must\n                  have.\n                - br_pool (Tensor): Bottom-right corner pool feature. Not must\n                  have.\n        \"\"\"\n        tl_pool = self.tl_pool[lvl_ind](x)\n        tl_heat = self.tl_heat[lvl_ind](tl_pool)\n        br_pool = self.br_pool[lvl_ind](x)\n        br_heat = self.br_heat[lvl_ind](br_pool)\n\n        tl_emb, br_emb = None, None\n        if self.with_corner_emb:\n            tl_emb = self.tl_emb[lvl_ind](tl_pool)\n            br_emb = self.br_emb[lvl_ind](br_pool)\n\n        tl_off = self.tl_off[lvl_ind](tl_pool)\n        br_off = self.br_off[lvl_ind](br_pool)\n\n        result_list = [tl_heat, br_heat, tl_emb, br_emb, tl_off, br_off]\n        if return_pool:\n            result_list.append(tl_pool)\n            result_list.append(br_pool)\n\n        return result_list\n\n    def get_targets(self,\n                    gt_bboxes,\n                    gt_labels,\n                    feat_shape,\n                    img_shape,\n                    with_corner_emb=False,\n                    with_guiding_shift=False,\n                    with_centripetal_shift=False):\n        \"\"\"Generate corner targets.\n\n        Including corner heatmap, corner offset.\n\n        Optional: corner embedding, corner guiding shift, centripetal shift.\n\n        For CornerNet, we generate corner heatmap, corner offset and corner\n        embedding from this function.\n\n        For CentripetalNet, we generate corner heatmap, corner offset, guiding\n        shift and centripetal shift from this function.\n\n        Args:\n            gt_bboxes (list[Tensor]): Ground truth bboxes of each image, each\n                has shape (num_gt, 4).\n            gt_labels (list[Tensor]): Ground truth labels of each box, each has\n                shape (num_gt,).\n            feat_shape (list[int]): Shape of output feature,\n                [batch, channel, height, width].\n            img_shape (list[int]): Shape of input image,\n                [height, width, channel].\n            with_corner_emb (bool): Generate corner embedding target or not.\n                Default: False.\n            with_guiding_shift (bool): Generate guiding shift target or not.\n                Default: False.\n            with_centripetal_shift (bool): Generate centripetal shift target or\n                not. Default: False.\n\n        Returns:\n            dict: Ground truth of corner heatmap, corner offset, corner\n            embedding, guiding shift and centripetal shift. Containing the\n            following keys:\n\n                - topleft_heatmap (Tensor): Ground truth top-left corner\n                  heatmap.\n                - bottomright_heatmap (Tensor): Ground truth bottom-right\n                  corner heatmap.\n                - topleft_offset (Tensor): Ground truth top-left corner offset.\n                - bottomright_offset (Tensor): Ground truth bottom-right corner\n                  offset.\n                - corner_embedding (list[list[list[int]]]): Ground truth corner\n                  embedding. Not must have.\n                - topleft_guiding_shift (Tensor): Ground truth top-left corner\n                  guiding shift. Not must have.\n                - bottomright_guiding_shift (Tensor): Ground truth bottom-right\n                  corner guiding shift. Not must have.\n                - topleft_centripetal_shift (Tensor): Ground truth top-left\n                  corner centripetal shift. 
Not must have.\n                - bottomright_centripetal_shift (Tensor): Ground truth\n                  bottom-right corner centripetal shift. Not must have.\n        \"\"\"\n        batch_size, _, height, width = feat_shape\n        img_h, img_w = img_shape[:2]\n\n        width_ratio = float(width / img_w)\n        height_ratio = float(height / img_h)\n\n        gt_tl_heatmap = gt_bboxes[-1].new_zeros(\n            [batch_size, self.num_classes, height, width])\n        gt_br_heatmap = gt_bboxes[-1].new_zeros(\n            [batch_size, self.num_classes, height, width])\n        gt_tl_offset = gt_bboxes[-1].new_zeros([batch_size, 2, height, width])\n        gt_br_offset = gt_bboxes[-1].new_zeros([batch_size, 2, height, width])\n\n        if with_corner_emb:\n            match = []\n\n        # Guiding shift is a kind of offset, from center to corner\n        if with_guiding_shift:\n            gt_tl_guiding_shift = gt_bboxes[-1].new_zeros(\n                [batch_size, 2, height, width])\n            gt_br_guiding_shift = gt_bboxes[-1].new_zeros(\n                [batch_size, 2, height, width])\n        # Centripetal shift is also a kind of offset, from center to corner\n        # and normalized by log.\n        if with_centripetal_shift:\n            gt_tl_centripetal_shift = gt_bboxes[-1].new_zeros(\n                [batch_size, 2, height, width])\n            gt_br_centripetal_shift = gt_bboxes[-1].new_zeros(\n                [batch_size, 2, height, width])\n\n        for batch_id in range(batch_size):\n            # Ground truth of corner embedding per image is a list of coord set\n            corner_match = []\n            for box_id in range(len(gt_labels[batch_id])):\n                left, top, right, bottom = gt_bboxes[batch_id][box_id]\n                center_x = (left + right) / 2.0\n                center_y = (top + bottom) / 2.0\n                label = gt_labels[batch_id][box_id]\n\n                # Use coords in the feature level to generate ground truth\n                scale_left = left * width_ratio\n                scale_right = right * width_ratio\n                scale_top = top * height_ratio\n                scale_bottom = bottom * height_ratio\n                scale_center_x = center_x * width_ratio\n                scale_center_y = center_y * height_ratio\n\n                # Int coords on feature map/ground truth tensor\n                left_idx = int(min(scale_left, width - 1))\n                right_idx = int(min(scale_right, width - 1))\n                top_idx = int(min(scale_top, height - 1))\n                bottom_idx = int(min(scale_bottom, height - 1))\n\n                # Generate gaussian heatmap\n                scale_box_width = ceil(scale_right - scale_left)\n                scale_box_height = ceil(scale_bottom - scale_top)\n                radius = gaussian_radius((scale_box_height, scale_box_width),\n                                         min_overlap=0.3)\n                radius = max(0, int(radius))\n                gt_tl_heatmap[batch_id, label] = gen_gaussian_target(\n                    gt_tl_heatmap[batch_id, label], [left_idx, top_idx],\n                    radius)\n                gt_br_heatmap[batch_id, label] = gen_gaussian_target(\n                    gt_br_heatmap[batch_id, label], [right_idx, bottom_idx],\n                    radius)\n\n                # Generate corner offset\n                left_offset = scale_left - left_idx\n                top_offset = scale_top - top_idx\n                right_offset = scale_right - 
right_idx\n                bottom_offset = scale_bottom - bottom_idx\n                gt_tl_offset[batch_id, 0, top_idx, left_idx] = left_offset\n                gt_tl_offset[batch_id, 1, top_idx, left_idx] = top_offset\n                gt_br_offset[batch_id, 0, bottom_idx, right_idx] = right_offset\n                gt_br_offset[batch_id, 1, bottom_idx,\n                             right_idx] = bottom_offset\n\n                # Generate corner embedding\n                if with_corner_emb:\n                    corner_match.append([[top_idx, left_idx],\n                                         [bottom_idx, right_idx]])\n                # Generate guiding shift\n                if with_guiding_shift:\n                    gt_tl_guiding_shift[batch_id, 0, top_idx,\n                                        left_idx] = scale_center_x - left_idx\n                    gt_tl_guiding_shift[batch_id, 1, top_idx,\n                                        left_idx] = scale_center_y - top_idx\n                    gt_br_guiding_shift[batch_id, 0, bottom_idx,\n                                        right_idx] = right_idx - scale_center_x\n                    gt_br_guiding_shift[\n                        batch_id, 1, bottom_idx,\n                        right_idx] = bottom_idx - scale_center_y\n                # Generate centripetal shift\n                if with_centripetal_shift:\n                    gt_tl_centripetal_shift[batch_id, 0, top_idx,\n                                            left_idx] = log(scale_center_x -\n                                                            scale_left)\n                    gt_tl_centripetal_shift[batch_id, 1, top_idx,\n                                            left_idx] = log(scale_center_y -\n                                                            scale_top)\n                    gt_br_centripetal_shift[batch_id, 0, bottom_idx,\n                                            right_idx] = log(scale_right -\n                                                             scale_center_x)\n                    gt_br_centripetal_shift[batch_id, 1, bottom_idx,\n                                            right_idx] = log(scale_bottom -\n                                                             scale_center_y)\n\n            if with_corner_emb:\n                match.append(corner_match)\n\n        target_result = dict(\n            topleft_heatmap=gt_tl_heatmap,\n            topleft_offset=gt_tl_offset,\n            bottomright_heatmap=gt_br_heatmap,\n            bottomright_offset=gt_br_offset)\n\n        if with_corner_emb:\n            target_result.update(corner_embedding=match)\n        if with_guiding_shift:\n            target_result.update(\n                topleft_guiding_shift=gt_tl_guiding_shift,\n                bottomright_guiding_shift=gt_br_guiding_shift)\n        if with_centripetal_shift:\n            target_result.update(\n                topleft_centripetal_shift=gt_tl_centripetal_shift,\n                bottomright_centripetal_shift=gt_br_centripetal_shift)\n\n        return target_result\n\n    @force_fp32()\n    def loss(self,\n             tl_heats,\n             br_heats,\n             tl_embs,\n             br_embs,\n             tl_offs,\n             br_offs,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            tl_heats (list[Tensor]): Top-left corner heatmaps for each level\n                with shape (N, 
num_classes, H, W).\n            br_heats (list[Tensor]): Bottom-right corner heatmaps for each\n                level with shape (N, num_classes, H, W).\n            tl_embs (list[Tensor]): Top-left corner embeddings for each level\n                with shape (N, corner_emb_channels, H, W).\n            br_embs (list[Tensor]): Bottom-right corner embeddings for each\n                level with shape (N, corner_emb_channels, H, W).\n            tl_offs (list[Tensor]): Top-left corner offsets for each level\n                with shape (N, corner_offset_channels, H, W).\n            br_offs (list[Tensor]): Bottom-right corner offsets for each level\n                with shape (N, corner_offset_channels, H, W).\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [left, top, right, bottom] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): Specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components. Containing the\n            following losses:\n\n                - det_loss (list[Tensor]): Corner keypoint losses of all\n                  feature levels.\n                - pull_loss (list[Tensor]): Part one of AssociativeEmbedding\n                  losses of all feature levels.\n                - push_loss (list[Tensor]): Part two of AssociativeEmbedding\n                  losses of all feature levels.\n                - off_loss (list[Tensor]): Corner offset losses of all feature\n                  levels.\n        \"\"\"\n        targets = self.get_targets(\n            gt_bboxes,\n            gt_labels,\n            tl_heats[-1].shape,\n            img_metas[0]['pad_shape'],\n            with_corner_emb=self.with_corner_emb)\n        mlvl_targets = [targets for _ in range(self.num_feat_levels)]\n        det_losses, pull_losses, push_losses, off_losses = multi_apply(\n            self.loss_single, tl_heats, br_heats, tl_embs, br_embs, tl_offs,\n            br_offs, mlvl_targets)\n        loss_dict = dict(det_loss=det_losses, off_loss=off_losses)\n        if self.with_corner_emb:\n            loss_dict.update(pull_loss=pull_losses, push_loss=push_losses)\n        return loss_dict\n\n    def loss_single(self, tl_hmp, br_hmp, tl_emb, br_emb, tl_off, br_off,\n                    targets):\n        \"\"\"Compute losses for single level.\n\n        Args:\n            tl_hmp (Tensor): Top-left corner heatmap for current level with\n                shape (N, num_classes, H, W).\n            br_hmp (Tensor): Bottom-right corner heatmap for current level with\n                shape (N, num_classes, H, W).\n            tl_emb (Tensor): Top-left corner embedding for current level with\n                shape (N, corner_emb_channels, H, W).\n            br_emb (Tensor): Bottom-right corner embedding for current level\n                with shape (N, corner_emb_channels, H, W).\n            tl_off (Tensor): Top-left corner offset for current level with\n                shape (N, corner_offset_channels, H, W).\n            br_off (Tensor): Bottom-right corner offset for current level with\n                shape (N, corner_offset_channels, H, W).\n            targets (dict): Corner target generated by `get_targets`.\n\n       
 Returns:\n            tuple[torch.Tensor]: Losses of the head's different branches\n            containing the following losses:\n\n                - det_loss (Tensor): Corner keypoint loss.\n                - pull_loss (Tensor): Part one of AssociativeEmbedding loss.\n                - push_loss (Tensor): Part two of AssociativeEmbedding loss.\n                - off_loss (Tensor): Corner offset loss.\n        \"\"\"\n        gt_tl_hmp = targets['topleft_heatmap']\n        gt_br_hmp = targets['bottomright_heatmap']\n        gt_tl_off = targets['topleft_offset']\n        gt_br_off = targets['bottomright_offset']\n        gt_embedding = targets['corner_embedding']\n\n        # Detection loss\n        tl_det_loss = self.loss_heatmap(\n            tl_hmp.sigmoid(),\n            gt_tl_hmp,\n            avg_factor=max(1,\n                           gt_tl_hmp.eq(1).sum()))\n        br_det_loss = self.loss_heatmap(\n            br_hmp.sigmoid(),\n            gt_br_hmp,\n            avg_factor=max(1,\n                           gt_br_hmp.eq(1).sum()))\n        det_loss = (tl_det_loss + br_det_loss) / 2.0\n\n        # AssociativeEmbedding loss\n        if self.with_corner_emb and self.loss_embedding is not None:\n            pull_loss, push_loss = self.loss_embedding(tl_emb, br_emb,\n                                                       gt_embedding)\n        else:\n            pull_loss, push_loss = None, None\n\n        # Offset loss\n        # We only compute the offset loss at the real corner position.\n        # The value of real corner would be 1 in heatmap ground truth.\n        # The mask is computed in class agnostic mode and its shape is\n        # batch * 1 * width * height.\n        tl_off_mask = gt_tl_hmp.eq(1).sum(1).gt(0).unsqueeze(1).type_as(\n            gt_tl_hmp)\n        br_off_mask = gt_br_hmp.eq(1).sum(1).gt(0).unsqueeze(1).type_as(\n            gt_br_hmp)\n        tl_off_loss = self.loss_offset(\n            tl_off,\n            gt_tl_off,\n            tl_off_mask,\n            avg_factor=max(1, tl_off_mask.sum()))\n        br_off_loss = self.loss_offset(\n            br_off,\n            gt_br_off,\n            br_off_mask,\n            avg_factor=max(1, br_off_mask.sum()))\n\n        off_loss = (tl_off_loss + br_off_loss) / 2.0\n\n        return det_loss, pull_loss, push_loss, off_loss\n\n    @force_fp32()\n    def get_bboxes(self,\n                   tl_heats,\n                   br_heats,\n                   tl_embs,\n                   br_embs,\n                   tl_offs,\n                   br_offs,\n                   img_metas,\n                   rescale=False,\n                   with_nms=True):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            tl_heats (list[Tensor]): Top-left corner heatmaps for each level\n                with shape (N, num_classes, H, W).\n            br_heats (list[Tensor]): Bottom-right corner heatmaps for each\n                level with shape (N, num_classes, H, W).\n            tl_embs (list[Tensor]): Top-left corner embeddings for each level\n                with shape (N, corner_emb_channels, H, W).\n            br_embs (list[Tensor]): Bottom-right corner embeddings for each\n                level with shape (N, corner_emb_channels, H, W).\n            tl_offs (list[Tensor]): Top-left corner offsets for each level\n                with shape (N, corner_offset_channels, H, W).\n            br_offs (list[Tensor]): Bottom-right corner offsets for each level\n                with shape 
(N, corner_offset_channels, H, W).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n        \"\"\"\n        assert tl_heats[-1].shape[0] == br_heats[-1].shape[0] == len(img_metas)\n        result_list = []\n        for img_id in range(len(img_metas)):\n            result_list.append(\n                self._get_bboxes_single(\n                    tl_heats[-1][img_id:img_id + 1, :],\n                    br_heats[-1][img_id:img_id + 1, :],\n                    tl_offs[-1][img_id:img_id + 1, :],\n                    br_offs[-1][img_id:img_id + 1, :],\n                    img_metas[img_id],\n                    tl_emb=tl_embs[-1][img_id:img_id + 1, :],\n                    br_emb=br_embs[-1][img_id:img_id + 1, :],\n                    rescale=rescale,\n                    with_nms=with_nms))\n\n        return result_list\n\n    def _get_bboxes_single(self,\n                           tl_heat,\n                           br_heat,\n                           tl_off,\n                           br_off,\n                           img_meta,\n                           tl_emb=None,\n                           br_emb=None,\n                           tl_centripetal_shift=None,\n                           br_centripetal_shift=None,\n                           rescale=False,\n                           with_nms=True):\n        \"\"\"Transform outputs for a single batch item into bbox predictions.\n\n        Args:\n            tl_heat (Tensor): Top-left corner heatmap for current level with\n                shape (N, num_classes, H, W).\n            br_heat (Tensor): Bottom-right corner heatmap for current level\n                with shape (N, num_classes, H, W).\n            tl_off (Tensor): Top-left corner offset for current level with\n                shape (N, corner_offset_channels, H, W).\n            br_off (Tensor): Bottom-right corner offset for current level with\n                shape (N, corner_offset_channels, H, W).\n            img_meta (dict): Meta information of current image, e.g.,\n                image size, scaling factor, etc.\n            tl_emb (Tensor): Top-left corner embedding for current level with\n                shape (N, corner_emb_channels, H, W).\n            br_emb (Tensor): Bottom-right corner embedding for current level\n                with shape (N, corner_emb_channels, H, W).\n            tl_centripetal_shift: Top-left corner's centripetal shift for\n                current level with shape (N, 2, H, W).\n            br_centripetal_shift: Bottom-right corner's centripetal shift for\n                current level with shape (N, 2, H, W).\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n        \"\"\"\n        if isinstance(img_meta, (list, tuple)):\n            img_meta = img_meta[0]\n\n        batch_bboxes, batch_scores, batch_clses = self.decode_heatmap(\n            tl_heat=tl_heat.sigmoid(),\n            br_heat=br_heat.sigmoid(),\n            tl_off=tl_off,\n            br_off=br_off,\n            tl_emb=tl_emb,\n            br_emb=br_emb,\n            tl_centripetal_shift=tl_centripetal_shift,\n            
br_centripetal_shift=br_centripetal_shift,\n            img_meta=img_meta,\n            k=self.test_cfg.corner_topk,\n            kernel=self.test_cfg.local_maximum_kernel,\n            distance_threshold=self.test_cfg.distance_threshold)\n\n        if rescale:\n            batch_bboxes /= batch_bboxes.new_tensor(img_meta['scale_factor'])\n\n        bboxes = batch_bboxes.view([-1, 4])\n        scores = batch_scores.view(-1)\n        clses = batch_clses.view(-1)\n\n        detections = torch.cat([bboxes, scores.unsqueeze(-1)], -1)\n        keepinds = (detections[:, -1] > -0.1)\n        detections = detections[keepinds]\n        labels = clses[keepinds]\n\n        if with_nms:\n            detections, labels = self._bboxes_nms(detections, labels,\n                                                  self.test_cfg)\n\n        return detections, labels\n\n    def _bboxes_nms(self, bboxes, labels, cfg):\n        if 'nms_cfg' in cfg:\n            warning.warn('nms_cfg in test_cfg will be deprecated. '\n                         'Please rename it as nms')\n        if 'nms' not in cfg:\n            cfg.nms = cfg.nms_cfg\n\n        if labels.numel() > 0:\n            max_num = cfg.max_per_img\n            bboxes, keep = batched_nms(bboxes[:, :4], bboxes[:,\n                                                             -1].contiguous(),\n                                       labels, cfg.nms)\n            if max_num > 0:\n                bboxes = bboxes[:max_num]\n                labels = labels[keep][:max_num]\n\n        return bboxes, labels\n\n    def decode_heatmap(self,\n                       tl_heat,\n                       br_heat,\n                       tl_off,\n                       br_off,\n                       tl_emb=None,\n                       br_emb=None,\n                       tl_centripetal_shift=None,\n                       br_centripetal_shift=None,\n                       img_meta=None,\n                       k=100,\n                       kernel=3,\n                       distance_threshold=0.5,\n                       num_dets=1000):\n        \"\"\"Transform outputs for a single batch item into raw bbox predictions.\n\n        Args:\n            tl_heat (Tensor): Top-left corner heatmap for current level with\n                shape (N, num_classes, H, W).\n            br_heat (Tensor): Bottom-right corner heatmap for current level\n                with shape (N, num_classes, H, W).\n            tl_off (Tensor): Top-left corner offset for current level with\n                shape (N, corner_offset_channels, H, W).\n            br_off (Tensor): Bottom-right corner offset for current level with\n                shape (N, corner_offset_channels, H, W).\n            tl_emb (Tensor | None): Top-left corner embedding for current\n                level with shape (N, corner_emb_channels, H, W).\n            br_emb (Tensor | None): Bottom-right corner embedding for current\n                level with shape (N, corner_emb_channels, H, W).\n            tl_centripetal_shift (Tensor | None): Top-left centripetal shift\n                for current level with shape (N, 2, H, W).\n            br_centripetal_shift (Tensor | None): Bottom-right centripetal\n                shift for current level with shape (N, 2, H, W).\n            img_meta (dict): Meta information of current image, e.g.,\n                image size, scaling factor, etc.\n            k (int): Get top k corner keypoints from heatmap.\n            kernel (int): Max pooling kernel for extract local maximum pixels.\n            
distance_threshold (float): Distance threshold. Top-left and\n                bottom-right corner keypoints with feature distance less than\n                the threshold will be regarded as keypoints from same object.\n            num_dets (int): Num of raw boxes before doing nms.\n\n        Returns:\n            tuple[torch.Tensor]: Decoded output of CornerHead, containing the\n            following Tensors:\n\n            - bboxes (Tensor): Coords of each box.\n            - scores (Tensor): Scores of each box.\n            - clses (Tensor): Categories of each box.\n        \"\"\"\n        with_embedding = tl_emb is not None and br_emb is not None\n        with_centripetal_shift = (\n            tl_centripetal_shift is not None\n            and br_centripetal_shift is not None)\n        assert with_embedding + with_centripetal_shift == 1\n        batch, _, height, width = tl_heat.size()\n        if torch.onnx.is_in_onnx_export():\n            inp_h, inp_w = img_meta['pad_shape_for_onnx'][:2]\n        else:\n            inp_h, inp_w, _ = img_meta['pad_shape']\n\n        # perform nms on heatmaps\n        tl_heat = get_local_maximum(tl_heat, kernel=kernel)\n        br_heat = get_local_maximum(br_heat, kernel=kernel)\n\n        tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = get_topk_from_heatmap(\n            tl_heat, k=k)\n        br_scores, br_inds, br_clses, br_ys, br_xs = get_topk_from_heatmap(\n            br_heat, k=k)\n\n        # We use repeat instead of expand here because expand is a\n        # shallow-copy function. Thus it could cause unexpected testing result\n        # sometimes. Using expand will decrease about 10% mAP during testing\n        # compared to repeat.\n        tl_ys = tl_ys.view(batch, k, 1).repeat(1, 1, k)\n        tl_xs = tl_xs.view(batch, k, 1).repeat(1, 1, k)\n        br_ys = br_ys.view(batch, 1, k).repeat(1, k, 1)\n        br_xs = br_xs.view(batch, 1, k).repeat(1, k, 1)\n\n        tl_off = transpose_and_gather_feat(tl_off, tl_inds)\n        tl_off = tl_off.view(batch, k, 1, 2)\n        br_off = transpose_and_gather_feat(br_off, br_inds)\n        br_off = br_off.view(batch, 1, k, 2)\n\n        tl_xs = tl_xs + tl_off[..., 0]\n        tl_ys = tl_ys + tl_off[..., 1]\n        br_xs = br_xs + br_off[..., 0]\n        br_ys = br_ys + br_off[..., 1]\n\n        if with_centripetal_shift:\n            tl_centripetal_shift = transpose_and_gather_feat(\n                tl_centripetal_shift, tl_inds).view(batch, k, 1, 2).exp()\n            br_centripetal_shift = transpose_and_gather_feat(\n                br_centripetal_shift, br_inds).view(batch, 1, k, 2).exp()\n\n            tl_ctxs = tl_xs + tl_centripetal_shift[..., 0]\n            tl_ctys = tl_ys + tl_centripetal_shift[..., 1]\n            br_ctxs = br_xs - br_centripetal_shift[..., 0]\n            br_ctys = br_ys - br_centripetal_shift[..., 1]\n\n        # all possible boxes based on top k corners (ignoring class)\n        tl_xs *= (inp_w / width)\n        tl_ys *= (inp_h / height)\n        br_xs *= (inp_w / width)\n        br_ys *= (inp_h / height)\n\n        if with_centripetal_shift:\n            tl_ctxs *= (inp_w / width)\n            tl_ctys *= (inp_h / height)\n            br_ctxs *= (inp_w / width)\n            br_ctys *= (inp_h / height)\n\n        x_off, y_off = 0, 0  # no crop\n        if not torch.onnx.is_in_onnx_export():\n            # since `RandomCenterCropPad` is done on CPU with numpy and it's\n            # not dynamic traceable when exporting to ONNX, thus 'border'\n            # does not appears 
as key in 'img_meta'. As a tmp solution,\n            # we move this 'border' handle part to the postprocess after\n            # finished exporting to ONNX, which is handle in\n            # `mmdet/core/export/model_wrappers.py`. Though difference between\n            # pytorch and exported onnx model, it might be ignored since\n            # comparable performance is achieved between them (e.g. 40.4 vs\n            # 40.6 on COCO val2017, for CornerNet without test-time flip)\n            if 'border' in img_meta:\n                x_off = img_meta['border'][2]\n                y_off = img_meta['border'][0]\n\n        tl_xs -= x_off\n        tl_ys -= y_off\n        br_xs -= x_off\n        br_ys -= y_off\n\n        zeros = tl_xs.new_zeros(*tl_xs.size())\n        tl_xs = torch.where(tl_xs > 0.0, tl_xs, zeros)\n        tl_ys = torch.where(tl_ys > 0.0, tl_ys, zeros)\n        br_xs = torch.where(br_xs > 0.0, br_xs, zeros)\n        br_ys = torch.where(br_ys > 0.0, br_ys, zeros)\n\n        bboxes = torch.stack((tl_xs, tl_ys, br_xs, br_ys), dim=3)\n        area_bboxes = ((br_xs - tl_xs) * (br_ys - tl_ys)).abs()\n\n        if with_centripetal_shift:\n            tl_ctxs -= x_off\n            tl_ctys -= y_off\n            br_ctxs -= x_off\n            br_ctys -= y_off\n\n            tl_ctxs *= tl_ctxs.gt(0.0).type_as(tl_ctxs)\n            tl_ctys *= tl_ctys.gt(0.0).type_as(tl_ctys)\n            br_ctxs *= br_ctxs.gt(0.0).type_as(br_ctxs)\n            br_ctys *= br_ctys.gt(0.0).type_as(br_ctys)\n\n            ct_bboxes = torch.stack((tl_ctxs, tl_ctys, br_ctxs, br_ctys),\n                                    dim=3)\n            area_ct_bboxes = ((br_ctxs - tl_ctxs) * (br_ctys - tl_ctys)).abs()\n\n            rcentral = torch.zeros_like(ct_bboxes)\n            # magic nums from paper section 4.1\n            mu = torch.ones_like(area_bboxes) / 2.4\n            mu[area_bboxes > 3500] = 1 / 2.1  # large bbox have smaller mu\n\n            bboxes_center_x = (bboxes[..., 0] + bboxes[..., 2]) / 2\n            bboxes_center_y = (bboxes[..., 1] + bboxes[..., 3]) / 2\n            rcentral[..., 0] = bboxes_center_x - mu * (bboxes[..., 2] -\n                                                       bboxes[..., 0]) / 2\n            rcentral[..., 1] = bboxes_center_y - mu * (bboxes[..., 3] -\n                                                       bboxes[..., 1]) / 2\n            rcentral[..., 2] = bboxes_center_x + mu * (bboxes[..., 2] -\n                                                       bboxes[..., 0]) / 2\n            rcentral[..., 3] = bboxes_center_y + mu * (bboxes[..., 3] -\n                                                       bboxes[..., 1]) / 2\n            area_rcentral = ((rcentral[..., 2] - rcentral[..., 0]) *\n                             (rcentral[..., 3] - rcentral[..., 1])).abs()\n            dists = area_ct_bboxes / area_rcentral\n\n            tl_ctx_inds = (ct_bboxes[..., 0] <= rcentral[..., 0]) | (\n                ct_bboxes[..., 0] >= rcentral[..., 2])\n            tl_cty_inds = (ct_bboxes[..., 1] <= rcentral[..., 1]) | (\n                ct_bboxes[..., 1] >= rcentral[..., 3])\n            br_ctx_inds = (ct_bboxes[..., 2] <= rcentral[..., 0]) | (\n                ct_bboxes[..., 2] >= rcentral[..., 2])\n            br_cty_inds = (ct_bboxes[..., 3] <= rcentral[..., 1]) | (\n                ct_bboxes[..., 3] >= rcentral[..., 3])\n\n        if with_embedding:\n            tl_emb = transpose_and_gather_feat(tl_emb, tl_inds)\n            tl_emb = tl_emb.view(batch, k, 1)\n            br_emb = 
transpose_and_gather_feat(br_emb, br_inds)\n            br_emb = br_emb.view(batch, 1, k)\n            dists = torch.abs(tl_emb - br_emb)\n\n        tl_scores = tl_scores.view(batch, k, 1).repeat(1, 1, k)\n        br_scores = br_scores.view(batch, 1, k).repeat(1, k, 1)\n\n        scores = (tl_scores + br_scores) / 2  # scores for all possible boxes\n\n        # tl and br should have same class\n        tl_clses = tl_clses.view(batch, k, 1).repeat(1, 1, k)\n        br_clses = br_clses.view(batch, 1, k).repeat(1, k, 1)\n        cls_inds = (tl_clses != br_clses)\n\n        # reject boxes based on distances\n        dist_inds = dists > distance_threshold\n\n        # reject boxes based on widths and heights\n        width_inds = (br_xs <= tl_xs)\n        height_inds = (br_ys <= tl_ys)\n\n        # No use `scores[cls_inds]`, instead we use `torch.where` here.\n        # Since only 1-D indices with type 'tensor(bool)' are supported\n        # when exporting to ONNX, any other bool indices with more dimensions\n        # (e.g. 2-D bool tensor) as input parameter in node is invalid\n        negative_scores = -1 * torch.ones_like(scores)\n        scores = torch.where(cls_inds, negative_scores, scores)\n        scores = torch.where(width_inds, negative_scores, scores)\n        scores = torch.where(height_inds, negative_scores, scores)\n        scores = torch.where(dist_inds, negative_scores, scores)\n\n        if with_centripetal_shift:\n            scores[tl_ctx_inds] = -1\n            scores[tl_cty_inds] = -1\n            scores[br_ctx_inds] = -1\n            scores[br_cty_inds] = -1\n\n        scores = scores.view(batch, -1)\n        scores, inds = torch.topk(scores, num_dets)\n        scores = scores.unsqueeze(2)\n\n        bboxes = bboxes.view(batch, -1, 4)\n        bboxes = gather_feat(bboxes, inds)\n\n        clses = tl_clses.contiguous().view(batch, -1, 1)\n        clses = gather_feat(clses, inds).float()\n\n        return bboxes, scores, clses\n\n    def onnx_export(self,\n                    tl_heats,\n                    br_heats,\n                    tl_embs,\n                    br_embs,\n                    tl_offs,\n                    br_offs,\n                    img_metas,\n                    rescale=False,\n                    with_nms=True):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            tl_heats (list[Tensor]): Top-left corner heatmaps for each level\n                with shape (N, num_classes, H, W).\n            br_heats (list[Tensor]): Bottom-right corner heatmaps for each\n                level with shape (N, num_classes, H, W).\n            tl_embs (list[Tensor]): Top-left corner embeddings for each level\n                with shape (N, corner_emb_channels, H, W).\n            br_embs (list[Tensor]): Bottom-right corner embeddings for each\n                level with shape (N, corner_emb_channels, H, W).\n            tl_offs (list[Tensor]): Top-left corner offsets for each level\n                with shape (N, corner_offset_channels, H, W).\n            br_offs (list[Tensor]): Bottom-right corner offsets for each level\n                with shape (N, corner_offset_channels, H, W).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n      
  Returns:\n            tuple[Tensor, Tensor]: The first tensor is bboxes with shape\n            [N, num_det, 5], where 5 is arranged as (x1, y1, x2, y2, score),\n            and the second tensor is class labels of shape [N, num_det].\n        \"\"\"\n        assert tl_heats[-1].shape[0] == br_heats[-1].shape[0] == len(\n            img_metas) == 1\n        result_list = []\n        for img_id in range(len(img_metas)):\n            result_list.append(\n                self._get_bboxes_single(\n                    tl_heats[-1][img_id:img_id + 1, :],\n                    br_heats[-1][img_id:img_id + 1, :],\n                    tl_offs[-1][img_id:img_id + 1, :],\n                    br_offs[-1][img_id:img_id + 1, :],\n                    img_metas[img_id],\n                    tl_emb=tl_embs[-1][img_id:img_id + 1, :],\n                    br_emb=br_embs[-1][img_id:img_id + 1, :],\n                    rescale=rescale,\n                    with_nms=with_nms))\n\n        detections, labels = result_list[0]\n        # batch_size is 1 here; shapes are [1, num_det, 5] and [1, num_det]\n        return detections.unsqueeze(0), labels.unsqueeze(0)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/ddod_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, Scale, bias_init_with_prob, normal_init\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (anchor_inside_flags, build_assigner, build_sampler,\n                        images_to_levels, multi_apply, reduce_mean, unmap)\nfrom mmdet.core.bbox import bbox_overlaps\nfrom ..builder import HEADS, build_loss\nfrom .anchor_head import AnchorHead\n\nEPS = 1e-12\n\n\n@HEADS.register_module()\nclass DDODHead(AnchorHead):\n    \"\"\"DDOD head decomposes conjunctions lying in most current one-stage\n    detectors via label assignment disentanglement, spatial feature\n    disentanglement, and pyramid supervision disentanglement.\n\n    https://arxiv.org/abs/2107.02963\n\n    Args:\n        num_classes (int): Number of categories excluding the\n            background category.\n        in_channels (int): Number of channels in the input feature map.\n        stacked_convs (int): The number of stacked Conv. Default: 4.\n        conv_cfg (dict): Conv config of ddod head. Default: None.\n        use_dcn (bool): Use dcn, Same as ATSS when False. Default: True.\n        norm_cfg (dict): Normal config of ddod head. Default:\n            dict(type='GN', num_groups=32, requires_grad=True).\n        loss_iou (dict): Config of IoU loss. Default:\n            dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0).\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 stacked_convs=4,\n                 conv_cfg=None,\n                 use_dcn=True,\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 loss_iou=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 **kwargs):\n        self.stacked_convs = stacked_convs\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.use_dcn = use_dcn\n        super(DDODHead, self).__init__(num_classes, in_channels, **kwargs)\n\n        self.sampling = False\n        if self.train_cfg:\n            self.cls_assigner = build_assigner(self.train_cfg.assigner)\n            self.reg_assigner = build_assigner(self.train_cfg.reg_assigner)\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n        self.loss_iou = build_loss(loss_iou)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=dict(type='DCN', deform_groups=1)\n                    if i == 0 and self.use_dcn else self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=dict(type='DCN', deform_groups=1)\n               
     if i == 0 and self.use_dcn else self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        self.atss_cls = nn.Conv2d(\n            self.feat_channels,\n            self.num_base_priors * self.cls_out_channels,\n            3,\n            padding=1)\n        self.atss_reg = nn.Conv2d(\n            self.feat_channels, self.num_base_priors * 4, 3, padding=1)\n        self.atss_iou = nn.Conv2d(\n            self.feat_channels, self.num_base_priors * 1, 3, padding=1)\n        self.scales = nn.ModuleList(\n            [Scale(1.0) for _ in self.prior_generator.strides])\n\n        # we use the global list in loss\n        self.cls_num_pos_samples_per_level = [\n            0. for _ in range(len(self.prior_generator.strides))\n        ]\n        self.reg_num_pos_samples_per_level = [\n            0. for _ in range(len(self.prior_generator.strides))\n        ]\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\"\"\"\n        for m in self.cls_convs:\n            normal_init(m.conv, std=0.01)\n        for m in self.reg_convs:\n            normal_init(m.conv, std=0.01)\n        normal_init(self.atss_reg, std=0.01)\n        normal_init(self.atss_iou, std=0.01)\n        bias_cls = bias_init_with_prob(0.01)\n        normal_init(self.atss_cls, std=0.01, bias=bias_cls)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple: Usually a tuple of classification scores and bbox prediction\n                cls_scores (list[Tensor]): Classification scores for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_base_priors * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_base_priors * 4.\n                iou_preds (list[Tensor]): IoU scores for all scale levels,\n                    each is a 4D-tensor, the channels number is\n                    num_base_priors * 1.\n        \"\"\"\n        return multi_apply(self.forward_single, feats, self.scales)\n\n    def forward_single(self, x, scale):\n        \"\"\"Forward feature of a single scale level.\n\n        Args:\n            x (Tensor): Features of a single scale level.\n            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize\n                the bbox prediction.\n\n        Returns:\n            tuple:\n                - cls_score (Tensor): Cls scores for a single scale level \\\n                    the channels number is num_base_priors * num_classes.\n                - bbox_pred (Tensor): Box energies / deltas for a single \\\n                    scale level, the channels number is num_base_priors * 4.\n                - iou_pred (Tensor): Iou for a single scale level, the \\\n                    channel number is (N, num_base_priors * 1, H, W).\n        \"\"\"\n        cls_feat = x\n        reg_feat = x\n        for cls_conv in self.cls_convs:\n            cls_feat = cls_conv(cls_feat)\n        for reg_conv in self.reg_convs:\n            reg_feat = reg_conv(reg_feat)\n        cls_score = self.atss_cls(cls_feat)\n        # we just follow atss, not apply exp in bbox_pred\n        bbox_pred = scale(self.atss_reg(reg_feat)).float()\n        iou_pred = self.atss_iou(reg_feat)\n        return cls_score, 
bbox_pred, iou_pred\n\n    def loss_cls_single(self, cls_score, labels, label_weights,\n                        reweight_factor, num_total_samples):\n        \"\"\"Compute cls loss of a single scale level.\n\n        Args:\n            cls_score (Tensor): Box scores for each scale level\n                Has shape (N, num_base_priors * num_classes, H, W).\n            labels (Tensor): Labels of each anchors with shape\n                (N, num_total_anchors).\n            label_weights (Tensor): Label weights of each anchor with shape\n                (N, num_total_anchors)\n            reweight_factor (list[int]): Reweight factor for cls and reg\n                loss.\n            num_total_samples (int): Number of positive samples that is\n                reduced over all GPUs.\n\n        Returns:\n            tuple[Tensor]: A tuple of loss components.\n        \"\"\"\n        cls_score = cls_score.permute(0, 2, 3, 1).reshape(\n            -1, self.cls_out_channels).contiguous()\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        loss_cls = self.loss_cls(\n            cls_score, labels, label_weights, avg_factor=num_total_samples)\n        return reweight_factor * loss_cls,\n\n    def loss_reg_single(self, anchors, bbox_pred, iou_pred, labels,\n                        label_weights, bbox_targets, bbox_weights,\n                        reweight_factor, num_total_samples):\n        \"\"\"Compute reg loss of a single scale level.\n\n        Args:\n            anchors (Tensor): Box reference for each scale level with shape\n                (N, num_total_anchors, 4).\n            bbox_pred (Tensor): Box energies / deltas for each scale\n                level with shape (N, num_base_priors * 4, H, W).\n            iou_pred (Tensor): Iou for a single scale level, the\n                channel number is (N, num_base_priors * 1, H, W).\n            labels (Tensor): Labels of each anchors with shape\n                (N, num_total_anchors).\n            label_weights (Tensor): Label weights of each anchor with shape\n                (N, num_total_anchors)\n            bbox_targets (Tensor): BBox regression targets of each anchor\n                weight shape (N, num_total_anchors, 4).\n            bbox_weights (Tensor): BBox weights of all anchors in the\n                image with shape (N, 4)\n            reweight_factor (list[int]): Reweight factor for cls and reg\n                loss.\n            num_total_samples (int): Number of positive samples that is\n                reduced over all GPUs.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        anchors = anchors.reshape(-1, 4)\n        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n        iou_pred = iou_pred.permute(0, 2, 3, 1).reshape(-1, )\n        bbox_targets = bbox_targets.reshape(-1, 4)\n        bbox_weights = bbox_weights.reshape(-1, 4)\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n\n        iou_targets = label_weights.new_zeros(labels.shape)\n        iou_weights = label_weights.new_zeros(labels.shape)\n        iou_weights[(bbox_weights.sum(axis=1) > 0).nonzero(\n            as_tuple=False)] = 1.\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = ((labels >= 0)\n                    &\n                    (labels < bg_class_ind)).nonzero(as_tuple=False).squeeze(1)\n\n        if len(pos_inds) > 0:\n            
pos_bbox_targets = bbox_targets[pos_inds]\n            pos_bbox_pred = bbox_pred[pos_inds]\n            pos_anchors = anchors[pos_inds]\n\n            pos_decode_bbox_pred = self.bbox_coder.decode(\n                pos_anchors, pos_bbox_pred)\n            pos_decode_bbox_targets = self.bbox_coder.decode(\n                pos_anchors, pos_bbox_targets)\n\n            # regression loss\n            loss_bbox = self.loss_bbox(\n                pos_decode_bbox_pred,\n                pos_decode_bbox_targets,\n                avg_factor=num_total_samples)\n\n            iou_targets[pos_inds] = bbox_overlaps(\n                pos_decode_bbox_pred.detach(),\n                pos_decode_bbox_targets,\n                is_aligned=True)\n            loss_iou = self.loss_iou(\n                iou_pred,\n                iou_targets,\n                iou_weights,\n                avg_factor=num_total_samples)\n        else:\n            loss_bbox = bbox_pred.sum() * 0\n            loss_iou = iou_pred.sum() * 0\n\n        return reweight_factor * loss_bbox, reweight_factor * loss_iou\n\n    def calc_reweight_factor(self, labels_list):\n        \"\"\"Compute reweight_factor for regression and classification loss.\"\"\"\n        # get pos samples for each level\n        bg_class_ind = self.num_classes\n        for ii, each_level_label in enumerate(labels_list):\n            pos_inds = ((each_level_label >= 0) &\n                        (each_level_label < bg_class_ind)).nonzero(\n                            as_tuple=False).squeeze(1)\n            self.cls_num_pos_samples_per_level[ii] += len(pos_inds)\n        # get reweight factor from 1 ~ 2 with bilinear interpolation\n        min_pos_samples = min(self.cls_num_pos_samples_per_level)\n        max_pos_samples = max(self.cls_num_pos_samples_per_level)\n        interval = 1. / (max_pos_samples - min_pos_samples + 1e-10)\n        reweight_factor_per_level = []\n        for pos_samples in self.cls_num_pos_samples_per_level:\n            factor = 2. 
- (pos_samples - min_pos_samples) * interval\n            reweight_factor_per_level.append(factor)\n        return reweight_factor_per_level\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'iou_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             iou_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_base_priors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_base_priors * 4, H, W)\n            iou_preds (list[Tensor]): Score factor for all scale level,\n                each is a 4D-tensor, has shape (batch_size, 1, H, W).\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n\n        # calculate common vars for cls and reg assigners at once\n        targets_com = self.process_predictions_and_anchors(\n            anchor_list, valid_flag_list, cls_scores, bbox_preds, img_metas,\n            gt_bboxes_ignore)\n        (anchor_list, valid_flag_list, num_level_anchors_list, cls_score_list,\n         bbox_pred_list, gt_bboxes_ignore_list) = targets_com\n\n        # classification branch assigner\n        cls_targets = self.get_cls_targets(\n            anchor_list,\n            valid_flag_list,\n            num_level_anchors_list,\n            cls_score_list,\n            bbox_pred_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore_list,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if cls_targets is None:\n            return None\n\n        (cls_anchor_list, labels_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, num_total_pos, num_total_neg) = cls_targets\n\n        num_total_samples = reduce_mean(\n            torch.tensor(num_total_pos, dtype=torch.float,\n                         device=device)).item()\n        num_total_samples = max(num_total_samples, 1.0)\n\n        reweight_factor_per_level = self.calc_reweight_factor(labels_list)\n\n        cls_losses_cls, = multi_apply(\n            self.loss_cls_single,\n            cls_scores,\n            labels_list,\n            label_weights_list,\n            reweight_factor_per_level,\n            num_total_samples=num_total_samples)\n\n        # regression branch assigner\n        reg_targets = self.get_reg_targets(\n            anchor_list,\n   
         valid_flag_list,\n            num_level_anchors_list,\n            cls_score_list,\n            bbox_pred_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore_list,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if reg_targets is None:\n            return None\n\n        (reg_anchor_list, labels_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, num_total_pos, num_total_neg) = reg_targets\n\n        num_total_samples = reduce_mean(\n            torch.tensor(num_total_pos, dtype=torch.float,\n                         device=device)).item()\n        num_total_samples = max(num_total_samples, 1.0)\n\n        reweight_factor_per_level = self.calc_reweight_factor(labels_list)\n\n        reg_losses_bbox, reg_losses_iou = multi_apply(\n            self.loss_reg_single,\n            reg_anchor_list,\n            bbox_preds,\n            iou_preds,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            reweight_factor_per_level,\n            num_total_samples=num_total_samples)\n\n        return dict(\n            loss_cls=cls_losses_cls,\n            loss_bbox=reg_losses_bbox,\n            loss_iou=reg_losses_iou)\n\n    def process_predictions_and_anchors(self, anchor_list, valid_flag_list,\n                                        cls_scores, bbox_preds, img_metas,\n                                        gt_bboxes_ignore_list):\n        \"\"\"Compute common vars for regression and classification targets.\n\n        Args:\n            anchor_list (list[Tensor]): anchors of each image.\n            valid_flag_list (list[Tensor]): Valid flags of each image.\n            cls_scores (list[Tensor]): Classification scores for all scale\n                levels, each is a 4D-tensor, the channels number is\n                num_base_priors * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for all scale\n                levels, each is a 4D-tensor, the channels number is\n                num_base_priors * 4.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore_list (list[Tensor] | None): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Return:\n            tuple[Tensor]: A tuple of common loss vars.\n        \"\"\"\n        num_imgs = len(img_metas)\n        assert len(anchor_list) == len(valid_flag_list) == num_imgs\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n        num_level_anchors_list = [num_level_anchors] * num_imgs\n\n        anchor_list_ = []\n        valid_flag_list_ = []\n        # concat all level anchors and flags to a single tensor\n        for i in range(num_imgs):\n            assert len(anchor_list[i]) == len(valid_flag_list[i])\n            anchor_list_.append(torch.cat(anchor_list[i]))\n            valid_flag_list_.append(torch.cat(valid_flag_list[i]))\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n\n        num_levels = len(cls_scores)\n        cls_score_list = []\n        bbox_pred_list = []\n\n        mlvl_cls_score_list = [\n            cls_score.permute(0, 2, 3, 1).reshape(\n                num_imgs, -1, 
self.num_base_priors * self.cls_out_channels)\n            for cls_score in cls_scores\n        ]\n        mlvl_bbox_pred_list = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1,\n                                                  self.num_base_priors * 4)\n            for bbox_pred in bbox_preds\n        ]\n\n        for i in range(num_imgs):\n            mlvl_cls_tensor_list = [\n                mlvl_cls_score_list[j][i] for j in range(num_levels)\n            ]\n            mlvl_bbox_tensor_list = [\n                mlvl_bbox_pred_list[j][i] for j in range(num_levels)\n            ]\n            cat_mlvl_cls_score = torch.cat(mlvl_cls_tensor_list, dim=0)\n            cat_mlvl_bbox_pred = torch.cat(mlvl_bbox_tensor_list, dim=0)\n            cls_score_list.append(cat_mlvl_cls_score)\n            bbox_pred_list.append(cat_mlvl_bbox_pred)\n        return (anchor_list_, valid_flag_list_, num_level_anchors_list,\n                cls_score_list, bbox_pred_list, gt_bboxes_ignore_list)\n\n    def get_cls_targets(self,\n                        anchor_list,\n                        valid_flag_list,\n                        num_level_anchors_list,\n                        cls_score_list,\n                        bbox_pred_list,\n                        gt_bboxes_list,\n                        img_metas,\n                        gt_bboxes_ignore_list=None,\n                        gt_labels_list=None,\n                        label_channels=1,\n                        unmap_outputs=True):\n        \"\"\"Get cls targets for DDOD head.\n\n        This method is almost the same as `AnchorHead.get_targets()`.\n        Besides returning the targets as the parent  method does,\n        it also returns the anchors as the first element of the\n        returned tuple.\n\n        Args:\n            anchor_list (list[Tensor]): anchors of each image.\n            valid_flag_list (list[Tensor]): Valid flags of each image.\n            num_level_anchors_list (list[Tensor]): Number of anchors of each\n                scale level of all image.\n            cls_score_list (list[Tensor]): Classification scores for all scale\n                levels, each is a 4D-tensor, the channels number is\n                num_base_priors * num_classes.\n            bbox_pred_list (list[Tensor]): Box energies / deltas for all scale\n                levels, each is a 4D-tensor, the channels number is\n                num_base_priors * 4.\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore_list (list[Tensor] | None): specify which bounding\n                boxes can be ignored when computing the loss.\n            gt_labels_list (list[Tensor]): class indices corresponding to\n                each box.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Return:\n            tuple[Tensor]: A tuple of cls targets components.\n        \"\"\"\n        (all_anchors, all_labels, all_label_weights, all_bbox_targets,\n         all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(\n             self._get_target_single,\n             anchor_list,\n             valid_flag_list,\n             cls_score_list,\n             bbox_pred_list,\n             num_level_anchors_list,\n             gt_bboxes_list,\n             
gt_bboxes_ignore_list,\n             gt_labels_list,\n             img_metas,\n             label_channels=label_channels,\n             unmap_outputs=unmap_outputs,\n             is_cls_assigner=True)\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        # split targets to a list w.r.t. multiple levels\n        anchors_list = images_to_levels(all_anchors, num_level_anchors_list[0])\n        labels_list = images_to_levels(all_labels, num_level_anchors_list[0])\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors_list[0])\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors_list[0])\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_anchors_list[0])\n        return (anchors_list, labels_list, label_weights_list,\n                bbox_targets_list, bbox_weights_list, num_total_pos,\n                num_total_neg)\n\n    def get_reg_targets(self,\n                        anchor_list,\n                        valid_flag_list,\n                        num_level_anchors_list,\n                        cls_score_list,\n                        bbox_pred_list,\n                        gt_bboxes_list,\n                        img_metas,\n                        gt_bboxes_ignore_list=None,\n                        gt_labels_list=None,\n                        label_channels=1,\n                        unmap_outputs=True):\n        \"\"\"Get reg targets for DDOD head.\n\n        This method is almost the same as `AnchorHead.get_targets()` when\n        is_cls_assigner is False. 
Besides returning the targets as the parent\n        method does, it also returns the anchors as the first element of the\n        returned tuple.\n\n        Args:\n            anchor_list (list[Tensor]): anchors of each image.\n            valid_flag_list (list[Tensor]): Valid flags of each image.\n            num_level_anchors (int): Number of anchors of each scale level.\n            cls_scores (list[Tensor]): Classification scores for all scale\n                levels, each is a 4D-tensor, the channels number is\n                num_base_priors * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for all scale\n                levels, each is a 4D-tensor, the channels number is\n                num_base_priors * 4.\n            gt_labels_list (list[Tensor]): class indices corresponding to\n                each box.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore_list (list[Tensor] | None): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Return:\n            tuple[Tensor]: A tuple of reg targets components.\n        \"\"\"\n        (all_anchors, all_labels, all_label_weights, all_bbox_targets,\n         all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(\n             self._get_target_single,\n             anchor_list,\n             valid_flag_list,\n             cls_score_list,\n             bbox_pred_list,\n             num_level_anchors_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             gt_labels_list,\n             img_metas,\n             label_channels=label_channels,\n             unmap_outputs=unmap_outputs,\n             is_cls_assigner=False)\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        # split targets to a list w.r.t. 
multiple levels\n        anchors_list = images_to_levels(all_anchors, num_level_anchors_list[0])\n        labels_list = images_to_levels(all_labels, num_level_anchors_list[0])\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors_list[0])\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors_list[0])\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_anchors_list[0])\n        return (anchors_list, labels_list, label_weights_list,\n                bbox_targets_list, bbox_weights_list, num_total_pos,\n                num_total_neg)\n\n    def _get_target_single(self,\n                           flat_anchors,\n                           valid_flags,\n                           cls_scores,\n                           bbox_preds,\n                           num_level_anchors,\n                           gt_bboxes,\n                           gt_bboxes_ignore,\n                           gt_labels,\n                           img_meta,\n                           label_channels=1,\n                           unmap_outputs=True,\n                           is_cls_assigner=True):\n        \"\"\"Compute regression, classification targets for anchors in a single\n        image.\n\n        Args:\n            flat_anchors (Tensor): Multi-level anchors of the image,\n                which are concatenated into a single tensor of shape\n                (num_base_priors, 4).\n            valid_flags (Tensor): Multi level valid flags of the image,\n                which are concatenated into a single tensor of\n                shape (num_base_priors,).\n            cls_scores (Tensor): Classification scores for all scale\n                levels of the image.\n            bbox_preds (Tensor): Box energies / deltas for all scale\n                levels of the image.\n            num_level_anchors (list[int]): Number of anchors of each\n                scale level.\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, ).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts, ).\n            img_meta (dict): Meta info of the image.\n            label_channels (int): Channel of label. Default: 1.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors. 
Default: True.\n            is_cls_assigner (bool): Classification or regression.\n                Default: True.\n\n        Returns:\n            tuple: N is the number of total anchors in the image.\n                - labels (Tensor): Labels of all anchors in the image with \\\n                    shape (N, ).\n                - label_weights (Tensor): Label weights of all anchor in the \\\n                    image with shape (N, ).\n                - bbox_targets (Tensor): BBox targets of all anchors in the \\\n                    image with shape (N, 4).\n                - bbox_weights (Tensor): BBox weights of all anchors in the \\\n                    image with shape (N, 4)\n                - pos_inds (Tensor): Indices of positive anchor with shape \\\n                    (num_pos, ).\n                - neg_inds (Tensor): Indices of negative anchor with shape \\\n                    (num_neg, ).\n        \"\"\"\n        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,\n                                           img_meta['img_shape'][:2],\n                                           self.train_cfg.allowed_border)\n        if not inside_flags.any():\n            return (None, ) * 7\n        # assign gt and sample anchors\n        anchors = flat_anchors[inside_flags, :]\n\n        num_level_anchors_inside = self.get_num_level_anchors_inside(\n            num_level_anchors, inside_flags)\n        bbox_preds_valid = bbox_preds[inside_flags, :]\n        cls_scores_valid = cls_scores[inside_flags, :]\n\n        assigner = self.cls_assigner if is_cls_assigner else self.reg_assigner\n\n        # decode prediction out of assigner\n        bbox_preds_valid = self.bbox_coder.decode(anchors, bbox_preds_valid)\n        assign_result = assigner.assign(anchors, num_level_anchors_inside,\n                                        gt_bboxes, gt_bboxes_ignore, gt_labels,\n                                        cls_scores_valid, bbox_preds_valid)\n        sampling_result = self.sampler.sample(assign_result, anchors,\n                                              gt_bboxes)\n\n        num_valid_anchors = anchors.shape[0]\n        bbox_targets = torch.zeros_like(anchors)\n        bbox_weights = torch.zeros_like(anchors)\n        labels = anchors.new_full((num_valid_anchors, ),\n                                  self.num_classes,\n                                  dtype=torch.long)\n        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            if hasattr(self, 'bbox_coder'):\n                pos_bbox_targets = self.bbox_coder.encode(\n                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)\n            else:\n                # used in VFNetHead\n                pos_bbox_targets = sampling_result.pos_gt_bboxes\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class since v2.5.0\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n        if 
len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_anchors.size(0)\n            anchors = unmap(anchors, num_total_anchors, inside_flags)\n            labels = unmap(\n                labels, num_total_anchors, inside_flags, fill=self.num_classes)\n            label_weights = unmap(label_weights, num_total_anchors,\n                                  inside_flags)\n            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)\n            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)\n\n        return (anchors, labels, label_weights, bbox_targets, bbox_weights,\n                pos_inds, neg_inds)\n\n    def get_num_level_anchors_inside(self, num_level_anchors, inside_flags):\n        \"\"\"Get the anchors of each scale level inside.\n\n        Args:\n            num_level_anchors (list[int]): Number of anchors of each\n                scale level.\n            inside_flags (Tensor): Multi level inside flags of the image,\n                which are concatenated into a single tensor of\n                shape (num_base_priors,).\n\n        Returns:\n            list[int]: Number of anchors of each scale level inside.\n        \"\"\"\n        split_inside_flags = torch.split(inside_flags, num_level_anchors)\n        num_level_anchors_inside = [\n            int(flags.sum()) for flags in split_inside_flags\n        ]\n        return num_level_anchors_inside\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/deformable_detr_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import Linear, bias_init_with_prob, constant_init\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import multi_apply\nfrom mmdet.models.utils.transformer import inverse_sigmoid\nfrom ..builder import HEADS\nfrom .detr_head import DETRHead\n\n\n@HEADS.register_module()\nclass DeformableDETRHead(DETRHead):\n    \"\"\"Head of DeformDETR: Deformable DETR: Deformable Transformers for End-to-\n    End Object Detection.\n\n    Code is modified from the `official github repo\n    <https://github.com/fundamentalvision/Deformable-DETR>`_.\n\n    More details can be found in the `paper\n    <https://arxiv.org/abs/2010.04159>`_ .\n\n    Args:\n        with_box_refine (bool): Whether to refine the reference points\n            in the decoder. Defaults to False.\n        as_two_stage (bool) : Whether to generate the proposal from\n            the outputs of encoder.\n        transformer (obj:`ConfigDict`): ConfigDict is used for building\n            the Encoder and Decoder.\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 with_box_refine=False,\n                 as_two_stage=False,\n                 transformer=None,\n                 **kwargs):\n        self.with_box_refine = with_box_refine\n        self.as_two_stage = as_two_stage\n        if self.as_two_stage:\n            transformer['as_two_stage'] = self.as_two_stage\n\n        super(DeformableDETRHead, self).__init__(\n            *args, transformer=transformer, **kwargs)\n\n    def _init_layers(self):\n        \"\"\"Initialize classification branch and regression branch of head.\"\"\"\n\n        fc_cls = Linear(self.embed_dims, self.cls_out_channels)\n        reg_branch = []\n        for _ in range(self.num_reg_fcs):\n            reg_branch.append(Linear(self.embed_dims, self.embed_dims))\n            reg_branch.append(nn.ReLU())\n        reg_branch.append(Linear(self.embed_dims, 4))\n        reg_branch = nn.Sequential(*reg_branch)\n\n        def _get_clones(module, N):\n            return nn.ModuleList([copy.deepcopy(module) for i in range(N)])\n\n        # last reg_branch is used to generate proposal from\n        # encode feature map when as_two_stage is True.\n        num_pred = (self.transformer.decoder.num_layers + 1) if \\\n            self.as_two_stage else self.transformer.decoder.num_layers\n\n        if self.with_box_refine:\n            self.cls_branches = _get_clones(fc_cls, num_pred)\n            self.reg_branches = _get_clones(reg_branch, num_pred)\n        else:\n\n            self.cls_branches = nn.ModuleList(\n                [fc_cls for _ in range(num_pred)])\n            self.reg_branches = nn.ModuleList(\n                [reg_branch for _ in range(num_pred)])\n\n        if not self.as_two_stage:\n            self.query_embedding = nn.Embedding(self.num_query,\n                                                self.embed_dims * 2)\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the DeformDETR head.\"\"\"\n        self.transformer.init_weights()\n        if self.loss_cls.use_sigmoid:\n            bias_init = bias_init_with_prob(0.01)\n            for m in self.cls_branches:\n                nn.init.constant_(m.bias, bias_init)\n        for m in self.reg_branches:\n            constant_init(m[-1], 0, bias=0)\n        nn.init.constant_(self.reg_branches[0][-1].bias.data[2:], -2.0)\n        if 
self.as_two_stage:\n            for m in self.reg_branches:\n                nn.init.constant_(m[-1].bias.data[2:], 0.0)\n\n    def forward(self, mlvl_feats, img_metas):\n        \"\"\"Forward function.\n\n        Args:\n            mlvl_feats (tuple[Tensor]): Features from the upstream\n                network, each is a 4D-tensor with shape\n                (N, C, H, W).\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            all_cls_scores (Tensor): Outputs from the classification head, \\\n                shape [nb_dec, bs, num_query, cls_out_channels]. Note \\\n                cls_out_channels should includes background.\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression \\\n                head with normalized coordinate format (cx, cy, w, h). \\\n                Shape [nb_dec, bs, num_query, 4].\n            enc_outputs_class (Tensor): The score of each point on encode \\\n                feature map, has shape (N, h*w, num_class). Only when \\\n                as_two_stage is True it would be returned, otherwise \\\n                `None` would be returned.\n            enc_outputs_coord (Tensor): The proposal generate from the \\\n                encode feature map, has shape (N, h*w, 4). Only when \\\n                as_two_stage is True it would be returned, otherwise \\\n                `None` would be returned.\n        \"\"\"\n\n        batch_size = mlvl_feats[0].size(0)\n        input_img_h, input_img_w = img_metas[0]['batch_input_shape']\n        img_masks = mlvl_feats[0].new_ones(\n            (batch_size, input_img_h, input_img_w))\n        for img_id in range(batch_size):\n            img_h, img_w, _ = img_metas[img_id]['img_shape']\n            img_masks[img_id, :img_h, :img_w] = 0\n\n        mlvl_masks = []\n        mlvl_positional_encodings = []\n        for feat in mlvl_feats:\n            mlvl_masks.append(\n                F.interpolate(img_masks[None],\n                              size=feat.shape[-2:]).to(torch.bool).squeeze(0))\n            mlvl_positional_encodings.append(\n                self.positional_encoding(mlvl_masks[-1]))\n\n        query_embeds = None\n        if not self.as_two_stage:\n            query_embeds = self.query_embedding.weight\n        hs, init_reference, inter_references, \\\n            enc_outputs_class, enc_outputs_coord = self.transformer(\n                    mlvl_feats,\n                    mlvl_masks,\n                    query_embeds,\n                    mlvl_positional_encodings,\n                    reg_branches=self.reg_branches if self.with_box_refine else None,  # noqa:E501\n                    cls_branches=self.cls_branches if self.as_two_stage else None  # noqa:E501\n            )\n        hs = hs.permute(0, 2, 1, 3)\n        outputs_classes = []\n        outputs_coords = []\n\n        for lvl in range(hs.shape[0]):\n            if lvl == 0:\n                reference = init_reference\n            else:\n                reference = inter_references[lvl - 1]\n            reference = inverse_sigmoid(reference)\n            outputs_class = self.cls_branches[lvl](hs[lvl])\n            tmp = self.reg_branches[lvl](hs[lvl])\n            if reference.shape[-1] == 4:\n                tmp += reference\n            else:\n                assert reference.shape[-1] == 2\n                tmp[..., :2] += reference\n            outputs_coord = tmp.sigmoid()\n            outputs_classes.append(outputs_class)\n            outputs_coords.append(outputs_coord)\n\n        
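# Stack per-decoder-layer predictions: [num_layers, bs, num_query, cls_out_channels] and [num_layers, bs, num_query, 4].\n        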
outputs_classes = torch.stack(outputs_classes)\n        outputs_coords = torch.stack(outputs_coords)\n        if self.as_two_stage:\n            return outputs_classes, outputs_coords, \\\n                enc_outputs_class, \\\n                enc_outputs_coord.sigmoid()\n        else:\n            return outputs_classes, outputs_coords, \\\n                None, None\n\n    @force_fp32(apply_to=('all_cls_scores_list', 'all_bbox_preds_list'))\n    def loss(self,\n             all_cls_scores,\n             all_bbox_preds,\n             enc_cls_scores,\n             enc_bbox_preds,\n             gt_bboxes_list,\n             gt_labels_list,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"\"Loss function.\n\n        Args:\n            all_cls_scores (Tensor): Classification score of all\n                decoder layers, has shape\n                [nb_dec, bs, num_query, cls_out_channels].\n            all_bbox_preds (Tensor): Sigmoid regression\n                outputs of all decode layers. Each is a 4D-tensor with\n                normalized coordinate format (cx, cy, w, h) and shape\n                [nb_dec, bs, num_query, 4].\n            enc_cls_scores (Tensor): Classification scores of\n                points on encode feature map , has shape\n                (N, h*w, num_classes). Only be passed when as_two_stage is\n                True, otherwise is None.\n            enc_bbox_preds (Tensor): Regression results of each points\n                on the encode feature map, has shape (N, h*w, 4). Only be\n                passed when as_two_stage is True, otherwise is None.\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image with shape (num_gts, ).\n            img_metas (list[dict]): List of image meta information.\n            gt_bboxes_ignore (list[Tensor], optional): Bounding boxes\n                which can be ignored for each image. 
Default None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert gt_bboxes_ignore is None, \\\n            f'{self.__class__.__name__} only supports ' \\\n            f'for gt_bboxes_ignore setting to None.'\n\n        num_dec_layers = len(all_cls_scores)\n        all_gt_bboxes_list = [gt_bboxes_list for _ in range(num_dec_layers)]\n        all_gt_labels_list = [gt_labels_list for _ in range(num_dec_layers)]\n        all_gt_bboxes_ignore_list = [\n            gt_bboxes_ignore for _ in range(num_dec_layers)\n        ]\n        img_metas_list = [img_metas for _ in range(num_dec_layers)]\n\n        losses_cls, losses_bbox, losses_iou = multi_apply(\n            self.loss_single, all_cls_scores, all_bbox_preds,\n            all_gt_bboxes_list, all_gt_labels_list, img_metas_list,\n            all_gt_bboxes_ignore_list)\n\n        loss_dict = dict()\n        # loss of proposal generated from encode feature map.\n        if enc_cls_scores is not None:\n            binary_labels_list = [\n                torch.zeros_like(gt_labels_list[i])\n                for i in range(len(img_metas))\n            ]\n            enc_loss_cls, enc_losses_bbox, enc_losses_iou = \\\n                self.loss_single(enc_cls_scores, enc_bbox_preds,\n                                 gt_bboxes_list, binary_labels_list,\n                                 img_metas, gt_bboxes_ignore)\n            loss_dict['enc_loss_cls'] = enc_loss_cls\n            loss_dict['enc_loss_bbox'] = enc_losses_bbox\n            loss_dict['enc_loss_iou'] = enc_losses_iou\n\n        # loss from the last decoder layer\n        loss_dict['loss_cls'] = losses_cls[-1]\n        loss_dict['loss_bbox'] = losses_bbox[-1]\n        loss_dict['loss_iou'] = losses_iou[-1]\n        # loss from other decoder layers\n        num_dec_layer = 0\n        for loss_cls_i, loss_bbox_i, loss_iou_i in zip(losses_cls[:-1],\n                                                       losses_bbox[:-1],\n                                                       losses_iou[:-1]):\n            loss_dict[f'd{num_dec_layer}.loss_cls'] = loss_cls_i\n            loss_dict[f'd{num_dec_layer}.loss_bbox'] = loss_bbox_i\n            loss_dict[f'd{num_dec_layer}.loss_iou'] = loss_iou_i\n            num_dec_layer += 1\n        return loss_dict\n\n    @force_fp32(apply_to=('all_cls_scores_list', 'all_bbox_preds_list'))\n    def get_bboxes(self,\n                   all_cls_scores,\n                   all_bbox_preds,\n                   enc_cls_scores,\n                   enc_bbox_preds,\n                   img_metas,\n                   rescale=False):\n        \"\"\"Transform network outputs for a batch into bbox predictions.\n\n        Args:\n            all_cls_scores (Tensor): Classification score of all\n                decoder layers, has shape\n                [nb_dec, bs, num_query, cls_out_channels].\n            all_bbox_preds (Tensor): Sigmoid regression\n                outputs of all decode layers. Each is a 4D-tensor with\n                normalized coordinate format (cx, cy, w, h) and shape\n                [nb_dec, bs, num_query, 4].\n            enc_cls_scores (Tensor): Classification scores of\n                points on encode feature map , has shape\n                (N, h*w, num_classes). Only be passed when as_two_stage is\n                True, otherwise is None.\n            enc_bbox_preds (Tensor): Regression results of each points\n                on the encode feature map, has shape (N, h*w, 4). 
Only be\n                passed when as_two_stage is True, otherwise is None.\n            img_metas (list[dict]): Meta information of each image.\n            rescale (bool, optional): If True, return boxes in original\n                image space. Default False.\n\n        Returns:\n            list[list[Tensor, Tensor]]: Each item in result_list is 2-tuple. \\\n                The first item is an (n, 5) tensor, where the first 4 columns \\\n                are bounding box positions (tl_x, tl_y, br_x, br_y) and the \\\n                5-th column is a score between 0 and 1. The second item is a \\\n                (n,) tensor where each item is the predicted class label of \\\n                the corresponding box.\n        \"\"\"\n        cls_scores = all_cls_scores[-1]\n        bbox_preds = all_bbox_preds[-1]\n\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score = cls_scores[img_id]\n            bbox_pred = bbox_preds[img_id]\n            img_shape = img_metas[img_id]['img_shape']\n            scale_factor = img_metas[img_id]['scale_factor']\n            proposals = self._get_bboxes_single(cls_score, bbox_pred,\n                                                img_shape, scale_factor,\n                                                rescale)\n            result_list.append(proposals)\n        return result_list\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/dense_test_mixins.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport sys\nfrom inspect import signature\n\nimport torch\nfrom mmcv.ops import batched_nms\n\nfrom mmdet.core import bbox_mapping_back, merge_aug_proposals\n\nif sys.version_info >= (3, 7):\n    from mmdet.utils.contextmanagers import completed\n\n\nclass BBoxTestMixin(object):\n    \"\"\"Mixin class for testing det bboxes via DenseHead.\"\"\"\n\n    def simple_test_bboxes(self, feats, img_metas, rescale=False):\n        \"\"\"Test det bboxes without test-time augmentation, can be applied in\n        DenseHead except for ``RPNHead`` and its variants, e.g., ``GARPNHead``,\n        etc.\n\n        Args:\n            feats (tuple[torch.Tensor]): Multi-level features from the\n                upstream network, each is a 4D-tensor.\n            img_metas (list[dict]): List of image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is ``bboxes`` with shape (n, 5),\n                where 5 represent (tl_x, tl_y, br_x, br_y, score).\n                The shape of the second tensor in the tuple is ``labels``\n                with shape (n,)\n        \"\"\"\n        outs = self.forward(feats)\n        results_list = self.get_bboxes(\n            *outs, img_metas=img_metas, rescale=rescale)\n        return results_list\n\n    def aug_test_bboxes(self, feats, img_metas, rescale=False):\n        \"\"\"Test det bboxes with test time augmentation, can be applied in\n        DenseHead except for ``RPNHead`` and its variants, e.g., ``GARPNHead``,\n        etc.\n\n        Args:\n            feats (list[Tensor]): the outer list indicates test-time\n                augmentations and inner Tensor should have a shape NxCxHxW,\n                which contains features for all images in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch. each dict has image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is ``bboxes`` with shape (n, 5),\n                where 5 represent (tl_x, tl_y, br_x, br_y, score).\n                The shape of the second tensor in the tuple is ``labels``\n                with shape (n,). 
The length of list should always be 1.\n        \"\"\"\n        # check with_nms argument\n        gb_sig = signature(self.get_bboxes)\n        gb_args = [p.name for p in gb_sig.parameters.values()]\n        gbs_sig = signature(self._get_bboxes_single)\n        gbs_args = [p.name for p in gbs_sig.parameters.values()]\n        assert ('with_nms' in gb_args) and ('with_nms' in gbs_args), \\\n            f'{self.__class__.__name__}' \\\n            ' does not support test-time augmentation'\n\n        aug_bboxes = []\n        aug_scores = []\n        aug_labels = []\n        for x, img_meta in zip(feats, img_metas):\n            # only one image in the batch\n            outs = self.forward(x)\n            bbox_outputs = self.get_bboxes(\n                *outs,\n                img_metas=img_meta,\n                cfg=self.test_cfg,\n                rescale=False,\n                with_nms=False)[0]\n            aug_bboxes.append(bbox_outputs[0])\n            aug_scores.append(bbox_outputs[1])\n            if len(bbox_outputs) >= 3:\n                aug_labels.append(bbox_outputs[2])\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = self.merge_aug_bboxes(\n            aug_bboxes, aug_scores, img_metas)\n        merged_labels = torch.cat(aug_labels, dim=0) if aug_labels else None\n\n        if merged_bboxes.numel() == 0:\n            det_bboxes = torch.cat([merged_bboxes, merged_scores[:, None]], -1)\n            return [\n                (det_bboxes, merged_labels),\n            ]\n\n        det_bboxes, keep_idxs = batched_nms(merged_bboxes, merged_scores,\n                                            merged_labels, self.test_cfg.nms)\n        det_bboxes = det_bboxes[:self.test_cfg.max_per_img]\n        det_labels = merged_labels[keep_idxs][:self.test_cfg.max_per_img]\n\n        if rescale:\n            _det_bboxes = det_bboxes\n        else:\n            _det_bboxes = det_bboxes.clone()\n            _det_bboxes[:, :4] *= det_bboxes.new_tensor(\n                img_metas[0][0]['scale_factor'])\n\n        return [\n            (_det_bboxes, det_labels),\n        ]\n\n    def simple_test_rpn(self, x, img_metas):\n        \"\"\"Test without augmentation, only for ``RPNHead`` and its variants,\n        e.g., ``GARPNHead``, etc.\n\n        Args:\n            x (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n            img_metas (list[dict]): Meta info of each image.\n\n        Returns:\n            list[Tensor]: Proposals of each image, each item has shape (n, 5),\n                where 5 represent (tl_x, tl_y, br_x, br_y, score).\n        \"\"\"\n        rpn_outs = self(x)\n        proposal_list = self.get_bboxes(*rpn_outs, img_metas=img_metas)\n        return proposal_list\n\n    def aug_test_rpn(self, feats, img_metas):\n        \"\"\"Test with augmentation for only for ``RPNHead`` and its variants,\n        e.g., ``GARPNHead``, etc.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                        a 4D-tensor.\n            img_metas (list[dict]): Meta info of each image.\n\n        Returns:\n            list[Tensor]: Proposals of each image, each item has shape (n, 5),\n                where 5 represent (tl_x, tl_y, br_x, br_y, score).\n        \"\"\"\n        samples_per_gpu = len(img_metas[0])\n        aug_proposals = [[] for _ in range(samples_per_gpu)]\n        for x, img_meta in zip(feats, img_metas):\n            
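# Run single-scale RPN inference on each augmented view; the per-image proposals are merged further below.\n            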
proposal_list = self.simple_test_rpn(x, img_meta)\n            for i, proposals in enumerate(proposal_list):\n                aug_proposals[i].append(proposals)\n        # reorganize the order of 'img_metas' to match the dimensions\n        # of 'aug_proposals'\n        aug_img_metas = []\n        for i in range(samples_per_gpu):\n            aug_img_meta = []\n            for j in range(len(img_metas)):\n                aug_img_meta.append(img_metas[j][i])\n            aug_img_metas.append(aug_img_meta)\n        # after merging, proposals will be rescaled to the original image size\n        merged_proposals = [\n            merge_aug_proposals(proposals, aug_img_meta, self.test_cfg)\n            for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas)\n        ]\n        return merged_proposals\n\n    if sys.version_info >= (3, 7):\n\n        async def async_simple_test_rpn(self, x, img_metas):\n            sleep_interval = self.test_cfg.pop('async_sleep_interval', 0.025)\n            async with completed(\n                    __name__, 'rpn_head_forward',\n                    sleep_interval=sleep_interval):\n                rpn_outs = self(x)\n\n            proposal_list = self.get_bboxes(*rpn_outs, img_metas=img_metas)\n            return proposal_list\n\n    def merge_aug_bboxes(self, aug_bboxes, aug_scores, img_metas):\n        \"\"\"Merge augmented detection bboxes and scores.\n\n        Args:\n            aug_bboxes (list[Tensor]): shape (n, 4*#class)\n            aug_scores (list[Tensor] or None): shape (n, #class)\n            img_shapes (list[Tensor]): shape (3, ).\n\n        Returns:\n            tuple[Tensor]: ``bboxes`` with shape (n,4), where\n            4 represent (tl_x, tl_y, br_x, br_y)\n            and ``scores`` with shape (n,).\n        \"\"\"\n        recovered_bboxes = []\n        for bboxes, img_info in zip(aug_bboxes, img_metas):\n            img_shape = img_info[0]['img_shape']\n            scale_factor = img_info[0]['scale_factor']\n            flip = img_info[0]['flip']\n            flip_direction = img_info[0]['flip_direction']\n            bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip,\n                                       flip_direction)\n            recovered_bboxes.append(bboxes)\n        bboxes = torch.cat(recovered_bboxes, dim=0)\n        if aug_scores is None:\n            return bboxes\n        else:\n            scores = torch.cat(aug_scores, dim=0)\n            return bboxes, scores\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/detr_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import Conv2d, Linear, build_activation_layer\nfrom mmcv.cnn.bricks.transformer import FFN, build_positional_encoding\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (bbox_cxcywh_to_xyxy, bbox_xyxy_to_cxcywh,\n                        build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom mmdet.models.utils import build_transformer\nfrom ..builder import HEADS, build_loss\nfrom .anchor_free_head import AnchorFreeHead\n\n\n@HEADS.register_module()\nclass DETRHead(AnchorFreeHead):\n    \"\"\"Implements the DETR transformer head.\n\n    See `paper: End-to-End Object Detection with Transformers\n    <https://arxiv.org/pdf/2005.12872>`_ for details.\n\n    Args:\n        num_classes (int): Number of categories excluding the background.\n        in_channels (int): Number of channels in the input feature map.\n        num_query (int): Number of query in Transformer.\n        num_reg_fcs (int, optional): Number of fully-connected layers used in\n            `FFN`, which is then used for the regression head. Default 2.\n        transformer (obj:`mmcv.ConfigDict`|dict): Config for transformer.\n            Default: None.\n        sync_cls_avg_factor (bool): Whether to sync the avg_factor of\n            all ranks. Default to False.\n        positional_encoding (obj:`mmcv.ConfigDict`|dict):\n            Config for position encoding.\n        loss_cls (obj:`mmcv.ConfigDict`|dict): Config of the\n            classification loss. Default `CrossEntropyLoss`.\n        loss_bbox (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression loss. Default `L1Loss`.\n        loss_iou (obj:`mmcv.ConfigDict`|dict): Config of the\n            regression iou loss. 
Default `GIoULoss`.\n        tran_cfg (obj:`mmcv.ConfigDict`|dict): Training config of\n            transformer head.\n        test_cfg (obj:`mmcv.ConfigDict`|dict): Testing config of\n            transformer head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    _version = 2\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 num_query=100,\n                 num_reg_fcs=2,\n                 transformer=None,\n                 sync_cls_avg_factor=False,\n                 positional_encoding=dict(\n                     type='SinePositionalEncoding',\n                     num_feats=128,\n                     normalize=True),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     bg_cls_weight=0.1,\n                     use_sigmoid=False,\n                     loss_weight=1.0,\n                     class_weight=1.0),\n                 loss_bbox=dict(type='L1Loss', loss_weight=5.0),\n                 loss_iou=dict(type='GIoULoss', loss_weight=2.0),\n                 train_cfg=dict(\n                     assigner=dict(\n                         type='HungarianAssigner',\n                         cls_cost=dict(type='ClassificationCost', weight=1.),\n                         reg_cost=dict(type='BBoxL1Cost', weight=5.0),\n                         iou_cost=dict(\n                             type='IoUCost', iou_mode='giou', weight=2.0))),\n                 test_cfg=dict(max_per_img=100),\n                 init_cfg=None,\n                 **kwargs):\n        # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,\n        # since it brings inconvenience when the initialization of\n        # `AnchorFreeHead` is called.\n        super(AnchorFreeHead, self).__init__(init_cfg)\n        self.bg_cls_weight = 0\n        self.sync_cls_avg_factor = sync_cls_avg_factor\n        class_weight = loss_cls.get('class_weight', None)\n        if class_weight is not None and (self.__class__ is DETRHead):\n            assert isinstance(class_weight, float), 'Expected ' \\\n                'class_weight to have type float. Found ' \\\n                f'{type(class_weight)}.'\n            # NOTE following the official DETR rep0, bg_cls_weight means\n            # relative classification weight of the no-object class.\n            bg_cls_weight = loss_cls.get('bg_cls_weight', class_weight)\n            assert isinstance(bg_cls_weight, float), 'Expected ' \\\n                'bg_cls_weight to have type float. 
Found ' \\\n                f'{type(bg_cls_weight)}.'\n            class_weight = torch.ones(num_classes + 1) * class_weight\n            # set background class as the last indice\n            class_weight[num_classes] = bg_cls_weight\n            loss_cls.update({'class_weight': class_weight})\n            if 'bg_cls_weight' in loss_cls:\n                loss_cls.pop('bg_cls_weight')\n            self.bg_cls_weight = bg_cls_weight\n\n        if train_cfg:\n            assert 'assigner' in train_cfg, 'assigner should be provided '\\\n                'when train_cfg is set.'\n            assigner = train_cfg['assigner']\n            assert loss_cls['loss_weight'] == assigner['cls_cost']['weight'], \\\n                'The classification weight for loss and matcher should be' \\\n                'exactly the same.'\n            assert loss_bbox['loss_weight'] == assigner['reg_cost'][\n                'weight'], 'The regression L1 weight for loss and matcher ' \\\n                'should be exactly the same.'\n            assert loss_iou['loss_weight'] == assigner['iou_cost']['weight'], \\\n                'The regression iou weight for loss and matcher should be' \\\n                'exactly the same.'\n            self.assigner = build_assigner(assigner)\n            # DETR sampling=False, so use PseudoSampler\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n        self.num_query = num_query\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.num_reg_fcs = num_reg_fcs\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self.fp16_enabled = False\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_iou = build_loss(loss_iou)\n\n        if self.loss_cls.use_sigmoid:\n            self.cls_out_channels = num_classes\n        else:\n            self.cls_out_channels = num_classes + 1\n        self.act_cfg = transformer.get('act_cfg',\n                                       dict(type='ReLU', inplace=True))\n        self.activate = build_activation_layer(self.act_cfg)\n        self.positional_encoding = build_positional_encoding(\n            positional_encoding)\n        self.transformer = build_transformer(transformer)\n        self.embed_dims = self.transformer.embed_dims\n        assert 'num_feats' in positional_encoding\n        num_feats = positional_encoding['num_feats']\n        assert num_feats * 2 == self.embed_dims, 'embed_dims should' \\\n            f' be exactly 2 times of num_feats. 
Found {self.embed_dims}' \\\n            f' and {num_feats}.'\n        self._init_layers()\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the transformer head.\"\"\"\n        self.input_proj = Conv2d(\n            self.in_channels, self.embed_dims, kernel_size=1)\n        self.fc_cls = Linear(self.embed_dims, self.cls_out_channels)\n        self.reg_ffn = FFN(\n            self.embed_dims,\n            self.embed_dims,\n            self.num_reg_fcs,\n            self.act_cfg,\n            dropout=0.0,\n            add_residual=False)\n        self.fc_reg = Linear(self.embed_dims, 4)\n        self.query_embedding = nn.Embedding(self.num_query, self.embed_dims)\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the transformer head.\"\"\"\n        # The initialization for transformer is important\n        self.transformer.init_weights()\n\n    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,\n                              missing_keys, unexpected_keys, error_msgs):\n        \"\"\"load checkpoints.\"\"\"\n        # NOTE here use `AnchorFreeHead` instead of `TransformerHead`,\n        # since `AnchorFreeHead._load_from_state_dict` should not be\n        # called here. Invoking the default `Module._load_from_state_dict`\n        # is enough.\n\n        # Names of some parameters in has been changed.\n        version = local_metadata.get('version', None)\n        if (version is None or version < 2) and self.__class__ is DETRHead:\n            convert_dict = {\n                '.self_attn.': '.attentions.0.',\n                '.ffn.': '.ffns.0.',\n                '.multihead_attn.': '.attentions.1.',\n                '.decoder.norm.': '.decoder.post_norm.'\n            }\n            state_dict_keys = list(state_dict.keys())\n            for k in state_dict_keys:\n                for ori_key, convert_key in convert_dict.items():\n                    if ori_key in k:\n                        convert_key = k.replace(ori_key, convert_key)\n                        state_dict[convert_key] = state_dict[k]\n                        del state_dict[k]\n\n        super(AnchorFreeHead,\n              self)._load_from_state_dict(state_dict, prefix, local_metadata,\n                                          strict, missing_keys,\n                                          unexpected_keys, error_msgs)\n\n    def forward(self, feats, img_metas):\n        \"\"\"Forward function.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            tuple[list[Tensor], list[Tensor]]: Outputs for all scale levels.\n\n                - all_cls_scores_list (list[Tensor]): Classification scores \\\n                    for each scale level. Each is a 4D-tensor with shape \\\n                    [nb_dec, bs, num_query, cls_out_channels]. Note \\\n                    `cls_out_channels` should includes background.\n                - all_bbox_preds_list (list[Tensor]): Sigmoid regression \\\n                    outputs for each scale level. 
Each is a 4D-tensor with \\\n                    normalized coordinate format (cx, cy, w, h) and shape \\\n                    [nb_dec, bs, num_query, 4].\n        \"\"\"\n        num_levels = len(feats)\n        img_metas_list = [img_metas for _ in range(num_levels)]\n        return multi_apply(self.forward_single, feats, img_metas_list)\n\n    def forward_single(self, x, img_metas):\n        \"\"\"\"Forward function for a single feature level.\n\n        Args:\n            x (Tensor): Input feature from backbone's single stage, shape\n                [bs, c, h, w].\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            all_cls_scores (Tensor): Outputs from the classification head,\n                shape [nb_dec, bs, num_query, cls_out_channels]. Note\n                cls_out_channels should includes background.\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression\n                head with normalized coordinate format (cx, cy, w, h).\n                Shape [nb_dec, bs, num_query, 4].\n        \"\"\"\n        # construct binary masks which used for the transformer.\n        # NOTE following the official DETR repo, non-zero values representing\n        # ignored positions, while zero values means valid positions.\n        batch_size = x.size(0)\n        input_img_h, input_img_w = img_metas[0]['batch_input_shape']\n        masks = x.new_ones((batch_size, input_img_h, input_img_w))\n        for img_id in range(batch_size):\n            img_h, img_w, _ = img_metas[img_id]['img_shape']\n            masks[img_id, :img_h, :img_w] = 0\n\n        x = self.input_proj(x)\n        # interpolate masks to have the same spatial shape with x\n        masks = F.interpolate(\n            masks.unsqueeze(1), size=x.shape[-2:]).to(torch.bool).squeeze(1)\n        # position encoding\n        pos_embed = self.positional_encoding(masks)  # [bs, embed_dim, h, w]\n        # outs_dec: [nb_dec, bs, num_query, embed_dim]\n        outs_dec, _ = self.transformer(x, masks, self.query_embedding.weight,\n                                       pos_embed)\n\n        all_cls_scores = self.fc_cls(outs_dec)\n        all_bbox_preds = self.fc_reg(self.activate(\n            self.reg_ffn(outs_dec))).sigmoid()\n        return all_cls_scores, all_bbox_preds\n\n    @force_fp32(apply_to=('all_cls_scores_list', 'all_bbox_preds_list'))\n    def loss(self,\n             all_cls_scores_list,\n             all_bbox_preds_list,\n             gt_bboxes_list,\n             gt_labels_list,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"\"Loss function.\n\n        Only outputs from the last feature level are used for computing\n        losses by default.\n\n        Args:\n            all_cls_scores_list (list[Tensor]): Classification outputs\n                for each feature level. Each is a 4D-tensor with shape\n                [nb_dec, bs, num_query, cls_out_channels].\n            all_bbox_preds_list (list[Tensor]): Sigmoid regression\n                outputs for each feature level. 
Each is a 4D-tensor with\n                normalized coordinate format (cx, cy, w, h) and shape\n                [nb_dec, bs, num_query, 4].\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image with shape (num_gts, ).\n            img_metas (list[dict]): List of image meta information.\n            gt_bboxes_ignore (list[Tensor], optional): Bounding boxes\n                which can be ignored for each image. Default None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        # NOTE defaultly only the outputs from the last feature scale is used.\n        all_cls_scores = all_cls_scores_list[-1]\n        all_bbox_preds = all_bbox_preds_list[-1]\n        assert gt_bboxes_ignore is None, \\\n            'Only supports for gt_bboxes_ignore setting to None.'\n\n        num_dec_layers = len(all_cls_scores)\n        all_gt_bboxes_list = [gt_bboxes_list for _ in range(num_dec_layers)]\n        all_gt_labels_list = [gt_labels_list for _ in range(num_dec_layers)]\n        all_gt_bboxes_ignore_list = [\n            gt_bboxes_ignore for _ in range(num_dec_layers)\n        ]\n        img_metas_list = [img_metas for _ in range(num_dec_layers)]\n\n        losses_cls, losses_bbox, losses_iou = multi_apply(\n            self.loss_single, all_cls_scores, all_bbox_preds,\n            all_gt_bboxes_list, all_gt_labels_list, img_metas_list,\n            all_gt_bboxes_ignore_list)\n\n        loss_dict = dict()\n        # loss from the last decoder layer\n        loss_dict['loss_cls'] = losses_cls[-1]\n        loss_dict['loss_bbox'] = losses_bbox[-1]\n        loss_dict['loss_iou'] = losses_iou[-1]\n        # loss from other decoder layers\n        num_dec_layer = 0\n        for loss_cls_i, loss_bbox_i, loss_iou_i in zip(losses_cls[:-1],\n                                                       losses_bbox[:-1],\n                                                       losses_iou[:-1]):\n            loss_dict[f'd{num_dec_layer}.loss_cls'] = loss_cls_i\n            loss_dict[f'd{num_dec_layer}.loss_bbox'] = loss_bbox_i\n            loss_dict[f'd{num_dec_layer}.loss_iou'] = loss_iou_i\n            num_dec_layer += 1\n        return loss_dict\n\n    def loss_single(self,\n                    cls_scores,\n                    bbox_preds,\n                    gt_bboxes_list,\n                    gt_labels_list,\n                    img_metas,\n                    gt_bboxes_ignore_list=None):\n        \"\"\"\"Loss function for outputs from a single decoder layer of a single\n        feature level.\n\n        Args:\n            cls_scores (Tensor): Box score logits from a single decoder layer\n                for all images. 
Shape [bs, num_query, cls_out_channels].\n            bbox_preds (Tensor): Sigmoid outputs from a single decoder layer\n                for all images, with normalized coordinate (cx, cy, w, h) and\n                shape [bs, num_query, 4].\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image with shape (num_gts, ).\n            img_metas (list[dict]): List of image meta information.\n            gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                boxes which can be ignored for each image. Default None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components for outputs from\n                a single decoder layer.\n        \"\"\"\n        num_imgs = cls_scores.size(0)\n        cls_scores_list = [cls_scores[i] for i in range(num_imgs)]\n        bbox_preds_list = [bbox_preds[i] for i in range(num_imgs)]\n        cls_reg_targets = self.get_targets(cls_scores_list, bbox_preds_list,\n                                           gt_bboxes_list, gt_labels_list,\n                                           img_metas, gt_bboxes_ignore_list)\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n        labels = torch.cat(labels_list, 0)\n        label_weights = torch.cat(label_weights_list, 0)\n        bbox_targets = torch.cat(bbox_targets_list, 0)\n        bbox_weights = torch.cat(bbox_weights_list, 0)\n\n        # classification loss\n        cls_scores = cls_scores.reshape(-1, self.cls_out_channels)\n        # construct weighted avg_factor to match with the official DETR repo\n        cls_avg_factor = num_total_pos * 1.0 + \\\n            num_total_neg * self.bg_cls_weight\n        if self.sync_cls_avg_factor:\n            cls_avg_factor = reduce_mean(\n                cls_scores.new_tensor([cls_avg_factor]))\n        cls_avg_factor = max(cls_avg_factor, 1)\n\n        loss_cls = self.loss_cls(\n            cls_scores, labels, label_weights, avg_factor=cls_avg_factor)\n\n        # Compute the average number of gt boxes across all gpus, for\n        # normalization purposes\n        num_total_pos = loss_cls.new_tensor([num_total_pos])\n        num_total_pos = torch.clamp(reduce_mean(num_total_pos), min=1).item()\n\n        # construct factors used for rescale bboxes\n        factors = []\n        for img_meta, bbox_pred in zip(img_metas, bbox_preds):\n            img_h, img_w, _ = img_meta['img_shape']\n            factor = bbox_pred.new_tensor([img_w, img_h, img_w,\n                                           img_h]).unsqueeze(0).repeat(\n                                               bbox_pred.size(0), 1)\n            factors.append(factor)\n        factors = torch.cat(factors, 0)\n\n        # DETR regress the relative position of boxes (cxcywh) in the image,\n        # thus the learning target is normalized by the image size. 
So here\n        # we need to re-scale them for calculating IoU loss\n        bbox_preds = bbox_preds.reshape(-1, 4)\n        bboxes = bbox_cxcywh_to_xyxy(bbox_preds) * factors\n        bboxes_gt = bbox_cxcywh_to_xyxy(bbox_targets) * factors\n\n        # regression IoU loss, defaultly GIoU loss\n        loss_iou = self.loss_iou(\n            bboxes, bboxes_gt, bbox_weights, avg_factor=num_total_pos)\n\n        # regression L1 loss\n        loss_bbox = self.loss_bbox(\n            bbox_preds, bbox_targets, bbox_weights, avg_factor=num_total_pos)\n        return loss_cls, loss_bbox, loss_iou\n\n    def get_targets(self,\n                    cls_scores_list,\n                    bbox_preds_list,\n                    gt_bboxes_list,\n                    gt_labels_list,\n                    img_metas,\n                    gt_bboxes_ignore_list=None):\n        \"\"\"\"Compute regression and classification targets for a batch image.\n\n        Outputs from a single decoder layer of a single feature level are used.\n\n        Args:\n            cls_scores_list (list[Tensor]): Box score logits from a single\n                decoder layer for each image with shape [num_query,\n                cls_out_channels].\n            bbox_preds_list (list[Tensor]): Sigmoid outputs from a single\n                decoder layer for each image, with normalized coordinate\n                (cx, cy, w, h) and shape [num_query, 4].\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes for each image\n                with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image with shape (num_gts, ).\n            img_metas (list[dict]): List of image meta information.\n            gt_bboxes_ignore_list (list[Tensor], optional): Bounding\n                boxes which can be ignored for each image. 
Default None.\n\n        Returns:\n            tuple: a tuple containing the following targets.\n\n                - labels_list (list[Tensor]): Labels for all images.\n                - label_weights_list (list[Tensor]): Label weights for all \\\n                    images.\n                - bbox_targets_list (list[Tensor]): BBox targets for all \\\n                    images.\n                - bbox_weights_list (list[Tensor]): BBox weights for all \\\n                    images.\n                - num_total_pos (int): Number of positive samples in all \\\n                    images.\n                - num_total_neg (int): Number of negative samples in all \\\n                    images.\n        \"\"\"\n        assert gt_bboxes_ignore_list is None, \\\n            'Only supports for gt_bboxes_ignore setting to None.'\n        num_imgs = len(cls_scores_list)\n        gt_bboxes_ignore_list = [\n            gt_bboxes_ignore_list for _ in range(num_imgs)\n        ]\n\n        (labels_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, pos_inds_list, neg_inds_list) = multi_apply(\n             self._get_target_single, cls_scores_list, bbox_preds_list,\n             gt_bboxes_list, gt_labels_list, img_metas, gt_bboxes_ignore_list)\n        num_total_pos = sum((inds.numel() for inds in pos_inds_list))\n        num_total_neg = sum((inds.numel() for inds in neg_inds_list))\n        return (labels_list, label_weights_list, bbox_targets_list,\n                bbox_weights_list, num_total_pos, num_total_neg)\n\n    def _get_target_single(self,\n                           cls_score,\n                           bbox_pred,\n                           gt_bboxes,\n                           gt_labels,\n                           img_meta,\n                           gt_bboxes_ignore=None):\n        \"\"\"\"Compute regression and classification targets for one image.\n\n        Outputs from a single decoder layer of a single feature level are used.\n\n        Args:\n            cls_score (Tensor): Box score logits from a single decoder layer\n                for one image. Shape [num_query, cls_out_channels].\n            bbox_pred (Tensor): Sigmoid outputs from a single decoder layer\n                for one image, with normalized coordinate (cx, cy, w, h) and\n                shape [num_query, 4].\n            gt_bboxes (Tensor): Ground truth bboxes for one image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (Tensor): Ground truth class indices for one image\n                with shape (num_gts, ).\n            img_meta (dict): Meta information for one image.\n            gt_bboxes_ignore (Tensor, optional): Bounding boxes\n                which can be ignored. 
Default None.\n\n        Returns:\n            tuple[Tensor]: a tuple containing the following for one image.\n\n                - labels (Tensor): Labels of each image.\n                - label_weights (Tensor]): Label weights of each image.\n                - bbox_targets (Tensor): BBox targets of each image.\n                - bbox_weights (Tensor): BBox weights of each image.\n                - pos_inds (Tensor): Sampled positive indices for each image.\n                - neg_inds (Tensor): Sampled negative indices for each image.\n        \"\"\"\n\n        num_bboxes = bbox_pred.size(0)\n        # assigner and sampler\n        assign_result = self.assigner.assign(bbox_pred, cls_score, gt_bboxes,\n                                             gt_labels, img_meta,\n                                             gt_bboxes_ignore)\n        sampling_result = self.sampler.sample(assign_result, bbox_pred,\n                                              gt_bboxes)\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n\n        # label targets\n        labels = gt_bboxes.new_full((num_bboxes, ),\n                                    self.num_classes,\n                                    dtype=torch.long)\n        labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]\n        label_weights = gt_bboxes.new_ones(num_bboxes)\n\n        # bbox targets\n        bbox_targets = torch.zeros_like(bbox_pred)\n        bbox_weights = torch.zeros_like(bbox_pred)\n        bbox_weights[pos_inds] = 1.0\n        img_h, img_w, _ = img_meta['img_shape']\n\n        # DETR regress the relative position of boxes (cxcywh) in the image.\n        # Thus the learning target should be normalized by the image size, also\n        # the box format should be converted from defaultly x1y1x2y2 to cxcywh.\n        factor = bbox_pred.new_tensor([img_w, img_h, img_w,\n                                       img_h]).unsqueeze(0)\n        pos_gt_bboxes_normalized = sampling_result.pos_gt_bboxes / factor\n        pos_gt_bboxes_targets = bbox_xyxy_to_cxcywh(pos_gt_bboxes_normalized)\n        bbox_targets[pos_inds] = pos_gt_bboxes_targets\n        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,\n                neg_inds)\n\n    # over-write because img_metas are needed as inputs for bbox_head.\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels=None,\n                      gt_bboxes_ignore=None,\n                      proposal_cfg=None,\n                      **kwargs):\n        \"\"\"Forward function for training mode.\n\n        Args:\n            x (list[Tensor]): Features from backbone.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            proposal_cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert proposal_cfg is None, '\"proposal_cfg\" must be None'\n        outs = self(x, img_metas)\n        
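# Append the ground truth to the head outputs to build the loss inputs.\n        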
if gt_labels is None:\n            loss_inputs = outs + (gt_bboxes, img_metas)\n        else:\n            loss_inputs = outs + (gt_bboxes, gt_labels, img_metas)\n        losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        return losses\n\n    @force_fp32(apply_to=('all_cls_scores_list', 'all_bbox_preds_list'))\n    def get_bboxes(self,\n                   all_cls_scores_list,\n                   all_bbox_preds_list,\n                   img_metas,\n                   rescale=False):\n        \"\"\"Transform network outputs for a batch into bbox predictions.\n\n        Args:\n            all_cls_scores_list (list[Tensor]): Classification outputs\n                for each feature level. Each is a 4D-tensor with shape\n                [nb_dec, bs, num_query, cls_out_channels].\n            all_bbox_preds_list (list[Tensor]): Sigmoid regression\n                outputs for each feature level. Each is a 4D-tensor with\n                normalized coordinate format (cx, cy, w, h) and shape\n                [nb_dec, bs, num_query, 4].\n            img_metas (list[dict]): Meta information of each image.\n            rescale (bool, optional): If True, return boxes in original\n                image space. Default False.\n\n        Returns:\n            list[list[Tensor, Tensor]]: Each item in result_list is 2-tuple. \\\n                The first item is an (n, 5) tensor, where the first 4 columns \\\n                are bounding box positions (tl_x, tl_y, br_x, br_y) and the \\\n                5-th column is a score between 0 and 1. The second item is a \\\n                (n,) tensor where each item is the predicted class label of \\\n                the corresponding box.\n        \"\"\"\n        # NOTE defaultly only using outputs from the last feature level,\n        # and only the outputs from the last decoder layer is used.\n        cls_scores = all_cls_scores_list[-1][-1]\n        bbox_preds = all_bbox_preds_list[-1][-1]\n\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score = cls_scores[img_id]\n            bbox_pred = bbox_preds[img_id]\n            img_shape = img_metas[img_id]['img_shape']\n            scale_factor = img_metas[img_id]['scale_factor']\n            proposals = self._get_bboxes_single(cls_score, bbox_pred,\n                                                img_shape, scale_factor,\n                                                rescale)\n            result_list.append(proposals)\n\n        return result_list\n\n    def _get_bboxes_single(self,\n                           cls_score,\n                           bbox_pred,\n                           img_shape,\n                           scale_factor,\n                           rescale=False):\n        \"\"\"Transform outputs from the last decoder layer into bbox predictions\n        for each image.\n\n        Args:\n            cls_score (Tensor): Box score logits from the last decoder layer\n                for each image. 
Shape [num_query, cls_out_channels].\n            bbox_pred (Tensor): Sigmoid outputs from the last decoder layer\n                for each image, with coordinate format (cx, cy, w, h) and\n                shape [num_query, 4].\n            img_shape (tuple[int]): Shape of input image, (height, width, 3).\n            scale_factor (ndarray, optional): Scale factor of the image arange\n                as (w_scale, h_scale, w_scale, h_scale).\n            rescale (bool, optional): If True, return boxes in original image\n                space. Default False.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels.\n\n                - det_bboxes: Predicted bboxes with shape [num_query, 5], \\\n                    where the first 4 columns are bounding box positions \\\n                    (tl_x, tl_y, br_x, br_y) and the 5-th column are scores \\\n                    between 0 and 1.\n                - det_labels: Predicted labels of the corresponding box with \\\n                    shape [num_query].\n        \"\"\"\n        assert len(cls_score) == len(bbox_pred)\n        max_per_img = self.test_cfg.get('max_per_img', self.num_query)\n        # exclude background\n        if self.loss_cls.use_sigmoid:\n            cls_score = cls_score.sigmoid()\n            scores, indexes = cls_score.view(-1).topk(max_per_img)\n            det_labels = indexes % self.num_classes\n            bbox_index = indexes // self.num_classes\n            bbox_pred = bbox_pred[bbox_index]\n        else:\n            scores, det_labels = F.softmax(cls_score, dim=-1)[..., :-1].max(-1)\n            scores, bbox_index = scores.topk(max_per_img)\n            bbox_pred = bbox_pred[bbox_index]\n            det_labels = det_labels[bbox_index]\n\n        det_bboxes = bbox_cxcywh_to_xyxy(bbox_pred)\n        det_bboxes[:, 0::2] = det_bboxes[:, 0::2] * img_shape[1]\n        det_bboxes[:, 1::2] = det_bboxes[:, 1::2] * img_shape[0]\n        det_bboxes[:, 0::2].clamp_(min=0, max=img_shape[1])\n        det_bboxes[:, 1::2].clamp_(min=0, max=img_shape[0])\n        if rescale:\n            det_bboxes /= det_bboxes.new_tensor(scale_factor)\n        det_bboxes = torch.cat((det_bboxes, scores.unsqueeze(1)), -1)\n\n        return det_bboxes, det_labels\n\n    def simple_test_bboxes(self, feats, img_metas, rescale=False):\n        \"\"\"Test det bboxes without test-time augmentation.\n\n        Args:\n            feats (tuple[torch.Tensor]): Multi-level features from the\n                upstream network, each is a 4D-tensor.\n            img_metas (list[dict]): List of image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is ``bboxes`` with shape (n, 5),\n                where 5 represent (tl_x, tl_y, br_x, br_y, score).\n                The shape of the second tensor in the tuple is ``labels``\n                with shape (n,)\n        \"\"\"\n        # forward of this head requires img_metas\n        outs = self.forward(feats, img_metas)\n        results_list = self.get_bboxes(*outs, img_metas, rescale=rescale)\n        return results_list\n\n    def forward_onnx(self, feats, img_metas):\n        \"\"\"Forward function for exporting to ONNX.\n\n        Over-write `forward` because: `masks` is directly created with\n        zero (valid position tag) and has the same spatial size as `x`.\n        Thus the 
construction of `masks` is different from that in `forward`.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            tuple[list[Tensor], list[Tensor]]: Outputs for all scale levels.\n\n                - all_cls_scores_list (list[Tensor]): Classification scores \\\n                    for each scale level. Each is a 4D-tensor with shape \\\n                    [nb_dec, bs, num_query, cls_out_channels]. Note \\\n                    `cls_out_channels` should includes background.\n                - all_bbox_preds_list (list[Tensor]): Sigmoid regression \\\n                    outputs for each scale level. Each is a 4D-tensor with \\\n                    normalized coordinate format (cx, cy, w, h) and shape \\\n                    [nb_dec, bs, num_query, 4].\n        \"\"\"\n        num_levels = len(feats)\n        img_metas_list = [img_metas for _ in range(num_levels)]\n        return multi_apply(self.forward_single_onnx, feats, img_metas_list)\n\n    def forward_single_onnx(self, x, img_metas):\n        \"\"\"\"Forward function for a single feature level with ONNX exportation.\n\n        Args:\n            x (Tensor): Input feature from backbone's single stage, shape\n                [bs, c, h, w].\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            all_cls_scores (Tensor): Outputs from the classification head,\n                shape [nb_dec, bs, num_query, cls_out_channels]. Note\n                cls_out_channels should includes background.\n            all_bbox_preds (Tensor): Sigmoid outputs from the regression\n                head with normalized coordinate format (cx, cy, w, h).\n                Shape [nb_dec, bs, num_query, 4].\n        \"\"\"\n        # Note `img_shape` is not dynamically traceable to ONNX,\n        # since the related augmentation was done with numpy under\n        # CPU. Thus `masks` is directly created with zeros (valid tag)\n        # and the same spatial shape as `x`.\n        # The difference between torch and exported ONNX model may be\n        # ignored, since the same performance is achieved (e.g.\n        # 40.1 vs 40.1 for DETR)\n        batch_size = x.size(0)\n        h, w = x.size()[-2:]\n        masks = x.new_zeros((batch_size, h, w))  # [B,h,w]\n\n        x = self.input_proj(x)\n        # interpolate masks to have the same spatial shape with x\n        masks = F.interpolate(\n            masks.unsqueeze(1), size=x.shape[-2:]).to(torch.bool).squeeze(1)\n        pos_embed = self.positional_encoding(masks)\n        outs_dec, _ = self.transformer(x, masks, self.query_embedding.weight,\n                                       pos_embed)\n\n        all_cls_scores = self.fc_cls(outs_dec)\n        all_bbox_preds = self.fc_reg(self.activate(\n            self.reg_ffn(outs_dec))).sigmoid()\n        return all_cls_scores, all_bbox_preds\n\n    def onnx_export(self, all_cls_scores_list, all_bbox_preds_list, img_metas):\n        \"\"\"Transform network outputs into bbox predictions, with ONNX\n        exportation.\n\n        Args:\n            all_cls_scores_list (list[Tensor]): Classification outputs\n                for each feature level. Each is a 4D-tensor with shape\n                [nb_dec, bs, num_query, cls_out_channels].\n            all_bbox_preds_list (list[Tensor]): Sigmoid regression\n                outputs for each feature level. 
Each is a 4D-tensor with\n                normalized coordinate format (cx, cy, w, h) and shape\n                [nb_dec, bs, num_query, 4].\n            img_metas (list[dict]): Meta information of each image.\n\n        Returns:\n            tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]\n                and class labels of shape [N, num_det].\n        \"\"\"\n        assert len(img_metas) == 1, \\\n            'Only support one input image while in exporting to ONNX'\n\n        cls_scores = all_cls_scores_list[-1][-1]\n        bbox_preds = all_bbox_preds_list[-1][-1]\n\n        # Note `img_shape` is not dynamically traceable to ONNX,\n        # here `img_shape_for_onnx` (padded shape of image tensor)\n        # is used.\n        img_shape = img_metas[0]['img_shape_for_onnx']\n        max_per_img = self.test_cfg.get('max_per_img', self.num_query)\n        batch_size = cls_scores.size(0)\n        # `batch_index_offset` is used for the gather of concatenated tensor\n        batch_index_offset = torch.arange(batch_size).to(\n            cls_scores.device) * max_per_img\n        batch_index_offset = batch_index_offset.unsqueeze(1).expand(\n            batch_size, max_per_img)\n\n        # supports dynamical batch inference\n        if self.loss_cls.use_sigmoid:\n            cls_scores = cls_scores.sigmoid()\n            scores, indexes = cls_scores.view(batch_size, -1).topk(\n                max_per_img, dim=1)\n            det_labels = indexes % self.num_classes\n            bbox_index = indexes // self.num_classes\n            bbox_index = (bbox_index + batch_index_offset).view(-1)\n            bbox_preds = bbox_preds.view(-1, 4)[bbox_index]\n            bbox_preds = bbox_preds.view(batch_size, -1, 4)\n        else:\n            scores, det_labels = F.softmax(\n                cls_scores, dim=-1)[..., :-1].max(-1)\n            scores, bbox_index = scores.topk(max_per_img, dim=1)\n            bbox_index = (bbox_index + batch_index_offset).view(-1)\n            bbox_preds = bbox_preds.view(-1, 4)[bbox_index]\n            det_labels = det_labels.view(-1)[bbox_index]\n            bbox_preds = bbox_preds.view(batch_size, -1, 4)\n            det_labels = det_labels.view(batch_size, -1)\n\n        det_bboxes = bbox_cxcywh_to_xyxy(bbox_preds)\n        # use `img_shape_tensor` for dynamically exporting to ONNX\n        img_shape_tensor = img_shape.flip(0).repeat(2)  # [w,h,w,h]\n        img_shape_tensor = img_shape_tensor.unsqueeze(0).unsqueeze(0).expand(\n            batch_size, det_bboxes.size(1), 4)\n        det_bboxes = det_bboxes * img_shape_tensor\n        # dynamically clip bboxes\n        x1, y1, x2, y2 = det_bboxes.split((1, 1, 1, 1), dim=-1)\n        from mmdet.core.export import dynamic_clip_for_onnx\n        x1, y1, x2, y2 = dynamic_clip_for_onnx(x1, y1, x2, y2, img_shape)\n        det_bboxes = torch.cat([x1, y1, x2, y2], dim=-1)\n        det_bboxes = torch.cat((det_bboxes, scores.unsqueeze(-1)), -1)\n\n        return det_bboxes, det_labels\n"
  },
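  {
    "path": "sketches/detr_head_box_decode_sketch.py",
    "content": "# Illustrative sketch (hypothetical file, not part of mmdetection).\n# It mirrors, in plain torch, the box post-processing done by\n# DETRHead._get_bboxes_single: normalized (cx, cy, w, h) predictions are\n# converted to (x1, y1, x2, y2), scaled to the image shape, clamped and\n# concatenated with their scores. All tensor values below are synthetic.\nimport torch\n\n\ndef cxcywh_to_xyxy(boxes):\n    # (cx, cy, w, h) -> (x1, y1, x2, y2)\n    cx, cy, w, h = boxes.unbind(-1)\n    return torch.stack(\n        (cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h), dim=-1)\n\n\ndef decode_detr_boxes(bbox_pred, scores, img_shape):\n    # bbox_pred: (num_query, 4) normalized cxcywh; scores: (num_query,)\n    h, w = img_shape[:2]\n    boxes = cxcywh_to_xyxy(bbox_pred)\n    boxes[:, 0::2] = (boxes[:, 0::2] * w).clamp(min=0, max=w)\n    boxes[:, 1::2] = (boxes[:, 1::2] * h).clamp(min=0, max=h)\n    return torch.cat((boxes, scores.unsqueeze(1)), dim=-1)  # (num_query, 5)\n\n\nif __name__ == '__main__':\n    pred = torch.rand(100, 4)  # stands in for the sigmoid regression output\n    scr = torch.rand(100)      # stands in for per-query scores\n    print(decode_detr_boxes(pred, scr, (800, 1333, 3)).shape)  # (100, 5)\n"
  },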
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/embedding_rpn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import BaseModule\n\nfrom mmdet.models.builder import HEADS\nfrom ...core import bbox_cxcywh_to_xyxy\n\n\n@HEADS.register_module()\nclass EmbeddingRPNHead(BaseModule):\n    \"\"\"RPNHead in the `Sparse R-CNN <https://arxiv.org/abs/2011.12450>`_ .\n\n    Unlike traditional RPNHead, this module does not need FPN input, but just\n    decode `init_proposal_bboxes` and expand the first dimension of\n    `init_proposal_bboxes` and `init_proposal_features` to the batch_size.\n\n    Args:\n        num_proposals (int): Number of init_proposals. Default 100.\n        proposal_feature_channel (int): Channel number of\n            init_proposal_feature. Defaults to 256.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 num_proposals=100,\n                 proposal_feature_channel=256,\n                 init_cfg=None,\n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super(EmbeddingRPNHead, self).__init__(init_cfg)\n        self.num_proposals = num_proposals\n        self.proposal_feature_channel = proposal_feature_channel\n        self._init_layers()\n\n    def _init_layers(self):\n        \"\"\"Initialize a sparse set of proposal boxes and proposal features.\"\"\"\n        self.init_proposal_bboxes = nn.Embedding(self.num_proposals, 4)\n        self.init_proposal_features = nn.Embedding(\n            self.num_proposals, self.proposal_feature_channel)\n\n    def init_weights(self):\n        \"\"\"Initialize the init_proposal_bboxes as normalized.\n\n        [c_x, c_y, w, h], and we initialize it to the size of  the entire\n        image.\n        \"\"\"\n        super(EmbeddingRPNHead, self).init_weights()\n        nn.init.constant_(self.init_proposal_bboxes.weight[:, :2], 0.5)\n        nn.init.constant_(self.init_proposal_bboxes.weight[:, 2:], 1)\n\n    def _decode_init_proposals(self, imgs, img_metas):\n        \"\"\"Decode init_proposal_bboxes according to the size of images and\n        expand dimension of init_proposal_features to batch_size.\n\n        Args:\n            imgs (list[Tensor]): List of FPN features.\n            img_metas (list[dict]): List of meta-information of\n                images. 
Need the img_shape to decode the init_proposals.\n\n        Returns:\n            Tuple(Tensor):\n\n                - proposals (Tensor): Decoded proposal bboxes,\n                  has shape (batch_size, num_proposals, 4).\n                - init_proposal_features (Tensor): Expanded proposal\n                  features, has shape\n                  (batch_size, num_proposals, proposal_feature_channel).\n                - imgs_whwh (Tensor): Tensor with shape\n                  (batch_size, 4), the dimension means\n                  [img_width, img_height, img_width, img_height].\n        \"\"\"\n        proposals = self.init_proposal_bboxes.weight.clone()\n        proposals = bbox_cxcywh_to_xyxy(proposals)\n        num_imgs = len(imgs[0])\n        imgs_whwh = []\n        for meta in img_metas:\n            h, w, _ = meta['img_shape']\n            imgs_whwh.append(imgs[0].new_tensor([[w, h, w, h]]))\n        imgs_whwh = torch.cat(imgs_whwh, dim=0)\n        imgs_whwh = imgs_whwh[:, None, :]\n\n        # imgs_whwh has shape (batch_size, 1, 4)\n        # The shape of proposals change from (num_proposals, 4)\n        # to (batch_size ,num_proposals, 4)\n        proposals = proposals * imgs_whwh\n\n        init_proposal_features = self.init_proposal_features.weight.clone()\n        init_proposal_features = init_proposal_features[None].expand(\n            num_imgs, *init_proposal_features.size())\n        return proposals, init_proposal_features, imgs_whwh\n\n    def forward_dummy(self, img, img_metas):\n        \"\"\"Dummy forward function.\n\n        Used in flops calculation.\n        \"\"\"\n        return self._decode_init_proposals(img, img_metas)\n\n    def forward_train(self, img, img_metas):\n        \"\"\"Forward function in training stage.\"\"\"\n        return self._decode_init_proposals(img, img_metas)\n\n    def simple_test_rpn(self, img, img_metas):\n        \"\"\"Forward function in testing stage.\"\"\"\n        return self._decode_init_proposals(img, img_metas)\n\n    def simple_test(self, img, img_metas):\n        \"\"\"Forward function in testing stage.\"\"\"\n        raise NotImplementedError\n\n    def aug_test_rpn(self, feats, img_metas):\n        raise NotImplementedError(\n            'EmbeddingRPNHead does not support test-time augmentation')\n"
  },
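  {
    "path": "sketches/embedding_rpn_proposals_sketch.py",
    "content": "# Illustrative sketch (hypothetical file, not part of mmdetection).\n# EmbeddingRPNHead keeps num_proposals learnable boxes initialised to the\n# normalized whole image (cx = cy = 0.5, w = h = 1). This shows how those\n# boxes decode to absolute (x1, y1, x2, y2) proposals for a given image\n# size, which is what _decode_init_proposals does via imgs_whwh.\nimport torch\n\n\ndef decode_init_proposals(proposals_cxcywh, img_h, img_w):\n    # proposals_cxcywh: (num_proposals, 4) normalized (cx, cy, w, h)\n    cx, cy, w, h = proposals_cxcywh.unbind(-1)\n    xyxy = torch.stack(\n        (cx - 0.5 * w, cy - 0.5 * h, cx + 0.5 * w, cy + 0.5 * h), dim=-1)\n    whwh = xyxy.new_tensor([img_w, img_h, img_w, img_h])\n    return xyxy * whwh  # absolute (x1, y1, x2, y2)\n\n\nif __name__ == '__main__':\n    init = torch.zeros(100, 4)\n    init[:, :2] = 0.5  # centres at the image centre\n    init[:, 2:] = 1.0  # width/height cover the whole image\n    print(decode_init_proposals(init, img_h=480, img_w=640)[0])\n    # -> tensor([  0.,   0., 640., 480.])\n"
  },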
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/fcos_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import Scale\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import multi_apply, reduce_mean\nfrom ..builder import HEADS, build_loss\nfrom .anchor_free_head import AnchorFreeHead\n\nINF = 1e8\n\n\n@HEADS.register_module()\nclass FCOSHead(AnchorFreeHead):\n    \"\"\"Anchor-free head used in `FCOS <https://arxiv.org/abs/1904.01355>`_.\n\n    The FCOS head does not use anchor boxes. Instead bounding boxes are\n    predicted at each pixel and a centerness measure is used to suppress\n    low-quality predictions.\n    Here norm_on_bbox, centerness_on_reg, dcn_on_last_conv are training\n    tricks used in official repo, which will bring remarkable mAP gains\n    of up to 4.9. Please see https://github.com/tianzhi0549/FCOS for\n    more detail.\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        strides (list[int] | list[tuple[int, int]]): Strides of points\n            in multiple feature levels. Default: (4, 8, 16, 32, 64).\n        regress_ranges (tuple[tuple[int, int]]): Regress range of multiple\n            level points.\n        center_sampling (bool): If true, use center sampling. Default: False.\n        center_sample_radius (float): Radius of center sampling. Default: 1.5.\n        norm_on_bbox (bool): If true, normalize the regression targets\n            with FPN strides. Default: False.\n        centerness_on_reg (bool): If true, position centerness on the\n            regress branch. Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042.\n            Default: False.\n        conv_bias (bool | str): If specified as `auto`, it will be decided by the\n            norm_cfg. Bias of conv will be set as True if `norm_cfg` is None, otherwise\n            False. 
Default: \"auto\".\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        loss_centerness (dict): Config of centerness loss.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n\n    Example:\n        >>> self = FCOSHead(11, 7)\n        >>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]]\n        >>> cls_score, bbox_pred, centerness = self.forward(feats)\n        >>> assert len(cls_score) == len(self.scales)\n    \"\"\"  # noqa: E501\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512),\n                                 (512, INF)),\n                 center_sampling=False,\n                 center_sample_radius=1.5,\n                 norm_on_bbox=False,\n                 centerness_on_reg=False,\n                 loss_cls=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 loss_bbox=dict(type='IoULoss', loss_weight=1.0),\n                 loss_centerness=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='conv_cls',\n                         std=0.01,\n                         bias_prob=0.01)),\n                 **kwargs):\n        self.regress_ranges = regress_ranges\n        self.center_sampling = center_sampling\n        self.center_sample_radius = center_sample_radius\n        self.norm_on_bbox = norm_on_bbox\n        self.centerness_on_reg = centerness_on_reg\n        super().__init__(\n            num_classes,\n            in_channels,\n            loss_cls=loss_cls,\n            loss_bbox=loss_bbox,\n            norm_cfg=norm_cfg,\n            init_cfg=init_cfg,\n            **kwargs)\n        self.loss_centerness = build_loss(loss_centerness)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        super()._init_layers()\n        self.conv_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)\n        self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides])\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple:\n                cls_scores (list[Tensor]): Box scores for each scale level, \\\n                    each is a 4D-tensor, the channel number is \\\n                    num_points * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for each \\\n                    scale level, each is a 4D-tensor, the channel number is \\\n                    num_points * 4.\n                centernesses (list[Tensor]): centerness for each scale level, \\\n                    each is a 
4D-tensor, the channel number is num_points * 1.\n        \"\"\"\n        return multi_apply(self.forward_single, feats, self.scales,\n                           self.strides)\n\n    def forward_single(self, x, scale, stride):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): FPN feature maps of the specified stride.\n            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize\n                the bbox prediction.\n            stride (int): The corresponding stride for feature maps, only\n                used to normalize the bbox prediction when self.norm_on_bbox\n                is True.\n\n        Returns:\n            tuple: scores for each class, bbox predictions and centerness \\\n                predictions of input feature maps.\n        \"\"\"\n        cls_score, bbox_pred, cls_feat, reg_feat = super().forward_single(x)\n        if self.centerness_on_reg:\n            centerness = self.conv_centerness(reg_feat)\n        else:\n            centerness = self.conv_centerness(cls_feat)\n        # scale the bbox_pred of different level\n        # float to avoid overflow when enabling FP16\n        bbox_pred = scale(bbox_pred).float()\n        if self.norm_on_bbox:\n            # bbox_pred needed for gradient computation has been modified\n            # by F.relu(bbox_pred) when run with PyTorch 1.10. So replace\n            # F.relu(bbox_pred) with bbox_pred.clamp(min=0)\n            bbox_pred = bbox_pred.clamp(min=0)\n            if not self.training:\n                bbox_pred *= stride\n        else:\n            bbox_pred = bbox_pred.exp()\n        return cls_score, bbox_pred, centerness\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'centernesses'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             centernesses,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_points * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * 4.\n            centernesses (list[Tensor]): centerness for each scale level, each\n                is a 4D-tensor, the channel number is num_points * 1.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(centernesses)\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        all_level_points = self.prior_generator.grid_priors(\n            featmap_sizes,\n            dtype=bbox_preds[0].dtype,\n            device=bbox_preds[0].device)\n        labels, bbox_targets = self.get_targets(all_level_points, gt_bboxes,\n                          
                      gt_labels)\n\n        num_imgs = cls_scores[0].size(0)\n        # flatten cls_scores, bbox_preds and centerness\n        flatten_cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)\n            for cls_score in cls_scores\n        ]\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n            for bbox_pred in bbox_preds\n        ]\n        flatten_centerness = [\n            centerness.permute(0, 2, 3, 1).reshape(-1)\n            for centerness in centernesses\n        ]\n        flatten_cls_scores = torch.cat(flatten_cls_scores)\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds)\n        flatten_centerness = torch.cat(flatten_centerness)\n        flatten_labels = torch.cat(labels)\n        flatten_bbox_targets = torch.cat(bbox_targets)\n        # repeat points to align with bbox_preds\n        flatten_points = torch.cat(\n            [points.repeat(num_imgs, 1) for points in all_level_points])\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = ((flatten_labels >= 0)\n                    & (flatten_labels < bg_class_ind)).nonzero().reshape(-1)\n        num_pos = torch.tensor(\n            len(pos_inds), dtype=torch.float, device=bbox_preds[0].device)\n        num_pos = max(reduce_mean(num_pos), 1.0)\n        loss_cls = self.loss_cls(\n            flatten_cls_scores, flatten_labels, avg_factor=num_pos)\n\n        pos_bbox_preds = flatten_bbox_preds[pos_inds]\n        pos_centerness = flatten_centerness[pos_inds]\n        pos_bbox_targets = flatten_bbox_targets[pos_inds]\n        pos_centerness_targets = self.centerness_target(pos_bbox_targets)\n        # centerness weighted iou loss\n        centerness_denorm = max(\n            reduce_mean(pos_centerness_targets.sum().detach()), 1e-6)\n\n        if len(pos_inds) > 0:\n            pos_points = flatten_points[pos_inds]\n            pos_decoded_bbox_preds = self.bbox_coder.decode(\n                pos_points, pos_bbox_preds)\n            pos_decoded_target_preds = self.bbox_coder.decode(\n                pos_points, pos_bbox_targets)\n            loss_bbox = self.loss_bbox(\n                pos_decoded_bbox_preds,\n                pos_decoded_target_preds,\n                weight=pos_centerness_targets,\n                avg_factor=centerness_denorm)\n            loss_centerness = self.loss_centerness(\n                pos_centerness, pos_centerness_targets, avg_factor=num_pos)\n        else:\n            loss_bbox = pos_bbox_preds.sum()\n            loss_centerness = pos_centerness.sum()\n\n        return dict(\n            loss_cls=loss_cls,\n            loss_bbox=loss_bbox,\n            loss_centerness=loss_centerness)\n\n    def get_targets(self, points, gt_bboxes_list, gt_labels_list):\n        \"\"\"Compute regression, classification and centerness targets for points\n        in multiple images.\n\n        Args:\n            points (list[Tensor]): Points of each fpn level, each has shape\n                (num_points, 2).\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n            gt_labels_list (list[Tensor]): Ground truth labels of each box,\n                each has shape (num_gt,).\n\n        Returns:\n            tuple:\n                concat_lvl_labels (list[Tensor]): Labels of each level. 
\\\n                concat_lvl_bbox_targets (list[Tensor]): BBox targets of each \\\n                    level.\n        \"\"\"\n        assert len(points) == len(self.regress_ranges)\n        num_levels = len(points)\n        # expand regress ranges to align with points\n        expanded_regress_ranges = [\n            points[i].new_tensor(self.regress_ranges[i])[None].expand_as(\n                points[i]) for i in range(num_levels)\n        ]\n        # concat all levels points and regress ranges\n        concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)\n        concat_points = torch.cat(points, dim=0)\n\n        # the number of points per img, per lvl\n        num_points = [center.size(0) for center in points]\n\n        # get labels and bbox_targets of each image\n        labels_list, bbox_targets_list = multi_apply(\n            self._get_target_single,\n            gt_bboxes_list,\n            gt_labels_list,\n            points=concat_points,\n            regress_ranges=concat_regress_ranges,\n            num_points_per_lvl=num_points)\n\n        # split to per img, per level\n        labels_list = [labels.split(num_points, 0) for labels in labels_list]\n        bbox_targets_list = [\n            bbox_targets.split(num_points, 0)\n            for bbox_targets in bbox_targets_list\n        ]\n\n        # concat per level image\n        concat_lvl_labels = []\n        concat_lvl_bbox_targets = []\n        for i in range(num_levels):\n            concat_lvl_labels.append(\n                torch.cat([labels[i] for labels in labels_list]))\n            bbox_targets = torch.cat(\n                [bbox_targets[i] for bbox_targets in bbox_targets_list])\n            if self.norm_on_bbox:\n                bbox_targets = bbox_targets / self.strides[i]\n            concat_lvl_bbox_targets.append(bbox_targets)\n        return concat_lvl_labels, concat_lvl_bbox_targets\n\n    def _get_target_single(self, gt_bboxes, gt_labels, points, regress_ranges,\n                           num_points_per_lvl):\n        \"\"\"Compute regression and classification targets for a single image.\"\"\"\n        num_points = points.size(0)\n        num_gts = gt_labels.size(0)\n        if num_gts == 0:\n            return gt_labels.new_full((num_points,), self.num_classes), \\\n                   gt_bboxes.new_zeros((num_points, 4))\n\n        areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (\n            gt_bboxes[:, 3] - gt_bboxes[:, 1])\n        # TODO: figure out why these two are different\n        # areas = areas[None].expand(num_points, num_gts)\n        areas = areas[None].repeat(num_points, 1)\n        regress_ranges = regress_ranges[:, None, :].expand(\n            num_points, num_gts, 2)\n        gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)\n        xs, ys = points[:, 0], points[:, 1]\n        xs = xs[:, None].expand(num_points, num_gts)\n        ys = ys[:, None].expand(num_points, num_gts)\n\n        left = xs - gt_bboxes[..., 0]\n        right = gt_bboxes[..., 2] - xs\n        top = ys - gt_bboxes[..., 1]\n        bottom = gt_bboxes[..., 3] - ys\n        bbox_targets = torch.stack((left, top, right, bottom), -1)\n\n        if self.center_sampling:\n            # condition1: inside a `center bbox`\n            radius = self.center_sample_radius\n            center_xs = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) / 2\n            center_ys = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) / 2\n            center_gts = torch.zeros_like(gt_bboxes)\n            stride = 
center_xs.new_zeros(center_xs.shape)\n\n            # project the points on current lvl back to the `original` sizes\n            lvl_begin = 0\n            for lvl_idx, num_points_lvl in enumerate(num_points_per_lvl):\n                lvl_end = lvl_begin + num_points_lvl\n                stride[lvl_begin:lvl_end] = self.strides[lvl_idx] * radius\n                lvl_begin = lvl_end\n\n            x_mins = center_xs - stride\n            y_mins = center_ys - stride\n            x_maxs = center_xs + stride\n            y_maxs = center_ys + stride\n            center_gts[..., 0] = torch.where(x_mins > gt_bboxes[..., 0],\n                                             x_mins, gt_bboxes[..., 0])\n            center_gts[..., 1] = torch.where(y_mins > gt_bboxes[..., 1],\n                                             y_mins, gt_bboxes[..., 1])\n            center_gts[..., 2] = torch.where(x_maxs > gt_bboxes[..., 2],\n                                             gt_bboxes[..., 2], x_maxs)\n            center_gts[..., 3] = torch.where(y_maxs > gt_bboxes[..., 3],\n                                             gt_bboxes[..., 3], y_maxs)\n\n            cb_dist_left = xs - center_gts[..., 0]\n            cb_dist_right = center_gts[..., 2] - xs\n            cb_dist_top = ys - center_gts[..., 1]\n            cb_dist_bottom = center_gts[..., 3] - ys\n            center_bbox = torch.stack(\n                (cb_dist_left, cb_dist_top, cb_dist_right, cb_dist_bottom), -1)\n            inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0\n        else:\n            # condition1: inside a gt bbox\n            inside_gt_bbox_mask = bbox_targets.min(-1)[0] > 0\n\n        # condition2: limit the regression range for each location\n        max_regress_distance = bbox_targets.max(-1)[0]\n        inside_regress_range = (\n            (max_regress_distance >= regress_ranges[..., 0])\n            & (max_regress_distance <= regress_ranges[..., 1]))\n\n        # if there are still more than one objects for a location,\n        # we choose the one with minimal area\n        areas[inside_gt_bbox_mask == 0] = INF\n        areas[inside_regress_range == 0] = INF\n        min_area, min_area_inds = areas.min(dim=1)\n\n        labels = gt_labels[min_area_inds]\n        labels[min_area == INF] = self.num_classes  # set as BG\n        bbox_targets = bbox_targets[range(num_points), min_area_inds]\n\n        return labels, bbox_targets\n\n    def centerness_target(self, pos_bbox_targets):\n        \"\"\"Compute centerness targets.\n\n        Args:\n            pos_bbox_targets (Tensor): BBox targets of positive bboxes in shape\n                (num_pos, 4)\n\n        Returns:\n            Tensor: Centerness target.\n        \"\"\"\n        # only calculate pos centerness targets, otherwise there may be nan\n        left_right = pos_bbox_targets[:, [0, 2]]\n        top_bottom = pos_bbox_targets[:, [1, 3]]\n        if len(left_right) == 0:\n            centerness_targets = left_right[..., 0]\n        else:\n            centerness_targets = (\n                left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * (\n                    top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0])\n        return torch.sqrt(centerness_targets)\n\n    def _get_points_single(self,\n                           featmap_size,\n                           stride,\n                           dtype,\n                           device,\n                           flatten=False):\n        \"\"\"Get points according to feature map size.\n\n        This function 
will be deprecated soon.\n        \"\"\"\n        warnings.warn(\n            '`_get_points_single` in `FCOSHead` will be '\n            'deprecated soon, we support a multi level point generator now'\n            'you can get points of a single level feature map '\n            'with `self.prior_generator.single_level_grid_priors` ')\n\n        y, x = super()._get_points_single(featmap_size, stride, dtype, device)\n        points = torch.stack((x.reshape(-1) * stride, y.reshape(-1) * stride),\n                             dim=-1) + stride // 2\n        return points\n"
  },
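  {
    "path": "sketches/fcos_centerness_target_sketch.py",
    "content": "# Illustrative sketch (hypothetical file, not part of mmdetection).\n# FCOSHead.centerness_target computes, for each positive location,\n# sqrt((min(l, r) / max(l, r)) * (min(t, b) / max(t, b))) from the\n# (left, top, right, bottom) regression targets; values below are synthetic.\nimport torch\n\n\ndef centerness_target(pos_bbox_targets):\n    # pos_bbox_targets: (num_pos, 4) distances (left, top, right, bottom)\n    lr = pos_bbox_targets[:, [0, 2]]\n    tb = pos_bbox_targets[:, [1, 3]]\n    centerness = (lr.min(-1)[0] / lr.max(-1)[0]) * (tb.min(-1)[0] / tb.max(-1)[0])\n    return centerness.sqrt()\n\n\nif __name__ == '__main__':\n    targets = torch.tensor([[10., 10., 10., 10.],   # perfectly centred -> 1.0\n                            [2., 10., 18., 10.]])   # off-centre -> ~0.33\n    print(centerness_target(targets))\n"
  },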
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/fovea_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops import DeformConv2d\nfrom mmcv.runner import BaseModule\n\nfrom mmdet.core import multi_apply\nfrom mmdet.core.utils import filter_scores_and_topk\nfrom ..builder import HEADS\nfrom .anchor_free_head import AnchorFreeHead\n\nINF = 1e8\n\n\nclass FeatureAlign(BaseModule):\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size=3,\n                 deform_groups=4,\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.1,\n                     override=dict(\n                         type='Normal', name='conv_adaption', std=0.01))):\n        super(FeatureAlign, self).__init__(init_cfg)\n        offset_channels = kernel_size * kernel_size * 2\n        self.conv_offset = nn.Conv2d(\n            4, deform_groups * offset_channels, 1, bias=False)\n        self.conv_adaption = DeformConv2d(\n            in_channels,\n            out_channels,\n            kernel_size=kernel_size,\n            padding=(kernel_size - 1) // 2,\n            deform_groups=deform_groups)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x, shape):\n        offset = self.conv_offset(shape)\n        x = self.relu(self.conv_adaption(x, offset))\n        return x\n\n\n@HEADS.register_module()\nclass FoveaHead(AnchorFreeHead):\n    \"\"\"FoveaBox: Beyond Anchor-based Object Detector\n    https://arxiv.org/abs/1904.03797\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 base_edge_list=(16, 32, 64, 128, 256),\n                 scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128,\n                                                                         512)),\n                 sigma=0.4,\n                 with_deform=False,\n                 deform_groups=4,\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='conv_cls',\n                         std=0.01,\n                         bias_prob=0.01)),\n                 **kwargs):\n        self.base_edge_list = base_edge_list\n        self.scale_ranges = scale_ranges\n        self.sigma = sigma\n        self.with_deform = with_deform\n        self.deform_groups = deform_groups\n        super().__init__(num_classes, in_channels, init_cfg=init_cfg, **kwargs)\n\n    def _init_layers(self):\n        # box branch\n        super()._init_reg_convs()\n        self.conv_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)\n\n        # cls branch\n        if not self.with_deform:\n            super()._init_cls_convs()\n            self.conv_cls = nn.Conv2d(\n                self.feat_channels, self.cls_out_channels, 3, padding=1)\n        else:\n            self.cls_convs = nn.ModuleList()\n            self.cls_convs.append(\n                ConvModule(\n                    self.feat_channels, (self.feat_channels * 4),\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.norm_cfg is None))\n            self.cls_convs.append(\n                
ConvModule((self.feat_channels * 4), (self.feat_channels * 4),\n                           1,\n                           stride=1,\n                           padding=0,\n                           conv_cfg=self.conv_cfg,\n                           norm_cfg=self.norm_cfg,\n                           bias=self.norm_cfg is None))\n            self.feature_adaption = FeatureAlign(\n                self.feat_channels,\n                self.feat_channels,\n                kernel_size=3,\n                deform_groups=self.deform_groups)\n            self.conv_cls = nn.Conv2d(\n                int(self.feat_channels * 4),\n                self.cls_out_channels,\n                3,\n                padding=1)\n\n    def forward_single(self, x):\n        cls_feat = x\n        reg_feat = x\n        for reg_layer in self.reg_convs:\n            reg_feat = reg_layer(reg_feat)\n        bbox_pred = self.conv_reg(reg_feat)\n        if self.with_deform:\n            cls_feat = self.feature_adaption(cls_feat, bbox_pred.exp())\n        for cls_layer in self.cls_convs:\n            cls_feat = cls_layer(cls_feat)\n        cls_score = self.conv_cls(cls_feat)\n        return cls_score, bbox_pred\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bbox_list,\n             gt_label_list,\n             img_metas,\n             gt_bboxes_ignore=None):\n        assert len(cls_scores) == len(bbox_preds)\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        points = self.prior_generator.grid_priors(\n            featmap_sizes,\n            dtype=bbox_preds[0].dtype,\n            device=bbox_preds[0].device)\n        num_imgs = cls_scores[0].size(0)\n        flatten_cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)\n            for cls_score in cls_scores\n        ]\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n            for bbox_pred in bbox_preds\n        ]\n        flatten_cls_scores = torch.cat(flatten_cls_scores)\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds)\n        flatten_labels, flatten_bbox_targets = self.get_targets(\n            gt_bbox_list, gt_label_list, featmap_sizes, points)\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        pos_inds = ((flatten_labels >= 0)\n                    & (flatten_labels < self.num_classes)).nonzero().view(-1)\n        num_pos = len(pos_inds)\n\n        loss_cls = self.loss_cls(\n            flatten_cls_scores, flatten_labels, avg_factor=num_pos + num_imgs)\n        if num_pos > 0:\n            pos_bbox_preds = flatten_bbox_preds[pos_inds]\n            pos_bbox_targets = flatten_bbox_targets[pos_inds]\n            pos_weights = pos_bbox_targets.new_zeros(\n                pos_bbox_targets.size()) + 1.0\n            loss_bbox = self.loss_bbox(\n                pos_bbox_preds,\n                pos_bbox_targets,\n                pos_weights,\n                avg_factor=num_pos)\n        else:\n            loss_bbox = torch.tensor(\n                0,\n                dtype=flatten_bbox_preds.dtype,\n                device=flatten_bbox_preds.device)\n        return dict(loss_cls=loss_cls, loss_bbox=loss_bbox)\n\n    def get_targets(self, gt_bbox_list, gt_label_list, featmap_sizes, points):\n        label_list, bbox_target_list = multi_apply(\n            self._get_target_single,\n            gt_bbox_list,\n            gt_label_list,\n            featmap_size_list=featmap_sizes,\n    
        point_list=points)\n        flatten_labels = [\n            torch.cat([\n                labels_level_img.flatten() for labels_level_img in labels_level\n            ]) for labels_level in zip(*label_list)\n        ]\n        flatten_bbox_targets = [\n            torch.cat([\n                bbox_targets_level_img.reshape(-1, 4)\n                for bbox_targets_level_img in bbox_targets_level\n            ]) for bbox_targets_level in zip(*bbox_target_list)\n        ]\n        flatten_labels = torch.cat(flatten_labels)\n        flatten_bbox_targets = torch.cat(flatten_bbox_targets)\n        return flatten_labels, flatten_bbox_targets\n\n    def _get_target_single(self,\n                           gt_bboxes_raw,\n                           gt_labels_raw,\n                           featmap_size_list=None,\n                           point_list=None):\n\n        gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *\n                              (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))\n        label_list = []\n        bbox_target_list = []\n        # for each pyramid, find the cls and box target\n        for base_len, (lower_bound, upper_bound), stride, featmap_size, \\\n            points in zip(self.base_edge_list, self.scale_ranges,\n                          self.strides, featmap_size_list, point_list):\n            # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n            points = points.view(*featmap_size, 2)\n            x, y = points[..., 0], points[..., 1]\n            labels = gt_labels_raw.new_zeros(featmap_size) + self.num_classes\n            bbox_targets = gt_bboxes_raw.new(featmap_size[0], featmap_size[1],\n                                             4) + 1\n            # scale assignment\n            hit_indices = ((gt_areas >= lower_bound) &\n                           (gt_areas <= upper_bound)).nonzero().flatten()\n            if len(hit_indices) == 0:\n                label_list.append(labels)\n                bbox_target_list.append(torch.log(bbox_targets))\n                continue\n            _, hit_index_order = torch.sort(-gt_areas[hit_indices])\n            hit_indices = hit_indices[hit_index_order]\n            gt_bboxes = gt_bboxes_raw[hit_indices, :] / stride\n            gt_labels = gt_labels_raw[hit_indices]\n            half_w = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0])\n            half_h = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1])\n            # valid fovea area: left, right, top, down\n            pos_left = torch.ceil(\n                gt_bboxes[:, 0] + (1 - self.sigma) * half_w - 0.5).long(). \\\n                clamp(0, featmap_size[1] - 1)\n            pos_right = torch.floor(\n                gt_bboxes[:, 0] + (1 + self.sigma) * half_w - 0.5).long(). \\\n                clamp(0, featmap_size[1] - 1)\n            pos_top = torch.ceil(\n                gt_bboxes[:, 1] + (1 - self.sigma) * half_h - 0.5).long(). \\\n                clamp(0, featmap_size[0] - 1)\n            pos_down = torch.floor(\n                gt_bboxes[:, 1] + (1 + self.sigma) * half_h - 0.5).long(). 
\\\n                clamp(0, featmap_size[0] - 1)\n            for px1, py1, px2, py2, label, (gt_x1, gt_y1, gt_x2, gt_y2) in \\\n                    zip(pos_left, pos_top, pos_right, pos_down, gt_labels,\n                        gt_bboxes_raw[hit_indices, :]):\n                labels[py1:py2 + 1, px1:px2 + 1] = label\n                bbox_targets[py1:py2 + 1, px1:px2 + 1, 0] = \\\n                    (x[py1:py2 + 1, px1:px2 + 1] - gt_x1) / base_len\n                bbox_targets[py1:py2 + 1, px1:px2 + 1, 1] = \\\n                    (y[py1:py2 + 1, px1:px2 + 1] - gt_y1) / base_len\n                bbox_targets[py1:py2 + 1, px1:px2 + 1, 2] = \\\n                    (gt_x2 - x[py1:py2 + 1, px1:px2 + 1]) / base_len\n                bbox_targets[py1:py2 + 1, px1:px2 + 1, 3] = \\\n                    (gt_y2 - y[py1:py2 + 1, px1:px2 + 1]) / base_len\n            bbox_targets = bbox_targets.clamp(min=1. / 16, max=16.)\n            label_list.append(labels)\n            bbox_target_list.append(torch.log(bbox_targets))\n        return label_list, bbox_target_list\n\n    # Same as base_dense_head/_get_bboxes_single except self._bbox_decode\n    def _get_bboxes_single(self,\n                           cls_score_list,\n                           bbox_pred_list,\n                           score_factor_list,\n                           mlvl_priors,\n                           img_meta,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           **kwargs):\n        \"\"\"Transform outputs of a single image into bbox predictions.\n\n        Args:\n            cls_score_list (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_priors * num_classes, H, W).\n            bbox_pred_list (list[Tensor]): Box energies / deltas from\n                all scale levels of a single image, each item has shape\n                (num_priors * 4, H, W).\n            score_factor_list (list[Tensor]): Score factor from all scale\n                levels of a single image. Fovea head does not need this value.\n            mlvl_priors (list[Tensor]): Each element in the list is\n                the priors of a single level in feature pyramid, has shape\n                (num_priors, 2).\n            img_meta (dict): Image meta info.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels. If with_nms\n                is False and mlvl_score_factor is None, return mlvl_bboxes and\n                mlvl_scores, else return mlvl_bboxes, mlvl_scores and\n                mlvl_score_factor. Usually with_nms is False is used for aug\n                test. 
If with_nms is True, then return the following format\n\n                - det_bboxes (Tensor): Predicted bboxes with shape \\\n                    [num_bboxes, 5], where the first 4 columns are bounding \\\n                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \\\n                    column are scores between 0 and 1.\n                - det_labels (Tensor): Predicted labels of the corresponding \\\n                    box with shape [num_bboxes].\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_score_list) == len(bbox_pred_list)\n        img_shape = img_meta['img_shape']\n        nms_pre = cfg.get('nms_pre', -1)\n\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_labels = []\n        for level_idx, (cls_score, bbox_pred, stride, base_len, priors) in \\\n                enumerate(zip(cls_score_list, bbox_pred_list, self.strides,\n                              self.base_edge_list, mlvl_priors)):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n\n            scores = cls_score.permute(1, 2, 0).reshape(\n                -1, self.cls_out_channels).sigmoid()\n\n            # After https://github.com/open-mmlab/mmdetection/pull/6268/,\n            # this operation keeps fewer bboxes under the same `nms_pre`.\n            # There is no difference in performance for most models. If you\n            # find a slight drop in performance, you can set a larger\n            # `nms_pre` than before.\n            results = filter_scores_and_topk(\n                scores, cfg.score_thr, nms_pre,\n                dict(bbox_pred=bbox_pred, priors=priors))\n            scores, labels, _, filtered_results = results\n\n            bbox_pred = filtered_results['bbox_pred']\n            priors = filtered_results['priors']\n\n            bboxes = self._bbox_decode(priors, bbox_pred, base_len, img_shape)\n\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_labels.append(labels)\n\n        return self._bbox_post_process(mlvl_scores, mlvl_labels, mlvl_bboxes,\n                                       img_meta['scale_factor'], cfg, rescale,\n                                       with_nms)\n\n    def _bbox_decode(self, priors, bbox_pred, base_len, max_shape):\n        bbox_pred = bbox_pred.exp()\n\n        y = priors[:, 1]\n        x = priors[:, 0]\n        x1 = (x - base_len * bbox_pred[:, 0]). \\\n            clamp(min=0, max=max_shape[1] - 1)\n        y1 = (y - base_len * bbox_pred[:, 1]). \\\n            clamp(min=0, max=max_shape[0] - 1)\n        x2 = (x + base_len * bbox_pred[:, 2]). \\\n            clamp(min=0, max=max_shape[1] - 1)\n        y2 = (y + base_len * bbox_pred[:, 3]). \\\n            clamp(min=0, max=max_shape[0] - 1)\n        decoded_bboxes = torch.stack([x1, y1, x2, y2], -1)\n        return decoded_bboxes\n\n    def _get_points_single(self, *args, **kwargs):\n        \"\"\"Get points according to feature map size.\n\n        This function will be deprecated soon.\n        \"\"\"\n        warnings.warn(\n            '`_get_points_single` in `FoveaHead` will be '\n            'deprecated soon, we support a multi level point generator now'\n            'you can get points of a single level feature map '\n            'with `self.prior_generator.single_level_grid_priors` ')\n        y, x = super()._get_points_single(*args, **kwargs)\n        return y + 0.5, x + 0.5\n"
  },
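  {
    "path": "sketches/fovea_bbox_decode_sketch.py",
    "content": "# Illustrative sketch (hypothetical file, not part of mmdetection).\n# FoveaHead._bbox_decode turns per-point predictions back into boxes: the\n# four outputs are exponentiated, scaled by the level's base edge length,\n# placed around the point and clamped to the image. base_len and img_shape\n# below are example numbers only.\nimport torch\n\n\ndef fovea_bbox_decode(priors, bbox_pred, base_len, img_shape):\n    # priors: (n, 2) point coordinates (x, y); bbox_pred: (n, 4) raw outputs\n    bbox_pred = bbox_pred.exp()\n    x, y = priors[:, 0], priors[:, 1]\n    h, w = img_shape[:2]\n    x1 = (x - base_len * bbox_pred[:, 0]).clamp(min=0, max=w - 1)\n    y1 = (y - base_len * bbox_pred[:, 1]).clamp(min=0, max=h - 1)\n    x2 = (x + base_len * bbox_pred[:, 2]).clamp(min=0, max=w - 1)\n    y2 = (y + base_len * bbox_pred[:, 3]).clamp(min=0, max=h - 1)\n    return torch.stack([x1, y1, x2, y2], dim=-1)\n\n\nif __name__ == '__main__':\n    pts = torch.tensor([[100., 80.], [300., 200.]])\n    preds = torch.zeros(2, 4)  # exp(0) = 1 -> boxes of side 2 * base_len\n    print(fovea_bbox_decode(pts, preds, base_len=16, img_shape=(480, 640)))\n"
  },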
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/free_anchor_retina_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn.functional as F\n\nfrom mmdet.core import bbox_overlaps\nfrom ..builder import HEADS\nfrom .retina_head import RetinaHead\n\nEPS = 1e-12\n\n\n@HEADS.register_module()\nclass FreeAnchorRetinaHead(RetinaHead):\n    \"\"\"FreeAnchor RetinaHead used in https://arxiv.org/abs/1909.02466.\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        stacked_convs (int): Number of conv layers in cls and reg tower.\n            Default: 4.\n        conv_cfg (dict): dictionary to construct and config conv layer.\n            Default: None.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: norm_cfg=dict(type='GN', num_groups=32,\n            requires_grad=True).\n        pre_anchor_topk (int): Number of boxes that be token in each bag.\n        bbox_thr (float): The threshold of the saturated linear function. It is\n            usually the same with the IoU threshold used in NMS.\n        gamma (float): Gamma parameter in focal loss.\n        alpha (float): Alpha parameter in focal loss.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 stacked_convs=4,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 pre_anchor_topk=50,\n                 bbox_thr=0.6,\n                 gamma=2.0,\n                 alpha=0.5,\n                 **kwargs):\n        super(FreeAnchorRetinaHead,\n              self).__init__(num_classes, in_channels, stacked_convs, conv_cfg,\n                             norm_cfg, **kwargs)\n\n        self.pre_anchor_topk = pre_anchor_topk\n        self.bbox_thr = bbox_thr\n        self.gamma = gamma\n        self.alpha = alpha\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            gt_bboxes (list[Tensor]): each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n        device = cls_scores[0].device\n        anchor_list, _ = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        anchors = [torch.cat(anchor) for anchor in anchor_list]\n\n        # concatenate each level\n        cls_scores = [\n            cls.permute(0, 2, 3,\n                        1).reshape(cls.size(0), -1, 
self.cls_out_channels)\n            for cls in cls_scores\n        ]\n        bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(bbox_pred.size(0), -1, 4)\n            for bbox_pred in bbox_preds\n        ]\n        cls_scores = torch.cat(cls_scores, dim=1)\n        bbox_preds = torch.cat(bbox_preds, dim=1)\n\n        cls_prob = torch.sigmoid(cls_scores)\n        box_prob = []\n        num_pos = 0\n        positive_losses = []\n        for _, (anchors_, gt_labels_, gt_bboxes_, cls_prob_,\n                bbox_preds_) in enumerate(\n                    zip(anchors, gt_labels, gt_bboxes, cls_prob, bbox_preds)):\n\n            with torch.no_grad():\n                if len(gt_bboxes_) == 0:\n                    image_box_prob = torch.zeros(\n                        anchors_.size(0),\n                        self.cls_out_channels).type_as(bbox_preds_)\n                else:\n                    # box_localization: a_{j}^{loc}, shape: [j, 4]\n                    pred_boxes = self.bbox_coder.decode(anchors_, bbox_preds_)\n\n                    # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]\n                    object_box_iou = bbox_overlaps(gt_bboxes_, pred_boxes)\n\n                    # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]\n                    t1 = self.bbox_thr\n                    t2 = object_box_iou.max(\n                        dim=1, keepdim=True).values.clamp(min=t1 + 1e-12)\n                    object_box_prob = ((object_box_iou - t1) /\n                                       (t2 - t1)).clamp(\n                                           min=0, max=1)\n\n                    # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]\n                    num_obj = gt_labels_.size(0)\n                    indices = torch.stack([\n                        torch.arange(num_obj).type_as(gt_labels_), gt_labels_\n                    ],\n                                          dim=0)\n                    object_cls_box_prob = torch.sparse_coo_tensor(\n                        indices, object_box_prob)\n\n                    # image_box_iou: P{a_{j} \\in A_{+}}, shape: [c, j]\n                    \"\"\"\n                    from \"start\" to \"end\" implement:\n                    image_box_iou = torch.sparse.max(object_cls_box_prob,\n                                                     dim=0).t()\n\n                    \"\"\"\n                    # start\n                    box_cls_prob = torch.sparse.sum(\n                        object_cls_box_prob, dim=0).to_dense()\n\n                    indices = torch.nonzero(box_cls_prob, as_tuple=False).t_()\n                    if indices.numel() == 0:\n                        image_box_prob = torch.zeros(\n                            anchors_.size(0),\n                            self.cls_out_channels).type_as(object_box_prob)\n                    else:\n                        nonzero_box_prob = torch.where(\n                            (gt_labels_.unsqueeze(dim=-1) == indices[0]),\n                            object_box_prob[:, indices[1]],\n                            torch.tensor([\n                                0\n                            ]).type_as(object_box_prob)).max(dim=0).values\n\n                        # upmap to shape [j, c]\n                        image_box_prob = torch.sparse_coo_tensor(\n                            indices.flip([0]),\n                            nonzero_box_prob,\n                            size=(anchors_.size(0),\n                                  self.cls_out_channels)).to_dense()\n 
                   # end\n\n                box_prob.append(image_box_prob)\n\n            # construct bags for objects\n            match_quality_matrix = bbox_overlaps(gt_bboxes_, anchors_)\n            _, matched = torch.topk(\n                match_quality_matrix,\n                self.pre_anchor_topk,\n                dim=1,\n                sorted=False)\n            del match_quality_matrix\n\n            # matched_cls_prob: P_{ij}^{cls}\n            matched_cls_prob = torch.gather(\n                cls_prob_[matched], 2,\n                gt_labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk,\n                                                 1)).squeeze(2)\n\n            # matched_box_prob: P_{ij}^{loc}\n            matched_anchors = anchors_[matched]\n            matched_object_targets = self.bbox_coder.encode(\n                matched_anchors,\n                gt_bboxes_.unsqueeze(dim=1).expand_as(matched_anchors))\n            loss_bbox = self.loss_bbox(\n                bbox_preds_[matched],\n                matched_object_targets,\n                reduction_override='none').sum(-1)\n            matched_box_prob = torch.exp(-loss_bbox)\n\n            # positive_losses: {-log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) )}\n            num_pos += len(gt_bboxes_)\n            positive_losses.append(\n                self.positive_bag_loss(matched_cls_prob, matched_box_prob))\n        positive_loss = torch.cat(positive_losses).sum() / max(1, num_pos)\n\n        # box_prob: P{a_{j} \\in A_{+}}\n        box_prob = torch.stack(box_prob, dim=0)\n\n        # negative_loss:\n        # \\sum_{j}{ FL((1 - P{a_{j} \\in A_{+}}) * (1 - P_{j}^{bg})) } / n||B||\n        negative_loss = self.negative_bag_loss(cls_prob, box_prob).sum() / max(\n            1, num_pos * self.pre_anchor_topk)\n\n        # avoid the absence of gradients in regression subnet\n        # when no ground-truth in a batch\n        if num_pos == 0:\n            positive_loss = bbox_preds.sum() * 0\n\n        losses = {\n            'positive_bag_loss': positive_loss,\n            'negative_bag_loss': negative_loss\n        }\n        return losses\n\n    def positive_bag_loss(self, matched_cls_prob, matched_box_prob):\n        \"\"\"Compute positive bag loss.\n\n        :math:`-log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) )`.\n\n        :math:`P_{ij}^{cls}`: matched_cls_prob, classification probability of matched samples.\n\n        :math:`P_{ij}^{loc}`: matched_box_prob, box probability of matched samples.\n\n        Args:\n            matched_cls_prob (Tensor): Classification probability of matched\n                samples in shape (num_gt, pre_anchor_topk).\n            matched_box_prob (Tensor): BBox probability of matched samples,\n                in shape (num_gt, pre_anchor_topk).\n\n        Returns:\n            Tensor: Positive bag loss in shape (num_gt,).\n        \"\"\"  # noqa: E501, W605\n        # bag_prob = Mean-max(matched_prob)\n        matched_prob = matched_cls_prob * matched_box_prob\n        weight = 1 / torch.clamp(1 - matched_prob, 1e-12, None)\n        weight /= weight.sum(dim=1).unsqueeze(dim=-1)\n        bag_prob = (weight * matched_prob).sum(dim=1)\n        # positive_bag_loss = -self.alpha * log(bag_prob)\n        return self.alpha * F.binary_cross_entropy(\n            bag_prob, torch.ones_like(bag_prob), reduction='none')\n\n    def negative_bag_loss(self, cls_prob, box_prob):\n        \"\"\"Compute negative bag loss.\n\n        :math:`FL((1 - P_{a_{j} \\in A_{+}}) * (1 - P_{j}^{bg}))`.\n\n        
:math:`P_{a_{j} \\in A_{+}}`: Box probability of matched samples.\n\n        :math:`P_{j}^{bg}`: Classification probability of negative samples.\n\n        Args:\n            cls_prob (Tensor): Classification probability, in shape\n                (num_img, num_anchors, num_classes).\n            box_prob (Tensor): Box probability, in shape\n                (num_img, num_anchors, num_classes).\n\n        Returns:\n            Tensor: Negative bag loss in shape (num_img, num_anchors, num_classes).\n        \"\"\"  # noqa: E501, W605\n        prob = cls_prob * (1 - box_prob)\n        # There are some cases when neg_prob = 0.\n        # This will cause the neg_prob.log() to be inf without clamp.\n        prob = prob.clamp(min=EPS, max=1 - EPS)\n        negative_bag_loss = prob**self.gamma * F.binary_cross_entropy(\n            prob, torch.zeros_like(prob), reduction='none')\n        return (1 - self.alpha) * negative_bag_loss\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/fsaf_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (anchor_inside_flags, images_to_levels, multi_apply,\n                        unmap)\nfrom ..builder import HEADS\nfrom ..losses.accuracy import accuracy\nfrom ..losses.utils import weight_reduce_loss\nfrom .retina_head import RetinaHead\n\n\n@HEADS.register_module()\nclass FSAFHead(RetinaHead):\n    \"\"\"Anchor-free head used in `FSAF <https://arxiv.org/abs/1903.00621>`_.\n\n    The head contains two subnetworks. The first classifies anchor boxes and\n    the second regresses deltas for the anchors (num_anchors is 1 for anchor-\n    free methods)\n\n    Args:\n        *args: Same as its base class in :class:`RetinaHead`\n        score_threshold (float, optional): The score_threshold to calculate\n            positive recall. If given, prediction scores lower than this value\n            is counted as incorrect prediction. Default to None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n        **kwargs: Same as its base class in :class:`RetinaHead`\n\n    Example:\n        >>> import torch\n        >>> self = FSAFHead(11, 7)\n        >>> x = torch.rand(1, 7, 32, 32)\n        >>> cls_score, bbox_pred = self.forward_single(x)\n        >>> # Each anchor predicts a score for each class except background\n        >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors\n        >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors\n        >>> assert cls_per_anchor == self.num_classes\n        >>> assert box_per_anchor == 4\n    \"\"\"\n\n    def __init__(self, *args, score_threshold=None, init_cfg=None, **kwargs):\n        # The positive bias in self.retina_reg conv is to prevent predicted \\\n        #  bbox with 0 area\n        if init_cfg is None:\n            init_cfg = dict(\n                type='Normal',\n                layer='Conv2d',\n                std=0.01,\n                override=[\n                    dict(\n                        type='Normal',\n                        name='retina_cls',\n                        std=0.01,\n                        bias_prob=0.01),\n                    dict(\n                        type='Normal', name='retina_reg', std=0.01, bias=0.25)\n                ])\n        super().__init__(*args, init_cfg=init_cfg, **kwargs)\n        self.score_threshold = score_threshold\n\n    def forward_single(self, x):\n        \"\"\"Forward feature map of a single scale level.\n\n        Args:\n            x (Tensor): Feature map of a single scale level.\n\n        Returns:\n            tuple (Tensor):\n                cls_score (Tensor): Box scores for each scale level\n                    Has shape (N, num_points * num_classes, H, W).\n                bbox_pred (Tensor): Box energies / deltas for each scale\n                    level with shape (N, num_points * 4, H, W).\n        \"\"\"\n        cls_score, bbox_pred = super().forward_single(x)\n        # relu: TBLR encoder only accepts positive bbox_pred\n        return cls_score, self.relu(bbox_pred)\n\n    def _get_targets_single(self,\n                            flat_anchors,\n                            valid_flags,\n                            gt_bboxes,\n                            gt_bboxes_ignore,\n                            gt_labels,\n                            img_meta,\n                            label_channels=1,\n                            unmap_outputs=True):\n        
\"\"\"Compute regression and classification targets for anchors in a\n        single image.\n\n        Most of the codes are the same with the base class\n          :obj: `AnchorHead`, except that it also collects and returns\n          the matched gt index in the image (from 0 to num_gt-1). If the\n          anchor bbox is not matched to any gt, the corresponding value in\n          pos_gt_inds is -1.\n        \"\"\"\n        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,\n                                           img_meta['img_shape'][:2],\n                                           self.train_cfg.allowed_border)\n        if not inside_flags.any():\n            return (None, ) * 7\n        # Assign gt and sample anchors\n        anchors = flat_anchors[inside_flags.type(torch.bool), :]\n        assign_result = self.assigner.assign(\n            anchors, gt_bboxes, gt_bboxes_ignore,\n            None if self.sampling else gt_labels)\n\n        sampling_result = self.sampler.sample(assign_result, anchors,\n                                              gt_bboxes)\n\n        num_valid_anchors = anchors.shape[0]\n        bbox_targets = torch.zeros_like(anchors)\n        bbox_weights = torch.zeros_like(anchors)\n        labels = anchors.new_full((num_valid_anchors, ),\n                                  self.num_classes,\n                                  dtype=torch.long)\n        label_weights = anchors.new_zeros((num_valid_anchors, label_channels),\n                                          dtype=torch.float)\n        pos_gt_inds = anchors.new_full((num_valid_anchors, ),\n                                       -1,\n                                       dtype=torch.long)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n\n        if len(pos_inds) > 0:\n            if not self.reg_decoded_bbox:\n                pos_bbox_targets = self.bbox_coder.encode(\n                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)\n            else:\n                # When the regression loss (e.g. `IouLoss`, `GIouLoss`)\n                # is applied directly on the decoded bounding boxes, both\n                # the predicted boxes and regression targets should be with\n                # absolute coordinate format.\n                pos_bbox_targets = sampling_result.pos_gt_bboxes\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n            # The assigned gt_index for each anchor. 
(0-based)\n            pos_gt_inds[pos_inds] = sampling_result.pos_assigned_gt_inds\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # shadowed_labels is a tensor composed of tuples\n        #  (anchor_inds, class_label) that indicate those anchors lying in the\n        #  outer region of a gt or overlapped by another gt with a smaller\n        #  area.\n        #\n        # Therefore, only the shadowed labels are ignored for loss calculation.\n        # the key `shadowed_labels` is defined in :obj:`CenterRegionAssigner`\n        shadowed_labels = assign_result.get_extra_property('shadowed_labels')\n        if shadowed_labels is not None and shadowed_labels.numel():\n            if len(shadowed_labels.shape) == 2:\n                idx_, label_ = shadowed_labels[:, 0], shadowed_labels[:, 1]\n                assert (labels[idx_] != label_).all(), \\\n                    'One label cannot be both positive and ignored'\n                label_weights[idx_, label_] = 0\n            else:\n                label_weights[shadowed_labels] = 0\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_anchors.size(0)\n            labels = unmap(labels, num_total_anchors, inside_flags)\n            label_weights = unmap(label_weights, num_total_anchors,\n                                  inside_flags)\n            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)\n            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)\n            pos_gt_inds = unmap(\n                pos_gt_inds, num_total_anchors, inside_flags, fill=-1)\n\n        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,\n                neg_inds, sampling_result, pos_gt_inds)\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_points * num_classes, H, W).\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_points * 4, H, W).\n            gt_bboxes (list[Tensor]): each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        for i in range(len(bbox_preds)):  # loop over fpn level\n            # avoid 0 area of the predicted 
bbox\n            bbox_preds[i] = bbox_preds[i].clamp(min=1e-4)\n        # TODO: It may directly use the base-class loss function.\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n        batch_size = len(gt_bboxes)\n        device = cls_scores[0].device\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg,\n         pos_assigned_gt_inds_list) = cls_reg_targets\n\n        num_gts = np.array(list(map(len, gt_labels)))\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n        # concat all level anchors and flags to a single tensor\n        concat_anchor_list = []\n        for i in range(len(anchor_list)):\n            concat_anchor_list.append(torch.cat(anchor_list[i]))\n        all_anchor_list = images_to_levels(concat_anchor_list,\n                                           num_level_anchors)\n        losses_cls, losses_bbox = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            all_anchor_list,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            num_total_samples=num_total_samples)\n\n        # `pos_assigned_gt_inds_list` (length: fpn_levels) stores the assigned\n        # gt index of each anchor bbox in each fpn level.\n        cum_num_gts = list(np.cumsum(num_gts))  # length of batch_size\n        for i, assign in enumerate(pos_assigned_gt_inds_list):\n            # loop over fpn levels\n            for j in range(1, batch_size):\n                # loop over batch size\n                # Convert gt indices in each img to those in the batch\n                assign[j][assign[j] >= 0] += int(cum_num_gts[j - 1])\n            pos_assigned_gt_inds_list[i] = assign.flatten()\n            labels_list[i] = labels_list[i].flatten()\n        num_gts = sum(map(len, gt_labels))  # total number of gt in the batch\n        # The unique label index of each gt in the batch\n        label_sequence = torch.arange(num_gts, device=device)\n        # Collect the average loss of each gt in each level\n        with torch.no_grad():\n            loss_levels, = multi_apply(\n                self.collect_loss_level_single,\n                losses_cls,\n                losses_bbox,\n                pos_assigned_gt_inds_list,\n                labels_seq=label_sequence)\n            # Shape: (fpn_levels, num_gts). 
Loss of each gt at each fpn level\n            loss_levels = torch.stack(loss_levels, dim=0)\n            # Locate the best fpn level for loss back-propagation\n            if loss_levels.numel() == 0:  # zero gt\n                argmin = loss_levels.new_empty((num_gts, ), dtype=torch.long)\n            else:\n                _, argmin = loss_levels.min(dim=0)\n\n        # Reweight the loss of each (anchor, label) pair, so that only those\n        #  at the best gt level are back-propagated.\n        losses_cls, losses_bbox, pos_inds = multi_apply(\n            self.reweight_loss_single,\n            losses_cls,\n            losses_bbox,\n            pos_assigned_gt_inds_list,\n            labels_list,\n            list(range(len(losses_cls))),\n            min_levels=argmin)\n        num_pos = torch.cat(pos_inds, 0).sum().float()\n        pos_recall = self.calculate_pos_recall(cls_scores, labels_list,\n                                               pos_inds)\n\n        if num_pos == 0:  # No gt\n            avg_factor = num_pos + float(num_total_neg)\n        else:\n            avg_factor = num_pos\n        for i in range(len(losses_cls)):\n            losses_cls[i] /= avg_factor\n            losses_bbox[i] /= avg_factor\n        return dict(\n            loss_cls=losses_cls,\n            loss_bbox=losses_bbox,\n            num_pos=num_pos / batch_size,\n            pos_recall=pos_recall)\n\n    def calculate_pos_recall(self, cls_scores, labels_list, pos_inds):\n        \"\"\"Calculate positive recall with score threshold.\n\n        Args:\n            cls_scores (list[Tensor]): Classification scores at all fpn levels.\n                Each tensor is in shape (N, num_classes * num_anchors, H, W)\n            labels_list (list[Tensor]): The label that each anchor is assigned\n                to. Shape (N * H * W * num_anchors, )\n            pos_inds (list[Tensor]): List of bool tensors indicating whether\n                the anchor is assigned to a positive label.\n                Shape (N * H * W * num_anchors, )\n\n        Returns:\n            Tensor: A single float number indicating the positive recall.\n        \"\"\"\n        with torch.no_grad():\n            num_class = self.num_classes\n            scores = [\n                cls.permute(0, 2, 3, 1).reshape(-1, num_class)[pos]\n                for cls, pos in zip(cls_scores, pos_inds)\n            ]\n            labels = [\n                label.reshape(-1)[pos]\n                for label, pos in zip(labels_list, pos_inds)\n            ]\n            scores = torch.cat(scores, dim=0)\n            labels = torch.cat(labels, dim=0)\n            if self.use_sigmoid_cls:\n                scores = scores.sigmoid()\n            else:\n                scores = scores.softmax(dim=1)\n\n            return accuracy(scores, labels, thresh=self.score_threshold)\n\n    def collect_loss_level_single(self, cls_loss, reg_loss, assigned_gt_inds,\n                                  labels_seq):\n        \"\"\"Get the average loss in each FPN level w.r.t. each gt label.\n\n        Args:\n            cls_loss (Tensor): Classification loss of each feature map pixel,\n              shape (num_anchor, num_class)\n            reg_loss (Tensor): Regression loss of each feature map pixel,\n              shape (num_anchor, 4)\n            assigned_gt_inds (Tensor): It indicates which gt the prior is\n              assigned to (0-based, -1: no assignment). shape (num_anchor),\n            labels_seq: The rank of labels. 
shape (num_gt)\n\n        Returns:\n            shape: (num_gt), average loss of each gt in this level\n        \"\"\"\n        if len(reg_loss.shape) == 2:  # iou loss has shape (num_prior, 4)\n            reg_loss = reg_loss.sum(dim=-1)  # sum loss in tblr dims\n        if len(cls_loss.shape) == 2:\n            cls_loss = cls_loss.sum(dim=-1)  # sum loss in class dims\n        loss = cls_loss + reg_loss\n        assert loss.size(0) == assigned_gt_inds.size(0)\n        # Default loss value is 1e6 for a layer where no anchor is positive\n        #  to ensure it will not be chosen to back-propagate gradient\n        losses_ = loss.new_full(labels_seq.shape, 1e6)\n        for i, l in enumerate(labels_seq):\n            match = assigned_gt_inds == l\n            if match.any():\n                losses_[i] = loss[match].mean()\n        return losses_,\n\n    def reweight_loss_single(self, cls_loss, reg_loss, assigned_gt_inds,\n                             labels, level, min_levels):\n        \"\"\"Reweight loss values at each level.\n\n        Reassign loss values at each level by masking those where the\n        pre-calculated loss is too large. Then return the reduced losses.\n\n        Args:\n            cls_loss (Tensor): Element-wise classification loss.\n              Shape: (num_anchors, num_classes)\n            reg_loss (Tensor): Element-wise regression loss.\n              Shape: (num_anchors, 4)\n            assigned_gt_inds (Tensor): The gt indices that each anchor bbox\n              is assigned to. -1 denotes a negative anchor, otherwise it is the\n              gt index (0-based). Shape: (num_anchors, ),\n            labels (Tensor): Label assigned to anchors. Shape: (num_anchors, ).\n            level (int): The current level index in the pyramid\n              (0-4 for RetinaNet)\n            min_levels (Tensor): The best-matching level for each gt.\n              Shape: (num_gts, ),\n\n        Returns:\n            tuple:\n                - cls_loss: Reduced corrected classification loss. Scalar.\n                - reg_loss: Reduced corrected regression loss. Scalar.\n                - pos_flags (Tensor): Corrected bool tensor indicating the\n                  final positive anchors. Shape: (num_anchors, ).\n        \"\"\"\n        loc_weight = torch.ones_like(reg_loss)\n        cls_weight = torch.ones_like(cls_loss)\n        pos_flags = assigned_gt_inds >= 0  # positive pixel flag\n        pos_indices = torch.nonzero(pos_flags, as_tuple=False).flatten()\n\n        if pos_flags.any():  # pos pixels exist\n            pos_assigned_gt_inds = assigned_gt_inds[pos_flags]\n            zeroing_indices = (min_levels[pos_assigned_gt_inds] != level)\n            neg_indices = pos_indices[zeroing_indices]\n\n            if neg_indices.numel():\n                pos_flags[neg_indices] = 0\n                loc_weight[neg_indices] = 0\n                # Only the weight corresponding to the label is\n                #  zeroed out if not selected\n                zeroing_labels = labels[neg_indices]\n                assert (zeroing_labels >= 0).all()\n                cls_weight[neg_indices, zeroing_labels] = 0\n\n        # Weighted loss for both cls and reg loss\n        cls_loss = weight_reduce_loss(cls_loss, cls_weight, reduction='sum')\n        reg_loss = weight_reduce_loss(reg_loss, loc_weight, reduction='sum')\n\n        return cls_loss, reg_loss, pos_flags\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/ga_retina_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops import MaskedConv2d\n\nfrom ..builder import HEADS\nfrom .guided_anchor_head import FeatureAdaption, GuidedAnchorHead\n\n\n@HEADS.register_module()\nclass GARetinaHead(GuidedAnchorHead):\n    \"\"\"Guided-Anchor-based RetinaNet head.\"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 stacked_convs=4,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 init_cfg=None,\n                 **kwargs):\n        if init_cfg is None:\n            init_cfg = dict(\n                type='Normal',\n                layer='Conv2d',\n                std=0.01,\n                override=[\n                    dict(\n                        type='Normal',\n                        name='conv_loc',\n                        std=0.01,\n                        bias_prob=0.01),\n                    dict(\n                        type='Normal',\n                        name='retina_cls',\n                        std=0.01,\n                        bias_prob=0.01)\n                ])\n        self.stacked_convs = stacked_convs\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        super(GARetinaHead, self).__init__(\n            num_classes, in_channels, init_cfg=init_cfg, **kwargs)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n\n        self.conv_loc = nn.Conv2d(self.feat_channels, 1, 1)\n        self.conv_shape = nn.Conv2d(self.feat_channels, self.num_anchors * 2,\n                                    1)\n        self.feature_adaption_cls = FeatureAdaption(\n            self.feat_channels,\n            self.feat_channels,\n            kernel_size=3,\n            deform_groups=self.deform_groups)\n        self.feature_adaption_reg = FeatureAdaption(\n            self.feat_channels,\n            self.feat_channels,\n            kernel_size=3,\n            deform_groups=self.deform_groups)\n        self.retina_cls = MaskedConv2d(\n            self.feat_channels,\n            self.num_base_priors * self.cls_out_channels,\n            3,\n            padding=1)\n        self.retina_reg = MaskedConv2d(\n            self.feat_channels, self.num_base_priors * 4, 3, padding=1)\n\n    def forward_single(self, x):\n        \"\"\"Forward feature map of a single scale level.\"\"\"\n        cls_feat = x\n        reg_feat = x\n        for cls_conv in self.cls_convs:\n            cls_feat = cls_conv(cls_feat)\n        for reg_conv in self.reg_convs:\n            reg_feat = reg_conv(reg_feat)\n\n        
loc_pred = self.conv_loc(cls_feat)\n        shape_pred = self.conv_shape(reg_feat)\n\n        cls_feat = self.feature_adaption_cls(cls_feat, shape_pred)\n        reg_feat = self.feature_adaption_reg(reg_feat, shape_pred)\n\n        if not self.training:\n            mask = loc_pred.sigmoid()[0] >= self.loc_filter_thr\n        else:\n            mask = None\n        cls_score = self.retina_cls(cls_feat, mask)\n        bbox_pred = self.retina_reg(reg_feat, mask)\n        return cls_score, bbox_pred, shape_pred, loc_pred\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/ga_rpn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport warnings\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv import ConfigDict\nfrom mmcv.ops import nms\n\nfrom ..builder import HEADS\nfrom .guided_anchor_head import GuidedAnchorHead\n\n\n@HEADS.register_module()\nclass GARPNHead(GuidedAnchorHead):\n    \"\"\"Guided-Anchor-based RPN head.\"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='conv_loc',\n                         std=0.01,\n                         bias_prob=0.01)),\n                 **kwargs):\n        super(GARPNHead, self).__init__(\n            1, in_channels, init_cfg=init_cfg, **kwargs)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.rpn_conv = nn.Conv2d(\n            self.in_channels, self.feat_channels, 3, padding=1)\n        super(GARPNHead, self)._init_layers()\n\n    def forward_single(self, x):\n        \"\"\"Forward feature of a single scale level.\"\"\"\n\n        x = self.rpn_conv(x)\n        x = F.relu(x, inplace=True)\n        (cls_score, bbox_pred, shape_pred,\n         loc_pred) = super(GARPNHead, self).forward_single(x)\n        return cls_score, bbox_pred, shape_pred, loc_pred\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             shape_preds,\n             loc_preds,\n             gt_bboxes,\n             img_metas,\n             gt_bboxes_ignore=None):\n        losses = super(GARPNHead, self).loss(\n            cls_scores,\n            bbox_preds,\n            shape_preds,\n            loc_preds,\n            gt_bboxes,\n            None,\n            img_metas,\n            gt_bboxes_ignore=gt_bboxes_ignore)\n        return dict(\n            loss_rpn_cls=losses['loss_cls'],\n            loss_rpn_bbox=losses['loss_bbox'],\n            loss_anchor_shape=losses['loss_shape'],\n            loss_anchor_loc=losses['loss_loc'])\n\n    def _get_bboxes_single(self,\n                           cls_scores,\n                           bbox_preds,\n                           mlvl_anchors,\n                           mlvl_masks,\n                           img_shape,\n                           scale_factor,\n                           cfg,\n                           rescale=False):\n        cfg = self.test_cfg if cfg is None else cfg\n\n        cfg = copy.deepcopy(cfg)\n\n        # deprecate arguments warning\n        if 'nms' not in cfg or 'max_num' in cfg or 'nms_thr' in cfg:\n            warnings.warn(\n                'In rpn_proposal or test_cfg, '\n                'nms_thr has been moved to a dict named nms as '\n                'iou_threshold, max_num has been renamed as max_per_img, '\n                'name of original arguments and the way to specify '\n                'iou_threshold of NMS will be deprecated.')\n        if 'nms' not in cfg:\n            cfg.nms = ConfigDict(dict(type='nms', iou_threshold=cfg.nms_thr))\n        if 'max_num' in cfg:\n            if 'max_per_img' in cfg:\n                assert cfg.max_num == cfg.max_per_img, f'You ' \\\n                    f'set max_num and max_per_img at the same time, ' \\\n                    f'but get {cfg.max_num} ' \\\n                    f'and {cfg.max_per_img} respectively' \\\n                  
  'Please delete max_num which will be deprecated.'\n            else:\n                cfg.max_per_img = cfg.max_num\n        if 'nms_thr' in cfg:\n            assert cfg.nms.iou_threshold == cfg.nms_thr, f'You set ' \\\n                f'iou_threshold in nms and ' \\\n                f'nms_thr at the same time, but get ' \\\n                f'{cfg.nms.iou_threshold} and {cfg.nms_thr}' \\\n                f' respectively. Please delete the ' \\\n                f'nms_thr which will be deprecated.'\n\n        assert cfg.nms.get('type', 'nms') == 'nms', 'GARPNHead only support ' \\\n            'naive nms.'\n\n        mlvl_proposals = []\n        for idx in range(len(cls_scores)):\n            rpn_cls_score = cls_scores[idx]\n            rpn_bbox_pred = bbox_preds[idx]\n            anchors = mlvl_anchors[idx]\n            mask = mlvl_masks[idx]\n            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]\n            # if no location is kept, end.\n            if mask.sum() == 0:\n                continue\n            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)\n            if self.use_sigmoid_cls:\n                rpn_cls_score = rpn_cls_score.reshape(-1)\n                scores = rpn_cls_score.sigmoid()\n            else:\n                rpn_cls_score = rpn_cls_score.reshape(-1, 2)\n                # remind that we set FG labels to [0, num_class-1]\n                # since mmdet v2.0\n                # BG cat_id: num_class\n                scores = rpn_cls_score.softmax(dim=1)[:, :-1]\n            # filter scores, bbox_pred w.r.t. mask.\n            # anchors are filtered in get_anchors() beforehand.\n            scores = scores[mask]\n            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1,\n                                                                   4)[mask, :]\n            if scores.dim() == 0:\n                rpn_bbox_pred = rpn_bbox_pred.unsqueeze(0)\n                anchors = anchors.unsqueeze(0)\n                scores = scores.unsqueeze(0)\n            # filter anchors, bbox_pred, scores w.r.t. scores\n            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:\n                _, topk_inds = scores.topk(cfg.nms_pre)\n                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]\n                anchors = anchors[topk_inds, :]\n                scores = scores[topk_inds]\n            # get proposals w.r.t. 
anchors and rpn_bbox_pred\n            proposals = self.bbox_coder.decode(\n                anchors, rpn_bbox_pred, max_shape=img_shape)\n            # filter out too small bboxes\n            if cfg.min_bbox_size >= 0:\n                w = proposals[:, 2] - proposals[:, 0]\n                h = proposals[:, 3] - proposals[:, 1]\n                valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)\n                if not valid_mask.all():\n                    proposals = proposals[valid_mask]\n                    scores = scores[valid_mask]\n\n            # NMS in current level\n            proposals, _ = nms(proposals, scores, cfg.nms.iou_threshold)\n            proposals = proposals[:cfg.nms_post, :]\n            mlvl_proposals.append(proposals)\n        proposals = torch.cat(mlvl_proposals, 0)\n        if cfg.get('nms_across_levels', False):\n            # NMS across multi levels\n            proposals, _ = nms(proposals[:, :4], proposals[:, -1],\n                               cfg.nms.iou_threshold)\n            proposals = proposals[:cfg.max_per_img, :]\n        else:\n            scores = proposals[:, 4]\n            num = min(cfg.max_per_img, proposals.shape[0])\n            _, topk_inds = scores.topk(num)\n            proposals = proposals[topk_inds, :]\n        return proposals\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/gfl_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule, Scale\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (anchor_inside_flags, bbox_overlaps, build_assigner,\n                        build_sampler, images_to_levels, multi_apply,\n                        reduce_mean, unmap)\nfrom mmdet.core.utils import filter_scores_and_topk\nfrom ..builder import HEADS, build_loss\nfrom .anchor_head import AnchorHead\n\n\nclass Integral(nn.Module):\n    \"\"\"A fixed layer for calculating integral result from distribution.\n\n    This layer calculates the target location by :math: `sum{P(y_i) * y_i}`,\n    P(y_i) denotes the softmax vector that represents the discrete distribution\n    y_i denotes the discrete set, usually {0, 1, 2, ..., reg_max}\n\n    Args:\n        reg_max (int): The maximal value of the discrete set. Default: 16. You\n            may want to reset it according to your new dataset or related\n            settings.\n    \"\"\"\n\n    def __init__(self, reg_max=16):\n        super(Integral, self).__init__()\n        self.reg_max = reg_max\n        self.register_buffer('project',\n                             torch.linspace(0, self.reg_max, self.reg_max + 1))\n\n    def forward(self, x):\n        \"\"\"Forward feature from the regression head to get integral result of\n        bounding box location.\n\n        Args:\n            x (Tensor): Features of the regression head, shape (N, 4*(n+1)),\n                n is self.reg_max.\n\n        Returns:\n            x (Tensor): Integral result of box locations, i.e., distance\n                offsets from the box center in four directions, shape (N, 4).\n        \"\"\"\n        x = F.softmax(x.reshape(-1, self.reg_max + 1), dim=1)\n        x = F.linear(x, self.project.type_as(x)).reshape(-1, 4)\n        return x\n\n\n@HEADS.register_module()\nclass GFLHead(AnchorHead):\n    \"\"\"Generalized Focal Loss: Learning Qualified and Distributed Bounding\n    Boxes for Dense Object Detection.\n\n    GFL head structure is similar with ATSS, however GFL uses\n    1) joint representation for classification and localization quality, and\n    2) flexible General distribution for bounding box locations,\n    which are supervised by\n    Quality Focal Loss (QFL) and Distribution Focal Loss (DFL), respectively\n\n    https://arxiv.org/abs/2006.04388\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        stacked_convs (int): Number of conv layers in cls and reg tower.\n            Default: 4.\n        conv_cfg (dict): dictionary to construct and config conv layer.\n            Default: None.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: dict(type='GN', num_groups=32, requires_grad=True).\n        loss_qfl (dict): Config of Quality Focal Loss (QFL).\n        bbox_coder (dict): Config of bbox coder. Defaults\n            'DistancePointBBoxCoder'.\n        reg_max (int): Max value of integral set :math: `{0, ..., reg_max}`\n            in QFL setting. 
Default: 16.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    Example:\n        >>> self = GFLHead(11, 7)\n        >>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]]\n        >>> cls_quality_score, bbox_pred = self.forward(feats)\n        >>> assert len(cls_quality_score) == len(self.scales)\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 stacked_convs=4,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 loss_dfl=dict(type='DistributionFocalLoss', loss_weight=0.25),\n                 bbox_coder=dict(type='DistancePointBBoxCoder'),\n                 reg_max=16,\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='gfl_cls',\n                         std=0.01,\n                         bias_prob=0.01)),\n                 **kwargs):\n        self.stacked_convs = stacked_convs\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.reg_max = reg_max\n        super(GFLHead, self).__init__(\n            num_classes,\n            in_channels,\n            bbox_coder=bbox_coder,\n            init_cfg=init_cfg,\n            **kwargs)\n\n        self.sampling = False\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            # SSD sampling=False so use PseudoSampler\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n\n        self.integral = Integral(self.reg_max)\n        self.loss_dfl = build_loss(loss_dfl)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        assert self.num_anchors == 1, 'anchor free version'\n        self.gfl_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n        self.gfl_reg = nn.Conv2d(\n            self.feat_channels, 4 * (self.reg_max + 1), 3, padding=1)\n        self.scales = nn.ModuleList(\n            [Scale(1.0) for _ in self.prior_generator.strides])\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple: Usually a tuple of classification scores and bbox prediction\n                cls_scores (list[Tensor]): 
Classification and quality (IoU)\n                    joint scores for all scale levels, each is a 4D-tensor,\n                    the channel number is num_classes.\n                bbox_preds (list[Tensor]): Box distribution logits for all\n                    scale levels, each is a 4D-tensor, the channel number is\n                    4*(n+1), n is max value of integral set.\n        \"\"\"\n        return multi_apply(self.forward_single, feats, self.scales)\n\n    def forward_single(self, x, scale):\n        \"\"\"Forward feature of a single scale level.\n\n        Args:\n            x (Tensor): Features of a single scale level.\n            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize\n                the bbox prediction.\n\n        Returns:\n            tuple:\n                cls_score (Tensor): Cls and quality joint scores for a single\n                    scale level the channel number is num_classes.\n                bbox_pred (Tensor): Box distribution logits for a single scale\n                    level, the channel number is 4*(n+1), n is max value of\n                    integral set.\n        \"\"\"\n        cls_feat = x\n        reg_feat = x\n        for cls_conv in self.cls_convs:\n            cls_feat = cls_conv(cls_feat)\n        for reg_conv in self.reg_convs:\n            reg_feat = reg_conv(reg_feat)\n        cls_score = self.gfl_cls(cls_feat)\n        bbox_pred = scale(self.gfl_reg(reg_feat)).float()\n        return cls_score, bbox_pred\n\n    def anchor_center(self, anchors):\n        \"\"\"Get anchor centers from anchors.\n\n        Args:\n            anchors (Tensor): Anchor list with shape (N, 4), \"xyxy\" format.\n\n        Returns:\n            Tensor: Anchor centers with shape (N, 2), \"xy\" format.\n        \"\"\"\n        anchors_cx = (anchors[..., 2] + anchors[..., 0]) / 2\n        anchors_cy = (anchors[..., 3] + anchors[..., 1]) / 2\n        return torch.stack([anchors_cx, anchors_cy], dim=-1)\n\n    def loss_single(self, anchors, cls_score, bbox_pred, labels, label_weights,\n                    bbox_targets, stride, num_total_samples):\n        \"\"\"Compute loss of a single scale level.\n\n        Args:\n            anchors (Tensor): Box reference for each scale level with shape\n                (N, num_total_anchors, 4).\n            cls_score (Tensor): Cls and quality joint scores for each scale\n                level has shape (N, num_classes, H, W).\n            bbox_pred (Tensor): Box distribution logits for each scale\n                level with shape (N, 4*(n+1), H, W), n is max value of integral\n                set.\n            labels (Tensor): Labels of each anchors with shape\n                (N, num_total_anchors).\n            label_weights (Tensor): Label weights of each anchor with shape\n                (N, num_total_anchors)\n            bbox_targets (Tensor): BBox regression targets of each anchor\n                weight shape (N, num_total_anchors, 4).\n            stride (tuple): Stride in this scale level.\n            num_total_samples (int): Number of positive samples that is\n                reduced over all GPUs.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert stride[0] == stride[1], 'h stride is not equal to w stride!'\n        anchors = anchors.reshape(-1, 4)\n        cls_score = cls_score.permute(0, 2, 3,\n                                      1).reshape(-1, self.cls_out_channels)\n        bbox_pred = bbox_pred.permute(0, 2, 3,\n         
                             1).reshape(-1, 4 * (self.reg_max + 1))\n        bbox_targets = bbox_targets.reshape(-1, 4)\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = ((labels >= 0)\n                    & (labels < bg_class_ind)).nonzero().squeeze(1)\n        score = label_weights.new_zeros(labels.shape)\n\n        if len(pos_inds) > 0:\n            pos_bbox_targets = bbox_targets[pos_inds]\n            pos_bbox_pred = bbox_pred[pos_inds]\n            pos_anchors = anchors[pos_inds]\n            pos_anchor_centers = self.anchor_center(pos_anchors) / stride[0]\n\n            weight_targets = cls_score.detach().sigmoid()\n            weight_targets = weight_targets.max(dim=1)[0][pos_inds]\n            pos_bbox_pred_corners = self.integral(pos_bbox_pred)\n            pos_decode_bbox_pred = self.bbox_coder.decode(\n                pos_anchor_centers, pos_bbox_pred_corners)\n            pos_decode_bbox_targets = pos_bbox_targets / stride[0]\n            score[pos_inds] = bbox_overlaps(\n                pos_decode_bbox_pred.detach(),\n                pos_decode_bbox_targets,\n                is_aligned=True)\n            pred_corners = pos_bbox_pred.reshape(-1, self.reg_max + 1)\n            target_corners = self.bbox_coder.encode(pos_anchor_centers,\n                                                    pos_decode_bbox_targets,\n                                                    self.reg_max).reshape(-1)\n\n            # regression loss\n            loss_bbox = self.loss_bbox(\n                pos_decode_bbox_pred,\n                pos_decode_bbox_targets,\n                weight=weight_targets,\n                avg_factor=1.0)\n\n            # dfl loss\n            loss_dfl = self.loss_dfl(\n                pred_corners,\n                target_corners,\n                weight=weight_targets[:, None].expand(-1, 4).reshape(-1),\n                avg_factor=4.0)\n        else:\n            loss_bbox = bbox_pred.sum() * 0\n            loss_dfl = bbox_pred.sum() * 0\n            weight_targets = bbox_pred.new_tensor(0)\n\n        # cls (qfl) loss\n        loss_cls = self.loss_cls(\n            cls_score, (labels, score),\n            weight=label_weights,\n            avg_factor=num_total_samples)\n\n        return loss_cls, loss_bbox, loss_dfl, weight_targets.sum()\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Cls and quality scores for each scale\n                level has shape (N, num_classes, H, W).\n            bbox_preds (list[Tensor]): Box distribution logits for each scale\n                level with shape (N, 4*(n+1), H, W), n is max value of integral\n                set.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): specify which bounding\n                boxes can be 
ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if cls_reg_targets is None:\n            return None\n\n        (anchor_list, labels_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets\n\n        num_total_samples = reduce_mean(\n            torch.tensor(num_total_pos, dtype=torch.float,\n                         device=device)).item()\n        num_total_samples = max(num_total_samples, 1.0)\n\n        losses_cls, losses_bbox, losses_dfl,\\\n            avg_factor = multi_apply(\n                self.loss_single,\n                anchor_list,\n                cls_scores,\n                bbox_preds,\n                labels_list,\n                label_weights_list,\n                bbox_targets_list,\n                self.prior_generator.strides,\n                num_total_samples=num_total_samples)\n\n        avg_factor = sum(avg_factor)\n        avg_factor = reduce_mean(avg_factor).clamp_(min=1).item()\n        losses_bbox = list(map(lambda x: x / avg_factor, losses_bbox))\n        losses_dfl = list(map(lambda x: x / avg_factor, losses_dfl))\n        return dict(\n            loss_cls=losses_cls, loss_bbox=losses_bbox, loss_dfl=losses_dfl)\n\n    def _get_bboxes_single(self,\n                           cls_score_list,\n                           bbox_pred_list,\n                           score_factor_list,\n                           mlvl_priors,\n                           img_meta,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           **kwargs):\n        \"\"\"Transform outputs of a single image into bbox predictions.\n\n        Args:\n            cls_score_list (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_priors * num_classes, H, W).\n            bbox_pred_list (list[Tensor]): Box energies / deltas from\n                all scale levels of a single image, each item has shape\n                (num_priors * 4, H, W).\n            score_factor_list (list[Tensor]): Score factor from all scale\n                levels of a single image. 
GFL head does not need this value.\n            mlvl_priors (list[Tensor]): Each element in the list is\n                the priors of a single level in feature pyramid, has shape\n                (num_priors, 4).\n            img_meta (dict): Image meta info.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels. If with_nms\n                is False and mlvl_score_factor is None, return mlvl_bboxes and\n                mlvl_scores, else return mlvl_bboxes, mlvl_scores and\n                mlvl_score_factor. Usually with_nms is False is used for aug\n                test. If with_nms is True, then return the following format\n\n                - det_bboxes (Tensor): Predicted bboxes with shape \\\n                    [num_bboxes, 5], where the first 4 columns are bounding \\\n                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \\\n                    column are scores between 0 and 1.\n                - det_labels (Tensor): Predicted labels of the corresponding \\\n                    box with shape [num_bboxes].\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        img_shape = img_meta['img_shape']\n        nms_pre = cfg.get('nms_pre', -1)\n\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_labels = []\n        for level_idx, (cls_score, bbox_pred, stride, priors) in enumerate(\n                zip(cls_score_list, bbox_pred_list,\n                    self.prior_generator.strides, mlvl_priors)):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            assert stride[0] == stride[1]\n\n            bbox_pred = bbox_pred.permute(1, 2, 0)\n            bbox_pred = self.integral(bbox_pred) * stride[0]\n\n            scores = cls_score.permute(1, 2, 0).reshape(\n                -1, self.cls_out_channels).sigmoid()\n\n            # After https://github.com/open-mmlab/mmdetection/pull/6268/,\n            # this operation keeps fewer bboxes under the same `nms_pre`.\n            # There is no difference in performance for most models. 
If you\n            # find a slight drop in performance, you can set a larger\n            # `nms_pre` than before.\n            results = filter_scores_and_topk(\n                scores, cfg.score_thr, nms_pre,\n                dict(bbox_pred=bbox_pred, priors=priors))\n            scores, labels, _, filtered_results = results\n\n            bbox_pred = filtered_results['bbox_pred']\n            priors = filtered_results['priors']\n\n            bboxes = self.bbox_coder.decode(\n                self.anchor_center(priors), bbox_pred, max_shape=img_shape)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_labels.append(labels)\n\n        return self._bbox_post_process(\n            mlvl_scores,\n            mlvl_labels,\n            mlvl_bboxes,\n            img_meta['scale_factor'],\n            cfg,\n            rescale=rescale,\n            with_nms=with_nms)\n\n    def get_targets(self,\n                    anchor_list,\n                    valid_flag_list,\n                    gt_bboxes_list,\n                    img_metas,\n                    gt_bboxes_ignore_list=None,\n                    gt_labels_list=None,\n                    label_channels=1,\n                    unmap_outputs=True):\n        \"\"\"Get targets for GFL head.\n\n        This method is almost the same as `AnchorHead.get_targets()`. Besides\n        returning the targets as the parent method does, it also returns the\n        anchors as the first element of the returned tuple.\n        \"\"\"\n        num_imgs = len(img_metas)\n        assert len(anchor_list) == len(valid_flag_list) == num_imgs\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n        num_level_anchors_list = [num_level_anchors] * num_imgs\n\n        # concat all level anchors and flags to a single tensor\n        for i in range(num_imgs):\n            assert len(anchor_list[i]) == len(valid_flag_list[i])\n            anchor_list[i] = torch.cat(anchor_list[i])\n            valid_flag_list[i] = torch.cat(valid_flag_list[i])\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        (all_anchors, all_labels, all_label_weights, all_bbox_targets,\n         all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(\n             self._get_target_single,\n             anchor_list,\n             valid_flag_list,\n             num_level_anchors_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             gt_labels_list,\n             img_metas,\n             label_channels=label_channels,\n             unmap_outputs=unmap_outputs)\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        # split targets to a list w.r.t. 
multiple levels\n        anchors_list = images_to_levels(all_anchors, num_level_anchors)\n        labels_list = images_to_levels(all_labels, num_level_anchors)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors)\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors)\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_anchors)\n        return (anchors_list, labels_list, label_weights_list,\n                bbox_targets_list, bbox_weights_list, num_total_pos,\n                num_total_neg)\n\n    def _get_target_single(self,\n                           flat_anchors,\n                           valid_flags,\n                           num_level_anchors,\n                           gt_bboxes,\n                           gt_bboxes_ignore,\n                           gt_labels,\n                           img_meta,\n                           label_channels=1,\n                           unmap_outputs=True):\n        \"\"\"Compute regression, classification targets for anchors in a single\n        image.\n\n        Args:\n            flat_anchors (Tensor): Multi-level anchors of the image, which are\n                concatenated into a single tensor of shape (num_anchors, 4)\n            valid_flags (Tensor): Multi level valid flags of the image,\n                which are concatenated into a single tensor of\n                    shape (num_anchors,).\n            num_level_anchors Tensor): Number of anchors of each scale level.\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            img_meta (dict): Meta info of the image.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple: N is the number of total anchors in the image.\n                anchors (Tensor): All anchors in the image with shape (N, 4).\n                labels (Tensor): Labels of all anchors in the image with shape\n                    (N,).\n                label_weights (Tensor): Label weights of all anchor in the\n                    image with shape (N,).\n                bbox_targets (Tensor): BBox targets of all anchors in the\n                    image with shape (N, 4).\n                bbox_weights (Tensor): BBox weights of all anchors in the\n                    image with shape (N, 4).\n                pos_inds (Tensor): Indices of positive anchor with shape\n                    (num_pos,).\n                neg_inds (Tensor): Indices of negative anchor with shape\n                    (num_neg,).\n        \"\"\"\n        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,\n                                           img_meta['img_shape'][:2],\n                                           self.train_cfg.allowed_border)\n        if not inside_flags.any():\n            return (None, ) * 7\n        # assign gt and sample anchors\n        anchors = flat_anchors[inside_flags, :]\n\n        num_level_anchors_inside = self.get_num_level_anchors_inside(\n            
num_level_anchors, inside_flags)\n        assign_result = self.assigner.assign(anchors, num_level_anchors_inside,\n                                             gt_bboxes, gt_bboxes_ignore,\n                                             gt_labels)\n\n        sampling_result = self.sampler.sample(assign_result, anchors,\n                                              gt_bboxes)\n\n        num_valid_anchors = anchors.shape[0]\n        bbox_targets = torch.zeros_like(anchors)\n        bbox_weights = torch.zeros_like(anchors)\n        labels = anchors.new_full((num_valid_anchors, ),\n                                  self.num_classes,\n                                  dtype=torch.long)\n        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            pos_bbox_targets = sampling_result.pos_gt_bboxes\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1.0\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_anchors.size(0)\n            anchors = unmap(anchors, num_total_anchors, inside_flags)\n            labels = unmap(\n                labels, num_total_anchors, inside_flags, fill=self.num_classes)\n            label_weights = unmap(label_weights, num_total_anchors,\n                                  inside_flags)\n            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)\n            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)\n\n        return (anchors, labels, label_weights, bbox_targets, bbox_weights,\n                pos_inds, neg_inds)\n\n    def get_num_level_anchors_inside(self, num_level_anchors, inside_flags):\n        split_inside_flags = torch.split(inside_flags, num_level_anchors)\n        num_level_anchors_inside = [\n            int(flags.sum()) for flags in split_inside_flags\n        ]\n        return num_level_anchors_inside\n"
  },
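Illustrative note (not part of the repository): the GFL head above splits a flat per-image "inside the image" mask back into per-level counts via `get_num_level_anchors_inside` before the assigner runs. The snippet below is a minimal standalone sketch of that split with made-up anchor counts per FPN level.

```python
# Minimal standalone sketch of the per-level split done by
# GFLHead.get_num_level_anchors_inside: a flat boolean mask of valid-anchor
# flags is cut back into per-level chunks and counted. Sizes are invented
# purely for illustration.
import torch

num_level_anchors = [6, 4, 2]                      # anchors per FPN level (assumed)
inside_flags = torch.tensor(
    [1, 1, 0, 1, 1, 1,                             # level 0
     0, 0, 1, 1,                                   # level 1
     1, 0], dtype=torch.bool)                      # level 2

split_inside_flags = torch.split(inside_flags, num_level_anchors)
num_level_anchors_inside = [int(flags.sum()) for flags in split_inside_flags]
print(num_level_anchors_inside)                    # [5, 2, 1]
```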
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/guided_anchor_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.ops import DeformConv2d, MaskedConv2d\nfrom mmcv.runner import BaseModule, force_fp32\n\nfrom mmdet.core import (anchor_inside_flags, build_assigner, build_bbox_coder,\n                        build_prior_generator, build_sampler, calc_region,\n                        images_to_levels, multi_apply, multiclass_nms, unmap)\nfrom ..builder import HEADS, build_loss\nfrom .anchor_head import AnchorHead\n\n\nclass FeatureAdaption(BaseModule):\n    \"\"\"Feature Adaption Module.\n\n    Feature Adaption Module is implemented based on DCN v1.\n    It uses anchor shape prediction rather than feature map to\n    predict offsets of deform conv layer.\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        out_channels (int): Number of channels in the output feature map.\n        kernel_size (int): Deformable conv kernel size.\n        deform_groups (int): Deformable conv group size.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size=3,\n                 deform_groups=4,\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.1,\n                     override=dict(\n                         type='Normal', name='conv_adaption', std=0.01))):\n        super(FeatureAdaption, self).__init__(init_cfg)\n        offset_channels = kernel_size * kernel_size * 2\n        self.conv_offset = nn.Conv2d(\n            2, deform_groups * offset_channels, 1, bias=False)\n        self.conv_adaption = DeformConv2d(\n            in_channels,\n            out_channels,\n            kernel_size=kernel_size,\n            padding=(kernel_size - 1) // 2,\n            deform_groups=deform_groups)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x, shape):\n        offset = self.conv_offset(shape.detach())\n        x = self.relu(self.conv_adaption(x, offset))\n        return x\n\n\n@HEADS.register_module()\nclass GuidedAnchorHead(AnchorHead):\n    \"\"\"Guided-Anchor-based head (GA-RPN, GA-RetinaNet, etc.).\n\n    This GuidedAnchorHead will predict high-quality feature guided\n    anchors and locations where anchors will be kept in inference.\n    There are mainly 3 categories of bounding-boxes.\n\n    - Sampled 9 pairs for target assignment. (approxes)\n    - The square boxes where the predicted anchors are based on. (squares)\n    - Guided anchors.\n\n    Please refer to https://arxiv.org/abs/1901.03278 for more details.\n\n    Args:\n        num_classes (int): Number of classes.\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int): Number of hidden channels.\n        approx_anchor_generator (dict): Config dict for approx generator\n        square_anchor_generator (dict): Config dict for square generator\n        anchor_coder (dict): Config dict for anchor coder\n        bbox_coder (dict): Config dict for bbox coder\n        reg_decoded_bbox (bool): If true, the regression loss would be\n            applied directly on decoded bounding boxes, converting both\n            the predicted boxes and regression targets to absolute\n            coordinates format. Default False. 
It should be `True` when\n            using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.\n        deform_groups: (int): Group number of DCN in\n            FeatureAdaption module.\n        loc_filter_thr (float): Threshold to filter out unconcerned regions.\n        loss_loc (dict): Config of location loss.\n        loss_shape (dict): Config of anchor shape loss.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of bbox regression loss.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(\n            self,\n            num_classes,\n            in_channels,\n            feat_channels=256,\n            approx_anchor_generator=dict(\n                type='AnchorGenerator',\n                octave_base_scale=8,\n                scales_per_octave=3,\n                ratios=[0.5, 1.0, 2.0],\n                strides=[4, 8, 16, 32, 64]),\n            square_anchor_generator=dict(\n                type='AnchorGenerator',\n                ratios=[1.0],\n                scales=[8],\n                strides=[4, 8, 16, 32, 64]),\n            anchor_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[.0, .0, .0, .0],\n                target_stds=[1.0, 1.0, 1.0, 1.0]\n            ),\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[.0, .0, .0, .0],\n                target_stds=[1.0, 1.0, 1.0, 1.0]\n            ),\n            reg_decoded_bbox=False,\n            deform_groups=4,\n            loc_filter_thr=0.01,\n            train_cfg=None,\n            test_cfg=None,\n            loss_loc=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0),\n            loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),\n            loss_cls=dict(\n                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n            loss_bbox=dict(type='SmoothL1Loss', beta=1.0,\n                           loss_weight=1.0),\n            init_cfg=dict(type='Normal', layer='Conv2d', std=0.01,\n                          override=dict(type='Normal',\n                                        name='conv_loc',\n                                        std=0.01,\n                                        bias_prob=0.01))):  # yapf: disable\n        super(AnchorHead, self).__init__(init_cfg)\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.feat_channels = feat_channels\n        self.deform_groups = deform_groups\n        self.loc_filter_thr = loc_filter_thr\n\n        # build approx_anchor_generator and square_anchor_generator\n        assert (approx_anchor_generator['octave_base_scale'] ==\n                square_anchor_generator['scales'][0])\n        assert (approx_anchor_generator['strides'] ==\n                square_anchor_generator['strides'])\n        self.approx_anchor_generator = build_prior_generator(\n            approx_anchor_generator)\n        self.square_anchor_generator = build_prior_generator(\n            square_anchor_generator)\n        self.approxs_per_octave = self.approx_anchor_generator \\\n            .num_base_priors[0]\n\n        self.reg_decoded_bbox = reg_decoded_bbox\n\n        # one anchor per location\n        self.num_base_priors = self.square_anchor_generator.num_base_priors[0]\n\n        self.use_sigmoid_cls = 
loss_cls.get('use_sigmoid', False)\n        self.loc_focal_loss = loss_loc['type'] in ['FocalLoss']\n        self.sampling = loss_cls['type'] not in ['FocalLoss']\n        self.ga_sampling = train_cfg is not None and hasattr(\n            train_cfg, 'ga_sampler')\n        if self.use_sigmoid_cls:\n            self.cls_out_channels = self.num_classes\n        else:\n            self.cls_out_channels = self.num_classes + 1\n\n        # build bbox_coder\n        self.anchor_coder = build_bbox_coder(anchor_coder)\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n        # build losses\n        self.loss_loc = build_loss(loss_loc)\n        self.loss_shape = build_loss(loss_shape)\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            # use PseudoSampler when sampling is False\n            if self.sampling and hasattr(self.train_cfg, 'sampler'):\n                sampler_cfg = self.train_cfg.sampler\n            else:\n                sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n\n            self.ga_assigner = build_assigner(self.train_cfg.ga_assigner)\n            if self.ga_sampling:\n                ga_sampler_cfg = self.train_cfg.ga_sampler\n            else:\n                ga_sampler_cfg = dict(type='PseudoSampler')\n            self.ga_sampler = build_sampler(ga_sampler_cfg, context=self)\n\n        self.fp16_enabled = False\n\n        self._init_layers()\n\n    @property\n    def num_anchors(self):\n        warnings.warn('DeprecationWarning: `num_anchors` is deprecated, '\n                      'please use \"num_base_priors\" instead')\n        return self.square_anchor_generator.num_base_priors[0]\n\n    def _init_layers(self):\n        self.relu = nn.ReLU(inplace=True)\n        self.conv_loc = nn.Conv2d(self.in_channels, 1, 1)\n        self.conv_shape = nn.Conv2d(self.in_channels, self.num_base_priors * 2,\n                                    1)\n        self.feature_adaption = FeatureAdaption(\n            self.in_channels,\n            self.feat_channels,\n            kernel_size=3,\n            deform_groups=self.deform_groups)\n        self.conv_cls = MaskedConv2d(\n            self.feat_channels, self.num_base_priors * self.cls_out_channels,\n            1)\n        self.conv_reg = MaskedConv2d(self.feat_channels,\n                                     self.num_base_priors * 4, 1)\n\n    def forward_single(self, x):\n        loc_pred = self.conv_loc(x)\n        shape_pred = self.conv_shape(x)\n        x = self.feature_adaption(x, shape_pred)\n        # masked conv is only used during inference for speed-up\n        if not self.training:\n            mask = loc_pred.sigmoid()[0] >= self.loc_filter_thr\n        else:\n            mask = None\n        cls_score = self.conv_cls(x, mask)\n        bbox_pred = self.conv_reg(x, mask)\n        return cls_score, bbox_pred, shape_pred, loc_pred\n\n    def forward(self, feats):\n        return multi_apply(self.forward_single, feats)\n\n    def get_sampled_approxs(self, featmap_sizes, img_metas, device='cuda'):\n        \"\"\"Get sampled approxs and inside flags according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            img_metas (list[dict]): Image meta info.\n            
device (torch.device | str): device for returned tensors\n\n        Returns:\n            tuple: approxes of each image, inside flags of each image\n        \"\"\"\n        num_imgs = len(img_metas)\n\n        # since feature map sizes of all images are the same, we only compute\n        # approxes for one time\n        multi_level_approxs = self.approx_anchor_generator.grid_priors(\n            featmap_sizes, device=device)\n        approxs_list = [multi_level_approxs for _ in range(num_imgs)]\n\n        # for each image, we compute inside flags of multi level approxes\n        inside_flag_list = []\n        for img_id, img_meta in enumerate(img_metas):\n            multi_level_flags = []\n            multi_level_approxs = approxs_list[img_id]\n\n            # obtain valid flags for each approx first\n            multi_level_approx_flags = self.approx_anchor_generator \\\n                .valid_flags(featmap_sizes,\n                             img_meta['pad_shape'],\n                             device=device)\n\n            for i, flags in enumerate(multi_level_approx_flags):\n                approxs = multi_level_approxs[i]\n                inside_flags_list = []\n                for i in range(self.approxs_per_octave):\n                    split_valid_flags = flags[i::self.approxs_per_octave]\n                    split_approxs = approxs[i::self.approxs_per_octave, :]\n                    inside_flags = anchor_inside_flags(\n                        split_approxs, split_valid_flags,\n                        img_meta['img_shape'][:2],\n                        self.train_cfg.allowed_border)\n                    inside_flags_list.append(inside_flags)\n                # inside_flag for a position is true if any anchor in this\n                # position is true\n                inside_flags = (\n                    torch.stack(inside_flags_list, 0).sum(dim=0) > 0)\n                multi_level_flags.append(inside_flags)\n            inside_flag_list.append(multi_level_flags)\n        return approxs_list, inside_flag_list\n\n    def get_anchors(self,\n                    featmap_sizes,\n                    shape_preds,\n                    loc_preds,\n                    img_metas,\n                    use_loc_filter=False,\n                    device='cuda'):\n        \"\"\"Get squares according to feature map sizes and guided anchors.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            shape_preds (list[tensor]): Multi-level shape predictions.\n            loc_preds (list[tensor]): Multi-level location predictions.\n            img_metas (list[dict]): Image meta info.\n            use_loc_filter (bool): Use loc filter or not.\n            device (torch.device | str): device for returned tensors\n\n        Returns:\n            tuple: square approxs of each image, guided anchors of each image,\n                loc masks of each image\n        \"\"\"\n        num_imgs = len(img_metas)\n        num_levels = len(featmap_sizes)\n\n        # since feature map sizes of all images are the same, we only compute\n        # squares for one time\n        multi_level_squares = self.square_anchor_generator.grid_priors(\n            featmap_sizes, device=device)\n        squares_list = [multi_level_squares for _ in range(num_imgs)]\n\n        # for each image, we compute multi level guided anchors\n        guided_anchors_list = []\n        loc_mask_list = []\n        for img_id, img_meta in enumerate(img_metas):\n            multi_level_guided_anchors = 
[]\n            multi_level_loc_mask = []\n            for i in range(num_levels):\n                squares = squares_list[img_id][i]\n                shape_pred = shape_preds[i][img_id]\n                loc_pred = loc_preds[i][img_id]\n                guided_anchors, loc_mask = self._get_guided_anchors_single(\n                    squares,\n                    shape_pred,\n                    loc_pred,\n                    use_loc_filter=use_loc_filter)\n                multi_level_guided_anchors.append(guided_anchors)\n                multi_level_loc_mask.append(loc_mask)\n            guided_anchors_list.append(multi_level_guided_anchors)\n            loc_mask_list.append(multi_level_loc_mask)\n        return squares_list, guided_anchors_list, loc_mask_list\n\n    def _get_guided_anchors_single(self,\n                                   squares,\n                                   shape_pred,\n                                   loc_pred,\n                                   use_loc_filter=False):\n        \"\"\"Get guided anchors and loc masks for a single level.\n\n        Args:\n            square (tensor): Squares of a single level.\n            shape_pred (tensor): Shape predictions of a single level.\n            loc_pred (tensor): Loc predictions of a single level.\n            use_loc_filter (list[tensor]): Use loc filter or not.\n\n        Returns:\n            tuple: guided anchors, location masks\n        \"\"\"\n        # calculate location filtering mask\n        loc_pred = loc_pred.sigmoid().detach()\n        if use_loc_filter:\n            loc_mask = loc_pred >= self.loc_filter_thr\n        else:\n            loc_mask = loc_pred >= 0.0\n        mask = loc_mask.permute(1, 2, 0).expand(-1, -1, self.num_base_priors)\n        mask = mask.contiguous().view(-1)\n        # calculate guided anchors\n        squares = squares[mask]\n        anchor_deltas = shape_pred.permute(1, 2, 0).contiguous().view(\n            -1, 2).detach()[mask]\n        bbox_deltas = anchor_deltas.new_full(squares.size(), 0)\n        bbox_deltas[:, 2:] = anchor_deltas\n        guided_anchors = self.anchor_coder.decode(\n            squares, bbox_deltas, wh_ratio_clip=1e-6)\n        return guided_anchors, mask\n\n    def ga_loc_targets(self, gt_bboxes_list, featmap_sizes):\n        \"\"\"Compute location targets for guided anchoring.\n\n        Each feature map is divided into positive, negative and ignore regions.\n        - positive regions: target 1, weight 1\n        - ignore regions: target 0, weight 0\n        - negative regions: target 0, weight 0.1\n\n        Args:\n            gt_bboxes_list (list[Tensor]): Gt bboxes of each image.\n            featmap_sizes (list[tuple]): Multi level sizes of each feature\n                maps.\n\n        Returns:\n            tuple\n        \"\"\"\n        anchor_scale = self.approx_anchor_generator.octave_base_scale\n        anchor_strides = self.approx_anchor_generator.strides\n        # Currently only supports same stride in x and y direction.\n        for stride in anchor_strides:\n            assert (stride[0] == stride[1])\n        anchor_strides = [stride[0] for stride in anchor_strides]\n\n        center_ratio = self.train_cfg.center_ratio\n        ignore_ratio = self.train_cfg.ignore_ratio\n        img_per_gpu = len(gt_bboxes_list)\n        num_lvls = len(featmap_sizes)\n        r1 = (1 - center_ratio) / 2\n        r2 = (1 - ignore_ratio) / 2\n        all_loc_targets = []\n        all_loc_weights = []\n        all_ignore_map = []\n        for lvl_id in 
range(num_lvls):\n            h, w = featmap_sizes[lvl_id]\n            loc_targets = torch.zeros(\n                img_per_gpu,\n                1,\n                h,\n                w,\n                device=gt_bboxes_list[0].device,\n                dtype=torch.float32)\n            loc_weights = torch.full_like(loc_targets, -1)\n            ignore_map = torch.zeros_like(loc_targets)\n            all_loc_targets.append(loc_targets)\n            all_loc_weights.append(loc_weights)\n            all_ignore_map.append(ignore_map)\n        for img_id in range(img_per_gpu):\n            gt_bboxes = gt_bboxes_list[img_id]\n            scale = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0]) *\n                               (gt_bboxes[:, 3] - gt_bboxes[:, 1]))\n            min_anchor_size = scale.new_full(\n                (1, ), float(anchor_scale * anchor_strides[0]))\n            # assign gt bboxes to different feature levels w.r.t. their scales\n            target_lvls = torch.floor(\n                torch.log2(scale) - torch.log2(min_anchor_size) + 0.5)\n            target_lvls = target_lvls.clamp(min=0, max=num_lvls - 1).long()\n            for gt_id in range(gt_bboxes.size(0)):\n                lvl = target_lvls[gt_id].item()\n                # rescaled to corresponding feature map\n                gt_ = gt_bboxes[gt_id, :4] / anchor_strides[lvl]\n                # calculate ignore regions\n                ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(\n                    gt_, r2, featmap_sizes[lvl])\n                # calculate positive (center) regions\n                ctr_x1, ctr_y1, ctr_x2, ctr_y2 = calc_region(\n                    gt_, r1, featmap_sizes[lvl])\n                all_loc_targets[lvl][img_id, 0, ctr_y1:ctr_y2 + 1,\n                                     ctr_x1:ctr_x2 + 1] = 1\n                all_loc_weights[lvl][img_id, 0, ignore_y1:ignore_y2 + 1,\n                                     ignore_x1:ignore_x2 + 1] = 0\n                all_loc_weights[lvl][img_id, 0, ctr_y1:ctr_y2 + 1,\n                                     ctr_x1:ctr_x2 + 1] = 1\n                # calculate ignore map on nearby low level feature\n                if lvl > 0:\n                    d_lvl = lvl - 1\n                    # rescaled to corresponding feature map\n                    gt_ = gt_bboxes[gt_id, :4] / anchor_strides[d_lvl]\n                    ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(\n                        gt_, r2, featmap_sizes[d_lvl])\n                    all_ignore_map[d_lvl][img_id, 0, ignore_y1:ignore_y2 + 1,\n                                          ignore_x1:ignore_x2 + 1] = 1\n                # calculate ignore map on nearby high level feature\n                if lvl < num_lvls - 1:\n                    u_lvl = lvl + 1\n                    # rescaled to corresponding feature map\n                    gt_ = gt_bboxes[gt_id, :4] / anchor_strides[u_lvl]\n                    ignore_x1, ignore_y1, ignore_x2, ignore_y2 = calc_region(\n                        gt_, r2, featmap_sizes[u_lvl])\n                    all_ignore_map[u_lvl][img_id, 0, ignore_y1:ignore_y2 + 1,\n                                          ignore_x1:ignore_x2 + 1] = 1\n        for lvl_id in range(num_lvls):\n            # ignore negative regions w.r.t. 
ignore map\n            all_loc_weights[lvl_id][(all_loc_weights[lvl_id] < 0)\n                                    & (all_ignore_map[lvl_id] > 0)] = 0\n            # set negative regions with weight 0.1\n            all_loc_weights[lvl_id][all_loc_weights[lvl_id] < 0] = 0.1\n        # loc average factor to balance loss\n        loc_avg_factor = sum(\n            [t.size(0) * t.size(-1) * t.size(-2)\n             for t in all_loc_targets]) / 200\n        return all_loc_targets, all_loc_weights, loc_avg_factor\n\n    def _ga_shape_target_single(self,\n                                flat_approxs,\n                                inside_flags,\n                                flat_squares,\n                                gt_bboxes,\n                                gt_bboxes_ignore,\n                                img_meta,\n                                unmap_outputs=True):\n        \"\"\"Compute guided anchoring targets.\n\n        This function returns sampled anchors and gt bboxes directly\n        rather than calculates regression targets.\n\n        Args:\n            flat_approxs (Tensor): flat approxs of a single image,\n                shape (n, 4)\n            inside_flags (Tensor): inside flags of a single image,\n                shape (n, ).\n            flat_squares (Tensor): flat squares of a single image,\n                shape (approxs_per_octave * n, 4)\n            gt_bboxes (Tensor): Ground truth bboxes of a single image.\n            img_meta (dict): Meta info of a single image.\n            approxs_per_octave (int): number of approxs per octave\n            cfg (dict): RPN train configs.\n            unmap_outputs (bool): unmap outputs or not.\n\n        Returns:\n            tuple\n        \"\"\"\n        if not inside_flags.any():\n            return (None, ) * 5\n        # assign gt and sample anchors\n        expand_inside_flags = inside_flags[:, None].expand(\n            -1, self.approxs_per_octave).reshape(-1)\n        approxs = flat_approxs[expand_inside_flags, :]\n        squares = flat_squares[inside_flags, :]\n\n        assign_result = self.ga_assigner.assign(approxs, squares,\n                                                self.approxs_per_octave,\n                                                gt_bboxes, gt_bboxes_ignore)\n        sampling_result = self.ga_sampler.sample(assign_result, squares,\n                                                 gt_bboxes)\n\n        bbox_anchors = torch.zeros_like(squares)\n        bbox_gts = torch.zeros_like(squares)\n        bbox_weights = torch.zeros_like(squares)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            bbox_anchors[pos_inds, :] = sampling_result.pos_bboxes\n            bbox_gts[pos_inds, :] = sampling_result.pos_gt_bboxes\n            bbox_weights[pos_inds, :] = 1.0\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_squares.size(0)\n            bbox_anchors = unmap(bbox_anchors, num_total_anchors, inside_flags)\n            bbox_gts = unmap(bbox_gts, num_total_anchors, inside_flags)\n            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)\n\n        return (bbox_anchors, bbox_gts, bbox_weights, pos_inds, neg_inds)\n\n    def ga_shape_targets(self,\n                         approx_list,\n                         inside_flag_list,\n                         square_list,\n                         gt_bboxes_list,\n                         
img_metas,\n                         gt_bboxes_ignore_list=None,\n                         unmap_outputs=True):\n        \"\"\"Compute guided anchoring targets.\n\n        Args:\n            approx_list (list[list]): Multi level approxs of each image.\n            inside_flag_list (list[list]): Multi level inside flags of each\n                image.\n            square_list (list[list]): Multi level squares of each image.\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes.\n            unmap_outputs (bool): unmap outputs or not.\n\n        Returns:\n            tuple\n        \"\"\"\n        num_imgs = len(img_metas)\n        assert len(approx_list) == len(inside_flag_list) == len(\n            square_list) == num_imgs\n        # anchor number of multi levels\n        num_level_squares = [squares.size(0) for squares in square_list[0]]\n        # concat all level anchors and flags to a single tensor\n        inside_flag_flat_list = []\n        approx_flat_list = []\n        square_flat_list = []\n        for i in range(num_imgs):\n            assert len(square_list[i]) == len(inside_flag_list[i])\n            inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))\n            approx_flat_list.append(torch.cat(approx_list[i]))\n            square_flat_list.append(torch.cat(square_list[i]))\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        (all_bbox_anchors, all_bbox_gts, all_bbox_weights, pos_inds_list,\n         neg_inds_list) = multi_apply(\n             self._ga_shape_target_single,\n             approx_flat_list,\n             inside_flag_flat_list,\n             square_flat_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             img_metas,\n             unmap_outputs=unmap_outputs)\n        # no valid anchors\n        if any([bbox_anchors is None for bbox_anchors in all_bbox_anchors]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        # split targets to a list w.r.t. 
multiple levels\n        bbox_anchors_list = images_to_levels(all_bbox_anchors,\n                                             num_level_squares)\n        bbox_gts_list = images_to_levels(all_bbox_gts, num_level_squares)\n        bbox_weights_list = images_to_levels(all_bbox_weights,\n                                             num_level_squares)\n        return (bbox_anchors_list, bbox_gts_list, bbox_weights_list,\n                num_total_pos, num_total_neg)\n\n    def loss_shape_single(self, shape_pred, bbox_anchors, bbox_gts,\n                          anchor_weights, anchor_total_num):\n        shape_pred = shape_pred.permute(0, 2, 3, 1).contiguous().view(-1, 2)\n        bbox_anchors = bbox_anchors.contiguous().view(-1, 4)\n        bbox_gts = bbox_gts.contiguous().view(-1, 4)\n        anchor_weights = anchor_weights.contiguous().view(-1, 4)\n        bbox_deltas = bbox_anchors.new_full(bbox_anchors.size(), 0)\n        bbox_deltas[:, 2:] += shape_pred\n        # filter out negative samples to speed-up weighted_bounded_iou_loss\n        inds = torch.nonzero(\n            anchor_weights[:, 0] > 0, as_tuple=False).squeeze(1)\n        bbox_deltas_ = bbox_deltas[inds]\n        bbox_anchors_ = bbox_anchors[inds]\n        bbox_gts_ = bbox_gts[inds]\n        anchor_weights_ = anchor_weights[inds]\n        pred_anchors_ = self.anchor_coder.decode(\n            bbox_anchors_, bbox_deltas_, wh_ratio_clip=1e-6)\n        loss_shape = self.loss_shape(\n            pred_anchors_,\n            bbox_gts_,\n            anchor_weights_,\n            avg_factor=anchor_total_num)\n        return loss_shape\n\n    def loss_loc_single(self, loc_pred, loc_target, loc_weight,\n                        loc_avg_factor):\n        loss_loc = self.loss_loc(\n            loc_pred.reshape(-1, 1),\n            loc_target.reshape(-1).long(),\n            loc_weight.reshape(-1),\n            avg_factor=loc_avg_factor)\n        return loss_loc\n\n    @force_fp32(\n        apply_to=('cls_scores', 'bbox_preds', 'shape_preds', 'loc_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             shape_preds,\n             loc_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.approx_anchor_generator.num_levels\n\n        device = cls_scores[0].device\n\n        # get loc targets\n        loc_targets, loc_weights, loc_avg_factor = self.ga_loc_targets(\n            gt_bboxes, featmap_sizes)\n\n        # get sampled approxes\n        approxs_list, inside_flag_list = self.get_sampled_approxs(\n            featmap_sizes, img_metas, device=device)\n        # get squares and guided anchors\n        squares_list, guided_anchors_list, _ = self.get_anchors(\n            featmap_sizes, shape_preds, loc_preds, img_metas, device=device)\n\n        # get shape targets\n        shape_targets = self.ga_shape_targets(approxs_list, inside_flag_list,\n                                              squares_list, gt_bboxes,\n                                              img_metas)\n        if shape_targets is None:\n            return None\n        (bbox_anchors_list, bbox_gts_list, anchor_weights_list, anchor_fg_num,\n         anchor_bg_num) = shape_targets\n        anchor_total_num = (\n            anchor_fg_num if not self.ga_sampling else anchor_fg_num +\n            anchor_bg_num)\n\n        # get anchor targets\n        
label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            guided_anchors_list,\n            inside_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n\n        # anchor number of multi levels\n        num_level_anchors = [\n            anchors.size(0) for anchors in guided_anchors_list[0]\n        ]\n        # concat all level anchors to a single tensor\n        concat_anchor_list = []\n        for i in range(len(guided_anchors_list)):\n            concat_anchor_list.append(torch.cat(guided_anchors_list[i]))\n        all_anchor_list = images_to_levels(concat_anchor_list,\n                                           num_level_anchors)\n\n        # get classification and bbox regression losses\n        losses_cls, losses_bbox = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            all_anchor_list,\n            labels_list,\n            label_weights_list,\n            bbox_targets_list,\n            bbox_weights_list,\n            num_total_samples=num_total_samples)\n\n        # get anchor location loss\n        losses_loc = []\n        for i in range(len(loc_preds)):\n            loss_loc = self.loss_loc_single(\n                loc_preds[i],\n                loc_targets[i],\n                loc_weights[i],\n                loc_avg_factor=loc_avg_factor)\n            losses_loc.append(loss_loc)\n\n        # get anchor shape loss\n        losses_shape = []\n        for i in range(len(shape_preds)):\n            loss_shape = self.loss_shape_single(\n                shape_preds[i],\n                bbox_anchors_list[i],\n                bbox_gts_list[i],\n                anchor_weights_list[i],\n                anchor_total_num=anchor_total_num)\n            losses_shape.append(loss_shape)\n\n        return dict(\n            loss_cls=losses_cls,\n            loss_bbox=losses_bbox,\n            loss_shape=losses_shape,\n            loss_loc=losses_loc)\n\n    @force_fp32(\n        apply_to=('cls_scores', 'bbox_preds', 'shape_preds', 'loc_preds'))\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   shape_preds,\n                   loc_preds,\n                   img_metas,\n                   cfg=None,\n                   rescale=False):\n        assert len(cls_scores) == len(bbox_preds) == len(shape_preds) == len(\n            loc_preds)\n        num_levels = len(cls_scores)\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        device = cls_scores[0].device\n        # get guided anchors\n        _, guided_anchors, loc_masks = self.get_anchors(\n            featmap_sizes,\n            shape_preds,\n            loc_preds,\n            img_metas,\n            use_loc_filter=not self.training,\n            device=device)\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_pred_list = [\n  
              bbox_preds[i][img_id].detach() for i in range(num_levels)\n            ]\n            guided_anchor_list = [\n                guided_anchors[img_id][i].detach() for i in range(num_levels)\n            ]\n            loc_mask_list = [\n                loc_masks[img_id][i].detach() for i in range(num_levels)\n            ]\n            img_shape = img_metas[img_id]['img_shape']\n            scale_factor = img_metas[img_id]['scale_factor']\n            proposals = self._get_bboxes_single(cls_score_list, bbox_pred_list,\n                                                guided_anchor_list,\n                                                loc_mask_list, img_shape,\n                                                scale_factor, cfg, rescale)\n            result_list.append(proposals)\n        return result_list\n\n    def _get_bboxes_single(self,\n                           cls_scores,\n                           bbox_preds,\n                           mlvl_anchors,\n                           mlvl_masks,\n                           img_shape,\n                           scale_factor,\n                           cfg,\n                           rescale=False):\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)\n        mlvl_bboxes = []\n        mlvl_scores = []\n        for cls_score, bbox_pred, anchors, mask in zip(cls_scores, bbox_preds,\n                                                       mlvl_anchors,\n                                                       mlvl_masks):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            # if no location is kept, end.\n            if mask.sum() == 0:\n                continue\n            # reshape scores and bbox_pred\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.cls_out_channels)\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n            # filter scores, bbox_pred w.r.t. mask.\n            # anchors are filtered in get_anchors() beforehand.\n            scores = scores[mask, :]\n            bbox_pred = bbox_pred[mask, :]\n            if scores.dim() == 0:\n                anchors = anchors.unsqueeze(0)\n                scores = scores.unsqueeze(0)\n                bbox_pred = bbox_pred.unsqueeze(0)\n            # filter anchors, bbox_pred, scores w.r.t. 
scores\n            nms_pre = cfg.get('nms_pre', -1)\n            if nms_pre > 0 and scores.shape[0] > nms_pre:\n                if self.use_sigmoid_cls:\n                    max_scores, _ = scores.max(dim=1)\n                else:\n                    # remind that we set FG labels to [0, num_class-1]\n                    # since mmdet v2.0\n                    # BG cat_id: num_class\n                    max_scores, _ = scores[:, :-1].max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n            bboxes = self.bbox_coder.decode(\n                anchors, bbox_pred, max_shape=img_shape)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        if rescale:\n            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)\n        mlvl_scores = torch.cat(mlvl_scores)\n        if self.use_sigmoid_cls:\n            # Add a dummy background class to the backend when using sigmoid\n            # remind that we set FG labels to [0, num_class-1] since mmdet v2.0\n            # BG cat_id: num_class\n            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n        # multi class NMS\n        det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,\n                                                cfg.score_thr, cfg.nms,\n                                                cfg.max_per_img)\n        return det_bboxes, det_labels\n"
  },
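Illustrative note (not part of the repository): in `_get_guided_anchors_single` above, locations are kept where `loc_pred.sigmoid()` clears `loc_filter_thr`, and the predicted `(dw, dh)` shape deltas are decoded against the square anchors through `DeltaXYWHBBoxCoder` with `dx = dy = 0`, so the centre stays fixed while width and height are rescaled. The sketch below reproduces only that core exp-scaling step and omits the coder's mean/std normalisation and `wh_ratio_clip` clamp.

```python
# Hedged sketch of the guided-anchor shape decoding: square anchors plus
# predicted (dw, dh) deltas become anchors with the same centre and
# exp-scaled width/height. This is an illustration, not the repository's
# DeltaXYWHBBoxCoder implementation.
import torch

def decode_shape_deltas(squares, anchor_deltas):
    """squares: (N, 4) as x1, y1, x2, y2; anchor_deltas: (N, 2) as dw, dh."""
    cx = (squares[:, 0] + squares[:, 2]) * 0.5
    cy = (squares[:, 1] + squares[:, 3]) * 0.5
    w = squares[:, 2] - squares[:, 0]
    h = squares[:, 3] - squares[:, 1]
    new_w = w * anchor_deltas[:, 0].exp()          # dx = dy = 0, so the
    new_h = h * anchor_deltas[:, 1].exp()          # centre does not move
    return torch.stack([cx - new_w * 0.5, cy - new_h * 0.5,
                        cx + new_w * 0.5, cy + new_h * 0.5], dim=-1)

squares = torch.tensor([[0., 0., 8., 8.]])
deltas = torch.tensor([[0.5, -0.5]])               # widen, flatten
print(decode_shape_deltas(squares, deltas))
```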
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/lad_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import bbox_overlaps, multi_apply\nfrom ..builder import HEADS\nfrom .paa_head import PAAHead, levels_to_images\n\n\n@HEADS.register_module()\nclass LADHead(PAAHead):\n    \"\"\"Label Assignment Head from the paper: `Improving Object Detection by\n    Label Assignment Distillation <https://arxiv.org/pdf/2108.10520.pdf>`_\"\"\"\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'iou_preds'))\n    def get_label_assignment(self,\n                             cls_scores,\n                             bbox_preds,\n                             iou_preds,\n                             gt_bboxes,\n                             gt_labels,\n                             img_metas,\n                             gt_bboxes_ignore=None):\n        \"\"\"Get label assignment (from teacher).\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level.\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            iou_preds (list[Tensor]): iou_preds for each scale\n                level with shape (N, num_anchors * 1, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): Specify which bounding\n                boxes can be ignored when are computing the loss.\n\n        Returns:\n            tuple: Returns a tuple containing label assignment variables.\n\n                - labels (Tensor): Labels of all anchors, each with\n                    shape (num_anchors,).\n                - labels_weight (Tensor): Label weights of all anchor.\n                    each with shape (num_anchors,).\n                - bboxes_target (Tensor): BBox targets of all anchors.\n                    each with shape (num_anchors, 4).\n                - bboxes_weight (Tensor): BBox weights of all anchors.\n                    each with shape (num_anchors, 4).\n                - pos_inds_flatten (Tensor): Contains all index of positive\n                    sample in all anchor.\n                - pos_anchors (Tensor): Positive anchors.\n                - num_pos (int): Number of positive anchors.\n        \"\"\"\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels,\n        )\n        (labels, labels_weight, bboxes_target, bboxes_weight, pos_inds,\n         pos_gt_index) = cls_reg_targets\n        cls_scores = levels_to_images(cls_scores)\n        cls_scores = [\n        
    item.reshape(-1, self.cls_out_channels) for item in cls_scores\n        ]\n        bbox_preds = levels_to_images(bbox_preds)\n        bbox_preds = [item.reshape(-1, 4) for item in bbox_preds]\n        pos_losses_list, = multi_apply(self.get_pos_loss, anchor_list,\n                                       cls_scores, bbox_preds, labels,\n                                       labels_weight, bboxes_target,\n                                       bboxes_weight, pos_inds)\n\n        with torch.no_grad():\n            reassign_labels, reassign_label_weight, \\\n                reassign_bbox_weights, num_pos = multi_apply(\n                    self.paa_reassign,\n                    pos_losses_list,\n                    labels,\n                    labels_weight,\n                    bboxes_weight,\n                    pos_inds,\n                    pos_gt_index,\n                    anchor_list)\n            num_pos = sum(num_pos)\n        # convert all tensor list to a flatten tensor\n        labels = torch.cat(reassign_labels, 0).view(-1)\n        flatten_anchors = torch.cat(\n            [torch.cat(item, 0) for item in anchor_list])\n        labels_weight = torch.cat(reassign_label_weight, 0).view(-1)\n        bboxes_target = torch.cat(bboxes_target,\n                                  0).view(-1, bboxes_target[0].size(-1))\n\n        pos_inds_flatten = ((labels >= 0)\n                            &\n                            (labels < self.num_classes)).nonzero().reshape(-1)\n\n        if num_pos:\n            pos_anchors = flatten_anchors[pos_inds_flatten]\n        else:\n            pos_anchors = None\n\n        label_assignment_results = (labels, labels_weight, bboxes_target,\n                                    bboxes_weight, pos_inds_flatten,\n                                    pos_anchors, num_pos)\n        return label_assignment_results\n\n    def forward_train(self,\n                      x,\n                      label_assignment_results,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels=None,\n                      gt_bboxes_ignore=None,\n                      **kwargs):\n        \"\"\"Forward train with the available label assignment (student receives\n        from teacher).\n\n        Args:\n            x (list[Tensor]): Features from FPN.\n            label_assignment_results (tuple): As the outputs defined in the\n                function `self.get_label_assignment`.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n\n        Returns:\n            losses: (dict[str, Tensor]): A dictionary of loss components.\n        \"\"\"\n        outs = self(x)\n        if gt_labels is None:\n            loss_inputs = outs + (gt_bboxes, img_metas)\n        else:\n            loss_inputs = outs + (gt_bboxes, gt_labels, img_metas)\n        losses = self.loss(\n            *loss_inputs,\n            gt_bboxes_ignore=gt_bboxes_ignore,\n            label_assignment_results=label_assignment_results)\n        return losses\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'iou_preds'))\n    def loss(self,\n             cls_scores,\n            
 bbox_preds,\n             iou_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None,\n             label_assignment_results=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            iou_preds (list[Tensor]): iou_preds for each scale\n                level with shape (N, num_anchors * 1, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): Specify which bounding\n                boxes can be ignored when are computing the loss.\n            label_assignment_results (tuple): As the outputs defined in the\n                function `self.get_label_assignment`.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss gmm_assignment.\n        \"\"\"\n\n        (labels, labels_weight, bboxes_target, bboxes_weight, pos_inds_flatten,\n         pos_anchors, num_pos) = label_assignment_results\n\n        cls_scores = levels_to_images(cls_scores)\n        cls_scores = [\n            item.reshape(-1, self.cls_out_channels) for item in cls_scores\n        ]\n        bbox_preds = levels_to_images(bbox_preds)\n        bbox_preds = [item.reshape(-1, 4) for item in bbox_preds]\n        iou_preds = levels_to_images(iou_preds)\n        iou_preds = [item.reshape(-1, 1) for item in iou_preds]\n\n        # convert all tensor list to a flatten tensor\n        cls_scores = torch.cat(cls_scores, 0).view(-1, cls_scores[0].size(-1))\n        bbox_preds = torch.cat(bbox_preds, 0).view(-1, bbox_preds[0].size(-1))\n        iou_preds = torch.cat(iou_preds, 0).view(-1, iou_preds[0].size(-1))\n\n        losses_cls = self.loss_cls(\n            cls_scores,\n            labels,\n            labels_weight,\n            avg_factor=max(num_pos, len(img_metas)))  # avoid num_pos=0\n        if num_pos:\n            pos_bbox_pred = self.bbox_coder.decode(\n                pos_anchors, bbox_preds[pos_inds_flatten])\n            pos_bbox_target = bboxes_target[pos_inds_flatten]\n            iou_target = bbox_overlaps(\n                pos_bbox_pred.detach(), pos_bbox_target, is_aligned=True)\n            losses_iou = self.loss_centerness(\n                iou_preds[pos_inds_flatten],\n                iou_target.unsqueeze(-1),\n                avg_factor=num_pos)\n            losses_bbox = self.loss_bbox(\n                pos_bbox_pred, pos_bbox_target, avg_factor=num_pos)\n\n        else:\n            losses_iou = iou_preds.sum() * 0\n            losses_bbox = bbox_preds.sum() * 0\n\n        return dict(\n            loss_cls=losses_cls, loss_bbox=losses_bbox, loss_iou=losses_iou)\n"
  },
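Illustrative note (not part of the repository): `LADHead` above repeatedly regroups per-level predictions into per-image flattened tensors with `levels_to_images` (imported from `paa_head`) before reassigning labels and computing losses. The toy function below sketches that regrouping under assumed shapes; the real helper lives in `mmdet/models/dense_heads/paa_head.py` and may differ in detail.

```python
# Toy sketch of the levels_to_images regrouping used by LADHead: per-level
# maps of shape (N, C, H, W) become a per-image list, each entry flattened to
# (sum_l H_l * W_l, C). Shapes are invented for illustration.
import torch

def levels_to_images_sketch(mlvl_tensors):
    num_imgs = mlvl_tensors[0].size(0)
    per_image = [[] for _ in range(num_imgs)]
    for t in mlvl_tensors:                         # t: (N, C, H, W)
        n, c = t.shape[:2]
        t = t.permute(0, 2, 3, 1).reshape(n, -1, c)  # (N, H*W, C)
        for img_id in range(num_imgs):
            per_image[img_id].append(t[img_id])
    return [torch.cat(chunks, dim=0) for chunks in per_image]

cls_scores = [torch.randn(2, 80, 8, 8), torch.randn(2, 80, 4, 4)]
flat = levels_to_images_sketch(cls_scores)
print(flat[0].shape)                               # (64 + 16, 80)
```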
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/ld_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import bbox_overlaps, multi_apply, reduce_mean\nfrom ..builder import HEADS, build_loss\nfrom .gfl_head import GFLHead\n\n\n@HEADS.register_module()\nclass LDHead(GFLHead):\n    \"\"\"Localization distillation Head. (Short description)\n\n    It utilizes the learned bbox distributions to transfer the localization\n    dark knowledge from teacher to student. Original paper: `Localization\n    Distillation for Object Detection. <https://arxiv.org/abs/2102.12252>`_\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        loss_ld (dict): Config of Localization Distillation Loss (LD),\n            T is the temperature for distillation.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 loss_ld=dict(\n                     type='LocalizationDistillationLoss',\n                     loss_weight=0.25,\n                     T=10),\n                 **kwargs):\n\n        super(LDHead, self).__init__(num_classes, in_channels, **kwargs)\n        self.loss_ld = build_loss(loss_ld)\n\n    def loss_single(self, anchors, cls_score, bbox_pred, labels, label_weights,\n                    bbox_targets, stride, soft_targets, num_total_samples):\n        \"\"\"Compute loss of a single scale level.\n\n        Args:\n            anchors (Tensor): Box reference for each scale level with shape\n                (N, num_total_anchors, 4).\n            cls_score (Tensor): Cls and quality joint scores for each scale\n                level has shape (N, num_classes, H, W).\n            bbox_pred (Tensor): Box distribution logits for each scale\n                level with shape (N, 4*(n+1), H, W), n is max value of integral\n                set.\n            labels (Tensor): Labels of each anchors with shape\n                (N, num_total_anchors).\n            label_weights (Tensor): Label weights of each anchor with shape\n                (N, num_total_anchors)\n            bbox_targets (Tensor): BBox regression targets of each anchor\n                weight shape (N, num_total_anchors, 4).\n            stride (tuple): Stride in this scale level.\n            num_total_samples (int): Number of positive samples that is\n                reduced over all GPUs.\n\n        Returns:\n            dict[tuple, Tensor]: Loss components and weight targets.\n        \"\"\"\n        assert stride[0] == stride[1], 'h stride is not equal to w stride!'\n        anchors = anchors.reshape(-1, 4)\n        cls_score = cls_score.permute(0, 2, 3,\n                                      1).reshape(-1, self.cls_out_channels)\n        bbox_pred = bbox_pred.permute(0, 2, 3,\n                                      1).reshape(-1, 4 * (self.reg_max + 1))\n        soft_targets = soft_targets.permute(0, 2, 3,\n                                            1).reshape(-1,\n                                                       4 * (self.reg_max + 1))\n\n        bbox_targets = bbox_targets.reshape(-1, 4)\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = ((labels >= 0)\n                    & (labels < bg_class_ind)).nonzero().squeeze(1)\n        score = 
label_weights.new_zeros(labels.shape)\n\n        if len(pos_inds) > 0:\n            pos_bbox_targets = bbox_targets[pos_inds]\n            pos_bbox_pred = bbox_pred[pos_inds]\n            pos_anchors = anchors[pos_inds]\n            pos_anchor_centers = self.anchor_center(pos_anchors) / stride[0]\n\n            weight_targets = cls_score.detach().sigmoid()\n            weight_targets = weight_targets.max(dim=1)[0][pos_inds]\n            pos_bbox_pred_corners = self.integral(pos_bbox_pred)\n            pos_decode_bbox_pred = self.bbox_coder.decode(\n                pos_anchor_centers, pos_bbox_pred_corners)\n            pos_decode_bbox_targets = pos_bbox_targets / stride[0]\n            score[pos_inds] = bbox_overlaps(\n                pos_decode_bbox_pred.detach(),\n                pos_decode_bbox_targets,\n                is_aligned=True)\n            pred_corners = pos_bbox_pred.reshape(-1, self.reg_max + 1)\n            pos_soft_targets = soft_targets[pos_inds]\n            soft_corners = pos_soft_targets.reshape(-1, self.reg_max + 1)\n\n            target_corners = self.bbox_coder.encode(pos_anchor_centers,\n                                                    pos_decode_bbox_targets,\n                                                    self.reg_max).reshape(-1)\n\n            # regression loss\n            loss_bbox = self.loss_bbox(\n                pos_decode_bbox_pred,\n                pos_decode_bbox_targets,\n                weight=weight_targets,\n                avg_factor=1.0)\n\n            # dfl loss\n            loss_dfl = self.loss_dfl(\n                pred_corners,\n                target_corners,\n                weight=weight_targets[:, None].expand(-1, 4).reshape(-1),\n                avg_factor=4.0)\n\n            # ld loss\n            loss_ld = self.loss_ld(\n                pred_corners,\n                soft_corners,\n                weight=weight_targets[:, None].expand(-1, 4).reshape(-1),\n                avg_factor=4.0)\n\n        else:\n            loss_ld = bbox_pred.sum() * 0\n            loss_bbox = bbox_pred.sum() * 0\n            loss_dfl = bbox_pred.sum() * 0\n            weight_targets = bbox_pred.new_tensor(0)\n\n        # cls (qfl) loss\n        loss_cls = self.loss_cls(\n            cls_score, (labels, score),\n            weight=label_weights,\n            avg_factor=num_total_samples)\n\n        return loss_cls, loss_bbox, loss_dfl, loss_ld, weight_targets.sum()\n\n    def forward_train(self,\n                      x,\n                      out_teacher,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels=None,\n                      gt_bboxes_ignore=None,\n                      proposal_cfg=None,\n                      **kwargs):\n        \"\"\"\n        Args:\n            x (list[Tensor]): Features from FPN.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            proposal_cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used\n\n        Returns:\n            tuple[dict, list]: The loss components and proposals of each image.\n\n            - losses 
(dict[str, Tensor]): A dictionary of loss components.\n            - proposal_list (list[Tensor]): Proposals of each image.\n        \"\"\"\n        outs = self(x)\n        soft_target = out_teacher[1]\n        if gt_labels is None:\n            loss_inputs = outs + (gt_bboxes, soft_target, img_metas)\n        else:\n            loss_inputs = outs + (gt_bboxes, gt_labels, soft_target, img_metas)\n        losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n        if proposal_cfg is None:\n            return losses\n        else:\n            proposal_list = self.get_bboxes(*outs, img_metas, cfg=proposal_cfg)\n            return losses, proposal_list\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             soft_target,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Cls and quality scores for each scale\n                level has shape (N, num_classes, H, W).\n            bbox_preds (list[Tensor]): Box distribution logits for each scale\n                level with shape (N, 4*(n+1), H, W), n is max value of integral\n                set.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if cls_reg_targets is None:\n            return None\n\n        (anchor_list, labels_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets\n\n        num_total_samples = reduce_mean(\n            torch.tensor(num_total_pos, dtype=torch.float,\n                         device=device)).item()\n        num_total_samples = max(num_total_samples, 1.0)\n\n        losses_cls, losses_bbox, losses_dfl, losses_ld, \\\n            avg_factor = multi_apply(\n                self.loss_single,\n                anchor_list,\n                cls_scores,\n                bbox_preds,\n                labels_list,\n                label_weights_list,\n                bbox_targets_list,\n                self.prior_generator.strides,\n                soft_target,\n                num_total_samples=num_total_samples)\n\n        avg_factor = sum(avg_factor) + 1e-6\n        avg_factor = 
reduce_mean(avg_factor).item()\n        losses_bbox = [x / avg_factor for x in losses_bbox]\n        losses_dfl = [x / avg_factor for x in losses_dfl]\n        return dict(\n            loss_cls=losses_cls,\n            loss_bbox=losses_bbox,\n            loss_dfl=losses_dfl,\n            loss_ld=losses_ld)\n"
  },
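The LDHead entry above transfers "localization dark knowledge" by matching the student's discretized corner logits (`pred_corners`) against the teacher's soft targets (`soft_corners`). The snippet below is a minimal, hypothetical sketch of such a temperature-scaled KL term; it is not the repo's `LocalizationDistillationLoss`, whose exact weighting and reduction may differ.

```python
# Hypothetical sketch of a temperature-scaled localization distillation term.
# The actual loss used by LDHead is mmdet's LocalizationDistillationLoss.
import torch
import torch.nn.functional as F


def ld_kl_sketch(pred_corners: torch.Tensor,
                 soft_corners: torch.Tensor,
                 T: float = 10.0) -> torch.Tensor:
    """KL(teacher || student) over discretized corner distributions.

    Both inputs are logits of shape (num_pos * 4, reg_max + 1), matching the
    reshaped `pred_corners` / `soft_corners` inside LDHead.loss_single.
    """
    log_p_student = F.log_softmax(pred_corners / T, dim=1)
    p_teacher = F.softmax(soft_corners / T, dim=1)
    # batchmean reduction plus T^2 scaling is the usual distillation convention
    return F.kl_div(log_p_student, p_teacher, reduction='batchmean') * (T * T)


if __name__ == '__main__':
    student = torch.randn(8 * 4, 17)   # e.g. reg_max = 16
    teacher = torch.randn(8 * 4, 17)
    print(ld_kl_sketch(student, teacher))
```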
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/mask2former_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import Conv2d, build_plugin_layer, caffe2_xavier_init\nfrom mmcv.cnn.bricks.transformer import (build_positional_encoding,\n                                         build_transformer_layer_sequence)\nfrom mmcv.ops import point_sample\nfrom mmcv.runner import ModuleList\n\nfrom mmdet.core import build_assigner, build_sampler, reduce_mean\nfrom mmdet.models.utils import get_uncertain_point_coords_with_randomness\nfrom ..builder import HEADS, build_loss\nfrom .anchor_free_head import AnchorFreeHead\nfrom .maskformer_head import MaskFormerHead\n\n\n@HEADS.register_module()\nclass Mask2FormerHead(MaskFormerHead):\n    \"\"\"Implements the Mask2Former head.\n\n    See `Masked-attention Mask Transformer for Universal Image\n    Segmentation <https://arxiv.org/pdf/2112.01527>`_ for details.\n\n    Args:\n        in_channels (list[int]): Number of channels in the input feature map.\n        feat_channels (int): Number of channels for features.\n        out_channels (int): Number of channels for output.\n        num_things_classes (int): Number of things.\n        num_stuff_classes (int): Number of stuff.\n        num_queries (int): Number of query in Transformer decoder.\n        pixel_decoder (:obj:`mmcv.ConfigDict` | dict): Config for pixel\n            decoder. Defaults to None.\n        enforce_decoder_input_project (bool, optional): Whether to add\n            a layer to change the embed_dim of tranformer encoder in\n            pixel decoder to the embed_dim of transformer decoder.\n            Defaults to False.\n        transformer_decoder (:obj:`mmcv.ConfigDict` | dict): Config for\n            transformer decoder. Defaults to None.\n        positional_encoding (:obj:`mmcv.ConfigDict` | dict): Config for\n            transformer decoder position encoding. Defaults to None.\n        loss_cls (:obj:`mmcv.ConfigDict` | dict): Config of the classification\n            loss. 
Defaults to None.\n        loss_mask (:obj:`mmcv.ConfigDict` | dict): Config of the mask loss.\n            Defaults to None.\n        loss_dice (:obj:`mmcv.ConfigDict` | dict): Config of the dice loss.\n            Defaults to None.\n        train_cfg (:obj:`mmcv.ConfigDict` | dict): Training config of\n            Mask2Former head.\n        test_cfg (:obj:`mmcv.ConfigDict` | dict): Testing config of\n            Mask2Former head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 feat_channels,\n                 out_channels,\n                 num_things_classes=80,\n                 num_stuff_classes=53,\n                 num_queries=100,\n                 num_transformer_feat_level=3,\n                 pixel_decoder=None,\n                 enforce_decoder_input_project=False,\n                 transformer_decoder=None,\n                 positional_encoding=None,\n                 loss_cls=None,\n                 loss_mask=None,\n                 loss_dice=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 **kwargs):\n        super(AnchorFreeHead, self).__init__(init_cfg)\n        self.num_things_classes = num_things_classes\n        self.num_stuff_classes = num_stuff_classes\n        self.num_classes = self.num_things_classes + self.num_stuff_classes\n        self.num_queries = num_queries\n        self.num_transformer_feat_level = num_transformer_feat_level\n        self.num_heads = transformer_decoder.transformerlayers.\\\n            attn_cfgs.num_heads\n        self.num_transformer_decoder_layers = transformer_decoder.num_layers\n        assert pixel_decoder.encoder.transformerlayers.\\\n            attn_cfgs.num_levels == num_transformer_feat_level\n        pixel_decoder_ = copy.deepcopy(pixel_decoder)\n        pixel_decoder_.update(\n            in_channels=in_channels,\n            feat_channels=feat_channels,\n            out_channels=out_channels)\n        self.pixel_decoder = build_plugin_layer(pixel_decoder_)[1]\n        self.transformer_decoder = build_transformer_layer_sequence(\n            transformer_decoder)\n        self.decoder_embed_dims = self.transformer_decoder.embed_dims\n\n        self.decoder_input_projs = ModuleList()\n        # from low resolution to high resolution\n        for _ in range(num_transformer_feat_level):\n            if (self.decoder_embed_dims != feat_channels\n                    or enforce_decoder_input_project):\n                self.decoder_input_projs.append(\n                    Conv2d(\n                        feat_channels, self.decoder_embed_dims, kernel_size=1))\n            else:\n                self.decoder_input_projs.append(nn.Identity())\n        self.decoder_positional_encoding = build_positional_encoding(\n            positional_encoding)\n        self.query_embed = nn.Embedding(self.num_queries, feat_channels)\n        self.query_feat = nn.Embedding(self.num_queries, feat_channels)\n        # from low resolution to high resolution\n        self.level_embed = nn.Embedding(self.num_transformer_feat_level,\n                                        feat_channels)\n\n        self.cls_embed = nn.Linear(feat_channels, self.num_classes + 1)\n        self.mask_embed = nn.Sequential(\n            nn.Linear(feat_channels, feat_channels), nn.ReLU(inplace=True),\n            nn.Linear(feat_channels, feat_channels), 
nn.ReLU(inplace=True),\n            nn.Linear(feat_channels, out_channels))\n\n        self.test_cfg = test_cfg\n        self.train_cfg = train_cfg\n        if train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            self.sampler = build_sampler(self.train_cfg.sampler, context=self)\n            self.num_points = self.train_cfg.get('num_points', 12544)\n            self.oversample_ratio = self.train_cfg.get('oversample_ratio', 3.0)\n            self.importance_sample_ratio = self.train_cfg.get(\n                'importance_sample_ratio', 0.75)\n\n        self.class_weight = loss_cls.class_weight\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_mask = build_loss(loss_mask)\n        self.loss_dice = build_loss(loss_dice)\n\n    def init_weights(self):\n        for m in self.decoder_input_projs:\n            if isinstance(m, Conv2d):\n                caffe2_xavier_init(m, bias=0)\n\n        self.pixel_decoder.init_weights()\n\n        for p in self.transformer_decoder.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_normal_(p)\n\n    def _get_target_single(self, cls_score, mask_pred, gt_labels, gt_masks,\n                           img_metas):\n        \"\"\"Compute classification and mask targets for one image.\n\n        Args:\n            cls_score (Tensor): Mask score logits from a single decoder layer\n                for one image. Shape (num_queries, cls_out_channels).\n            mask_pred (Tensor): Mask logits for a single decoder layer for one\n                image. Shape (num_queries, h, w).\n            gt_labels (Tensor): Ground truth class indices for one image with\n                shape (num_gts, ).\n            gt_masks (Tensor): Ground truth mask for each image, each with\n                shape (num_gts, h, w).\n            img_metas (dict): Image informtation.\n\n        Returns:\n            tuple[Tensor]: A tuple containing the following for one image.\n\n                - labels (Tensor): Labels of each image. \\\n                    shape (num_queries, ).\n                - label_weights (Tensor): Label weights of each image. \\\n                    shape (num_queries, ).\n                - mask_targets (Tensor): Mask targets of each image. \\\n                    shape (num_queries, h, w).\n                - mask_weights (Tensor): Mask weights of each image. 
\\\n                    shape (num_queries, ).\n                - pos_inds (Tensor): Sampled positive indices for each \\\n                    image.\n                - neg_inds (Tensor): Sampled negative indices for each \\\n                    image.\n        \"\"\"\n        # sample points\n        num_queries = cls_score.shape[0]\n        num_gts = gt_labels.shape[0]\n\n        point_coords = torch.rand((1, self.num_points, 2),\n                                  device=cls_score.device)\n        # shape (num_queries, num_points)\n        mask_points_pred = point_sample(\n            mask_pred.unsqueeze(1), point_coords.repeat(num_queries, 1,\n                                                        1)).squeeze(1)\n        # shape (num_gts, num_points)\n        gt_points_masks = point_sample(\n            gt_masks.unsqueeze(1).float(), point_coords.repeat(num_gts, 1,\n                                                               1)).squeeze(1)\n\n        # assign and sample\n        assign_result = self.assigner.assign(cls_score, mask_points_pred,\n                                             gt_labels, gt_points_masks,\n                                             img_metas)\n        sampling_result = self.sampler.sample(assign_result, mask_pred,\n                                              gt_masks)\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n\n        # label target\n        labels = gt_labels.new_full((self.num_queries, ),\n                                    self.num_classes,\n                                    dtype=torch.long)\n        labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]\n        label_weights = gt_labels.new_ones((self.num_queries, ))\n\n        # mask target\n        mask_targets = gt_masks[sampling_result.pos_assigned_gt_inds]\n        mask_weights = mask_pred.new_zeros((self.num_queries, ))\n        mask_weights[pos_inds] = 1.0\n\n        return (labels, label_weights, mask_targets, mask_weights, pos_inds,\n                neg_inds)\n\n    def loss_single(self, cls_scores, mask_preds, gt_labels_list,\n                    gt_masks_list, img_metas):\n        \"\"\"Loss function for outputs from a single decoder layer.\n\n        Args:\n            cls_scores (Tensor): Mask score logits from a single decoder layer\n                for all images. Shape (batch_size, num_queries,\n                cls_out_channels). Note `cls_out_channels` should includes\n                background.\n            mask_preds (Tensor): Mask logits for a pixel decoder for all\n                images. 
Shape (batch_size, num_queries, h, w).\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image, each with shape (num_gts, ).\n            gt_masks_list (list[Tensor]): Ground truth mask for each image,\n                each with shape (num_gts, h, w).\n            img_metas (list[dict]): List of image meta information.\n\n        Returns:\n            tuple[Tensor]: Loss components for outputs from a single \\\n                decoder layer.\n        \"\"\"\n        num_imgs = cls_scores.size(0)\n        cls_scores_list = [cls_scores[i] for i in range(num_imgs)]\n        mask_preds_list = [mask_preds[i] for i in range(num_imgs)]\n        (labels_list, label_weights_list, mask_targets_list, mask_weights_list,\n         num_total_pos,\n         num_total_neg) = self.get_targets(cls_scores_list, mask_preds_list,\n                                           gt_labels_list, gt_masks_list,\n                                           img_metas)\n        # shape (batch_size, num_queries)\n        labels = torch.stack(labels_list, dim=0)\n        # shape (batch_size, num_queries)\n        label_weights = torch.stack(label_weights_list, dim=0)\n        # shape (num_total_gts, h, w)\n        mask_targets = torch.cat(mask_targets_list, dim=0)\n        # shape (batch_size, num_queries)\n        mask_weights = torch.stack(mask_weights_list, dim=0)\n\n        # classfication loss\n        # shape (batch_size * num_queries, )\n        cls_scores = cls_scores.flatten(0, 1)\n        labels = labels.flatten(0, 1)\n        label_weights = label_weights.flatten(0, 1)\n\n        class_weight = cls_scores.new_tensor(self.class_weight)\n        loss_cls = self.loss_cls(\n            cls_scores,\n            labels,\n            label_weights,\n            avg_factor=class_weight[labels].sum())\n\n        num_total_masks = reduce_mean(cls_scores.new_tensor([num_total_pos]))\n        num_total_masks = max(num_total_masks, 1)\n\n        # extract positive ones\n        # shape (batch_size, num_queries, h, w) -> (num_total_gts, h, w)\n        mask_preds = mask_preds[mask_weights > 0]\n\n        if mask_targets.shape[0] == 0:\n            # zero match\n            loss_dice = mask_preds.sum()\n            loss_mask = mask_preds.sum()\n            return loss_cls, loss_mask, loss_dice\n\n        with torch.no_grad():\n            points_coords = get_uncertain_point_coords_with_randomness(\n                mask_preds.unsqueeze(1), None, self.num_points,\n                self.oversample_ratio, self.importance_sample_ratio)\n            # shape (num_total_gts, h, w) -> (num_total_gts, num_points)\n            mask_point_targets = point_sample(\n                mask_targets.unsqueeze(1).float(), points_coords).squeeze(1)\n        # shape (num_queries, h, w) -> (num_queries, num_points)\n        mask_point_preds = point_sample(\n            mask_preds.unsqueeze(1), points_coords).squeeze(1)\n\n        # dice loss\n        loss_dice = self.loss_dice(\n            mask_point_preds, mask_point_targets, avg_factor=num_total_masks)\n\n        # mask loss\n        # shape (num_queries, num_points) -> (num_queries * num_points, )\n        mask_point_preds = mask_point_preds.reshape(-1)\n        # shape (num_total_gts, num_points) -> (num_total_gts * num_points, )\n        mask_point_targets = mask_point_targets.reshape(-1)\n        loss_mask = self.loss_mask(\n            mask_point_preds,\n            mask_point_targets,\n            avg_factor=num_total_masks * self.num_points)\n\n   
     return loss_cls, loss_mask, loss_dice\n\n    def forward_head(self, decoder_out, mask_feature, attn_mask_target_size):\n        \"\"\"Forward for head part which is called after every decoder layer.\n\n        Args:\n            decoder_out (Tensor): in shape (num_queries, batch_size, c).\n            mask_feature (Tensor): in shape (batch_size, c, h, w).\n            attn_mask_target_size (tuple[int, int]): target attention\n                mask size.\n\n        Returns:\n            tuple: A tuple contain three elements.\n\n            - cls_pred (Tensor): Classification scores in shape \\\n                (batch_size, num_queries, cls_out_channels). \\\n                Note `cls_out_channels` should includes background.\n            - mask_pred (Tensor): Mask scores in shape \\\n                (batch_size, num_queries,h, w).\n            - attn_mask (Tensor): Attention mask in shape \\\n                (batch_size * num_heads, num_queries, h, w).\n        \"\"\"\n        decoder_out = self.transformer_decoder.post_norm(decoder_out)\n        decoder_out = decoder_out.transpose(0, 1)\n        # shape (batch_size, num_queries, c)\n        cls_pred = self.cls_embed(decoder_out)\n        # shape (batch_size, num_queries, c)\n        mask_embed = self.mask_embed(decoder_out)\n        # shape (batch_size, num_queries, h, w)\n        mask_pred = torch.einsum('bqc,bchw->bqhw', mask_embed, mask_feature)\n        attn_mask = F.interpolate(\n            mask_pred,\n            attn_mask_target_size,\n            mode='bilinear',\n            align_corners=False)\n        # shape (batch_size, num_queries, h, w) ->\n        #   (batch_size * num_head, num_queries, h*w)\n        attn_mask = attn_mask.flatten(2).unsqueeze(1).repeat(\n            (1, self.num_heads, 1, 1)).flatten(0, 1)\n        attn_mask = attn_mask.sigmoid() < 0.5\n        attn_mask = attn_mask.detach()\n\n        return cls_pred, mask_pred, attn_mask\n\n    def forward(self, feats, img_metas):\n        \"\"\"Forward function.\n\n        Args:\n            feats (list[Tensor]): Multi scale Features from the\n                upstream network, each is a 4D-tensor.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            tuple: A tuple contains two elements.\n\n            - cls_pred_list (list[Tensor)]: Classification logits \\\n                for each decoder layer. Each is a 3D-tensor with shape \\\n                (batch_size, num_queries, cls_out_channels). \\\n                Note `cls_out_channels` should includes background.\n            - mask_pred_list (list[Tensor]): Mask logits for each \\\n                decoder layer. 
Each with shape (batch_size, num_queries, \\\n                 h, w).\n        \"\"\"\n        batch_size = len(img_metas)\n        mask_features, multi_scale_memorys = self.pixel_decoder(feats)\n        # multi_scale_memorys (from low resolution to high resolution)\n        decoder_inputs = []\n        decoder_positional_encodings = []\n        for i in range(self.num_transformer_feat_level):\n            decoder_input = self.decoder_input_projs[i](multi_scale_memorys[i])\n            # shape (batch_size, c, h, w) -> (h*w, batch_size, c)\n            decoder_input = decoder_input.flatten(2).permute(2, 0, 1)\n            level_embed = self.level_embed.weight[i].view(1, 1, -1)\n            decoder_input = decoder_input + level_embed\n            # shape (batch_size, c, h, w) -> (h*w, batch_size, c)\n            mask = decoder_input.new_zeros(\n                (batch_size, ) + multi_scale_memorys[i].shape[-2:],\n                dtype=torch.bool)\n            decoder_positional_encoding = self.decoder_positional_encoding(\n                mask)\n            decoder_positional_encoding = decoder_positional_encoding.flatten(\n                2).permute(2, 0, 1)\n            decoder_inputs.append(decoder_input)\n            decoder_positional_encodings.append(decoder_positional_encoding)\n        # shape (num_queries, c) -> (num_queries, batch_size, c)\n        query_feat = self.query_feat.weight.unsqueeze(1).repeat(\n            (1, batch_size, 1))\n        query_embed = self.query_embed.weight.unsqueeze(1).repeat(\n            (1, batch_size, 1))\n\n        cls_pred_list = []\n        mask_pred_list = []\n        cls_pred, mask_pred, attn_mask = self.forward_head(\n            query_feat, mask_features, multi_scale_memorys[0].shape[-2:])\n        cls_pred_list.append(cls_pred)\n        mask_pred_list.append(mask_pred)\n\n        for i in range(self.num_transformer_decoder_layers):\n            level_idx = i % self.num_transformer_feat_level\n            # if a mask is all True(all background), then set it all False.\n            attn_mask[torch.where(\n                attn_mask.sum(-1) == attn_mask.shape[-1])] = False\n\n            # cross_attn + self_attn\n            layer = self.transformer_decoder.layers[i]\n            attn_masks = [attn_mask, None]\n            query_feat = layer(\n                query=query_feat,\n                key=decoder_inputs[level_idx],\n                value=decoder_inputs[level_idx],\n                query_pos=query_embed,\n                key_pos=decoder_positional_encodings[level_idx],\n                attn_masks=attn_masks,\n                query_key_padding_mask=None,\n                # here we do not apply masking on padded region\n                key_padding_mask=None)\n            cls_pred, mask_pred, attn_mask = self.forward_head(\n                query_feat, mask_features, multi_scale_memorys[\n                    (i + 1) % self.num_transformer_feat_level].shape[-2:])\n\n            cls_pred_list.append(cls_pred)\n            mask_pred_list.append(mask_pred)\n\n        return cls_pred_list, mask_pred_list\n"
  },
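Mask2FormerHead above supervises masks on a small set of randomly sampled points rather than on full-resolution maps, using `mmcv.ops.point_sample`. The following is an illustrative plain-PyTorch stand-in for that sampling step (bilinear lookup at normalized [0, 1] coordinates); it is a sketch for understanding, not the mmcv implementation.

```python
# Illustrative stand-in for mmcv.ops.point_sample as used in Mask2FormerHead.
import torch
import torch.nn.functional as F


def point_sample_sketch(mask_logits: torch.Tensor,
                        point_coords: torch.Tensor) -> torch.Tensor:
    """mask_logits: (N, C, H, W); point_coords: (N, P, 2) with (x, y) in [0, 1].

    Returns sampled values of shape (N, C, P).
    """
    # grid_sample expects coords in [-1, 1] with shape (N, H_out, W_out, 2)
    grid = 2.0 * point_coords.unsqueeze(1) - 1.0          # (N, 1, P, 2)
    sampled = F.grid_sample(mask_logits, grid,
                            mode='bilinear', align_corners=False)
    return sampled.squeeze(2)                              # (N, C, P)


if __name__ == '__main__':
    num_queries, h, w, num_points = 5, 32, 32, 100
    mask_pred = torch.randn(num_queries, 1, h, w)          # one row per query
    coords = torch.rand(num_queries, num_points, 2)
    print(point_sample_sketch(mask_pred, coords).shape)    # torch.Size([5, 1, 100])
```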
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/maskformer_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import Conv2d, build_plugin_layer, caffe2_xavier_init\nfrom mmcv.cnn.bricks.transformer import (build_positional_encoding,\n                                         build_transformer_layer_sequence)\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import build_assigner, build_sampler, multi_apply, reduce_mean\nfrom mmdet.models.utils import preprocess_panoptic_gt\nfrom ..builder import HEADS, build_loss\nfrom .anchor_free_head import AnchorFreeHead\n\n\n@HEADS.register_module()\nclass MaskFormerHead(AnchorFreeHead):\n    \"\"\"Implements the MaskFormer head.\n\n    See `Per-Pixel Classification is Not All You Need for Semantic\n    Segmentation <https://arxiv.org/pdf/2107.06278>`_ for details.\n\n    Args:\n        in_channels (list[int]): Number of channels in the input feature map.\n        feat_channels (int): Number of channels for feature.\n        out_channels (int): Number of channels for output.\n        num_things_classes (int): Number of things.\n        num_stuff_classes (int): Number of stuff.\n        num_queries (int): Number of query in Transformer.\n        pixel_decoder (:obj:`mmcv.ConfigDict` | dict): Config for pixel\n            decoder. Defaults to None.\n        enforce_decoder_input_project (bool, optional): Whether to add a layer\n            to change the embed_dim of tranformer encoder in pixel decoder to\n            the embed_dim of transformer decoder. Defaults to False.\n        transformer_decoder (:obj:`mmcv.ConfigDict` | dict): Config for\n            transformer decoder. Defaults to None.\n        positional_encoding (:obj:`mmcv.ConfigDict` | dict): Config for\n            transformer decoder position encoding. Defaults to None.\n        loss_cls (:obj:`mmcv.ConfigDict` | dict): Config of the classification\n            loss. 
Defaults to `CrossEntropyLoss`.\n        loss_mask (:obj:`mmcv.ConfigDict` | dict): Config of the mask loss.\n            Defaults to `FocalLoss`.\n        loss_dice (:obj:`mmcv.ConfigDict` | dict): Config of the dice loss.\n            Defaults to `DiceLoss`.\n        train_cfg (:obj:`mmcv.ConfigDict` | dict): Training config of\n            Maskformer head.\n        test_cfg (:obj:`mmcv.ConfigDict` | dict): Testing config of Maskformer\n            head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 feat_channels,\n                 out_channels,\n                 num_things_classes=80,\n                 num_stuff_classes=53,\n                 num_queries=100,\n                 pixel_decoder=None,\n                 enforce_decoder_input_project=False,\n                 transformer_decoder=None,\n                 positional_encoding=None,\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=False,\n                     loss_weight=1.0,\n                     class_weight=[1.0] * 133 + [0.1]),\n                 loss_mask=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=20.0),\n                 loss_dice=dict(\n                     type='DiceLoss',\n                     use_sigmoid=True,\n                     activate=True,\n                     naive_dice=True,\n                     loss_weight=1.0),\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 **kwargs):\n        super(AnchorFreeHead, self).__init__(init_cfg)\n        self.num_things_classes = num_things_classes\n        self.num_stuff_classes = num_stuff_classes\n        self.num_classes = self.num_things_classes + self.num_stuff_classes\n        self.num_queries = num_queries\n\n        pixel_decoder.update(\n            in_channels=in_channels,\n            feat_channels=feat_channels,\n            out_channels=out_channels)\n        self.pixel_decoder = build_plugin_layer(pixel_decoder)[1]\n        self.transformer_decoder = build_transformer_layer_sequence(\n            transformer_decoder)\n        self.decoder_embed_dims = self.transformer_decoder.embed_dims\n        pixel_decoder_type = pixel_decoder.get('type')\n        if pixel_decoder_type == 'PixelDecoder' and (\n                self.decoder_embed_dims != in_channels[-1]\n                or enforce_decoder_input_project):\n            self.decoder_input_proj = Conv2d(\n                in_channels[-1], self.decoder_embed_dims, kernel_size=1)\n        else:\n            self.decoder_input_proj = nn.Identity()\n        self.decoder_pe = build_positional_encoding(positional_encoding)\n        self.query_embed = nn.Embedding(self.num_queries, out_channels)\n\n        self.cls_embed = nn.Linear(feat_channels, self.num_classes + 1)\n        self.mask_embed = nn.Sequential(\n            nn.Linear(feat_channels, feat_channels), nn.ReLU(inplace=True),\n            nn.Linear(feat_channels, feat_channels), nn.ReLU(inplace=True),\n            nn.Linear(feat_channels, out_channels))\n\n        self.test_cfg = test_cfg\n        self.train_cfg = train_cfg\n        if train_cfg:\n            self.assigner = build_assigner(train_cfg.get('assigner', None))\n            self.sampler = 
build_sampler(\n                train_cfg.get('sampler', None), context=self)\n\n        self.class_weight = loss_cls.get('class_weight', None)\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_mask = build_loss(loss_mask)\n        self.loss_dice = build_loss(loss_dice)\n\n    def init_weights(self):\n        if isinstance(self.decoder_input_proj, Conv2d):\n            caffe2_xavier_init(self.decoder_input_proj, bias=0)\n\n        self.pixel_decoder.init_weights()\n\n        for p in self.transformer_decoder.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n\n    def preprocess_gt(self, gt_labels_list, gt_masks_list, gt_semantic_segs,\n                      img_metas):\n        \"\"\"Preprocess the ground truth for all images.\n\n        Args:\n            gt_labels_list (list[Tensor]): Each is ground truth\n                labels of each bbox, with shape (num_gts, ).\n            gt_masks_list (list[BitmapMasks]): Each is ground truth\n                masks of each instances of a image, shape\n                (num_gts, h, w).\n            gt_semantic_seg (Tensor | None): Ground truth of semantic\n                segmentation with the shape (batch_size, n, h, w).\n                [0, num_thing_class - 1] means things,\n                [num_thing_class, num_class-1] means stuff,\n                255 means VOID. It's None when training instance segmentation.\n            img_metas (list[dict]): List of image meta information.\n\n        Returns:\n            tuple: a tuple containing the following targets.\n                - labels (list[Tensor]): Ground truth class indices\\\n                    for all images. Each with shape (n, ), n is the sum of\\\n                    number of stuff type and number of instance in a image.\n                - masks (list[Tensor]): Ground truth mask for each\\\n                    image, each with shape (n, h, w).\n        \"\"\"\n        num_things_list = [self.num_things_classes] * len(gt_labels_list)\n        num_stuff_list = [self.num_stuff_classes] * len(gt_labels_list)\n        if gt_semantic_segs is None:\n            gt_semantic_segs = [None] * len(gt_labels_list)\n\n        targets = multi_apply(preprocess_panoptic_gt, gt_labels_list,\n                              gt_masks_list, gt_semantic_segs, num_things_list,\n                              num_stuff_list, img_metas)\n        labels, masks = targets\n        return labels, masks\n\n    def get_targets(self, cls_scores_list, mask_preds_list, gt_labels_list,\n                    gt_masks_list, img_metas):\n        \"\"\"Compute classification and mask targets for all images for a decoder\n        layer.\n\n        Args:\n            cls_scores_list (list[Tensor]): Mask score logits from a single\n                decoder layer for all images. Each with shape (num_queries,\n                cls_out_channels).\n            mask_preds_list (list[Tensor]): Mask logits from a single decoder\n                layer for all images. Each with shape (num_queries, h, w).\n            gt_labels_list (list[Tensor]): Ground truth class indices for all\n                images. 
Each with shape (n, ), n is the sum of number of stuff\n                type and number of instance in a image.\n            gt_masks_list (list[Tensor]): Ground truth mask for each image,\n                each with shape (n, h, w).\n            img_metas (list[dict]): List of image meta information.\n\n        Returns:\n            tuple[list[Tensor]]: a tuple containing the following targets.\n                - labels_list (list[Tensor]): Labels of all images.\\\n                    Each with shape (num_queries, ).\n                - label_weights_list (list[Tensor]): Label weights\\\n                    of all images. Each with shape (num_queries, ).\n                - mask_targets_list (list[Tensor]): Mask targets of\\\n                    all images. Each with shape (num_queries, h, w).\n                - mask_weights_list (list[Tensor]): Mask weights of\\\n                    all images. Each with shape (num_queries, ).\n                - num_total_pos (int): Number of positive samples in\\\n                    all images.\n                - num_total_neg (int): Number of negative samples in\\\n                    all images.\n        \"\"\"\n        (labels_list, label_weights_list, mask_targets_list, mask_weights_list,\n         pos_inds_list,\n         neg_inds_list) = multi_apply(self._get_target_single, cls_scores_list,\n                                      mask_preds_list, gt_labels_list,\n                                      gt_masks_list, img_metas)\n\n        num_total_pos = sum((inds.numel() for inds in pos_inds_list))\n        num_total_neg = sum((inds.numel() for inds in neg_inds_list))\n        return (labels_list, label_weights_list, mask_targets_list,\n                mask_weights_list, num_total_pos, num_total_neg)\n\n    def _get_target_single(self, cls_score, mask_pred, gt_labels, gt_masks,\n                           img_metas):\n        \"\"\"Compute classification and mask targets for one image.\n\n        Args:\n            cls_score (Tensor): Mask score logits from a single decoder layer\n                for one image. Shape (num_queries, cls_out_channels).\n            mask_pred (Tensor): Mask logits for a single decoder layer for one\n                image. Shape (num_queries, h, w).\n            gt_labels (Tensor): Ground truth class indices for one image with\n                shape (n, ). 
n is the sum of number of stuff type and number\n                of instance in a image.\n            gt_masks (Tensor): Ground truth mask for each image, each with\n                shape (n, h, w).\n            img_metas (dict): Image informtation.\n\n        Returns:\n            tuple[Tensor]: a tuple containing the following for one image.\n                - labels (Tensor): Labels of each image.\n                    shape (num_queries, ).\n                - label_weights (Tensor): Label weights of each image.\n                    shape (num_queries, ).\n                - mask_targets (Tensor): Mask targets of each image.\n                    shape (num_queries, h, w).\n                - mask_weights (Tensor): Mask weights of each image.\n                    shape (num_queries, ).\n                - pos_inds (Tensor): Sampled positive indices for each image.\n                - neg_inds (Tensor): Sampled negative indices for each image.\n        \"\"\"\n        target_shape = mask_pred.shape[-2:]\n        if gt_masks.shape[0] > 0:\n            gt_masks_downsampled = F.interpolate(\n                gt_masks.unsqueeze(1).float(), target_shape,\n                mode='nearest').squeeze(1).long()\n        else:\n            gt_masks_downsampled = gt_masks\n\n        # assign and sample\n        assign_result = self.assigner.assign(cls_score, mask_pred, gt_labels,\n                                             gt_masks_downsampled, img_metas)\n        sampling_result = self.sampler.sample(assign_result, mask_pred,\n                                              gt_masks)\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n\n        # label target\n        labels = gt_labels.new_full((self.num_queries, ),\n                                    self.num_classes,\n                                    dtype=torch.long)\n        labels[pos_inds] = gt_labels[sampling_result.pos_assigned_gt_inds]\n        label_weights = gt_labels.new_ones(self.num_queries)\n\n        # mask target\n        mask_targets = gt_masks[sampling_result.pos_assigned_gt_inds]\n        mask_weights = mask_pred.new_zeros((self.num_queries, ))\n        mask_weights[pos_inds] = 1.0\n\n        return (labels, label_weights, mask_targets, mask_weights, pos_inds,\n                neg_inds)\n\n    @force_fp32(apply_to=('all_cls_scores', 'all_mask_preds'))\n    def loss(self, all_cls_scores, all_mask_preds, gt_labels_list,\n             gt_masks_list, img_metas):\n        \"\"\"Loss function.\n\n        Args:\n            all_cls_scores (Tensor): Classification scores for all decoder\n                layers with shape (num_decoder, batch_size, num_queries,\n                cls_out_channels). Note `cls_out_channels` should includes\n                background.\n            all_mask_preds (Tensor): Mask scores for all decoder layers with\n                shape (num_decoder, batch_size, num_queries, h, w).\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image with shape (n, ). 
n is the sum of number of stuff type\n                and number of instance in a image.\n            gt_masks_list (list[Tensor]): Ground truth mask for each image with\n                shape (n, h, w).\n            img_metas (list[dict]): List of image meta information.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        num_dec_layers = len(all_cls_scores)\n        all_gt_labels_list = [gt_labels_list for _ in range(num_dec_layers)]\n        all_gt_masks_list = [gt_masks_list for _ in range(num_dec_layers)]\n        img_metas_list = [img_metas for _ in range(num_dec_layers)]\n        losses_cls, losses_mask, losses_dice = multi_apply(\n            self.loss_single, all_cls_scores, all_mask_preds,\n            all_gt_labels_list, all_gt_masks_list, img_metas_list)\n\n        loss_dict = dict()\n        # loss from the last decoder layer\n        loss_dict['loss_cls'] = losses_cls[-1]\n        loss_dict['loss_mask'] = losses_mask[-1]\n        loss_dict['loss_dice'] = losses_dice[-1]\n        # loss from other decoder layers\n        num_dec_layer = 0\n        for loss_cls_i, loss_mask_i, loss_dice_i in zip(\n                losses_cls[:-1], losses_mask[:-1], losses_dice[:-1]):\n            loss_dict[f'd{num_dec_layer}.loss_cls'] = loss_cls_i\n            loss_dict[f'd{num_dec_layer}.loss_mask'] = loss_mask_i\n            loss_dict[f'd{num_dec_layer}.loss_dice'] = loss_dice_i\n            num_dec_layer += 1\n        return loss_dict\n\n    def loss_single(self, cls_scores, mask_preds, gt_labels_list,\n                    gt_masks_list, img_metas):\n        \"\"\"Loss function for outputs from a single decoder layer.\n\n        Args:\n            cls_scores (Tensor): Mask score logits from a single decoder layer\n                for all images. Shape (batch_size, num_queries,\n                cls_out_channels). Note `cls_out_channels` should includes\n                background.\n            mask_preds (Tensor): Mask logits for a pixel decoder for all\n                images. Shape (batch_size, num_queries, h, w).\n            gt_labels_list (list[Tensor]): Ground truth class indices for each\n                image, each with shape (n, ). 
n is the sum of number of stuff\n                types and number of instances in a image.\n            gt_masks_list (list[Tensor]): Ground truth mask for each image,\n                each with shape (n, h, w).\n            img_metas (list[dict]): List of image meta information.\n\n        Returns:\n            tuple[Tensor]: Loss components for outputs from a single decoder\\\n                layer.\n        \"\"\"\n        num_imgs = cls_scores.size(0)\n        cls_scores_list = [cls_scores[i] for i in range(num_imgs)]\n        mask_preds_list = [mask_preds[i] for i in range(num_imgs)]\n\n        (labels_list, label_weights_list, mask_targets_list, mask_weights_list,\n         num_total_pos,\n         num_total_neg) = self.get_targets(cls_scores_list, mask_preds_list,\n                                           gt_labels_list, gt_masks_list,\n                                           img_metas)\n        # shape (batch_size, num_queries)\n        labels = torch.stack(labels_list, dim=0)\n        # shape (batch_size, num_queries)\n        label_weights = torch.stack(label_weights_list, dim=0)\n        # shape (num_total_gts, h, w)\n        mask_targets = torch.cat(mask_targets_list, dim=0)\n        # shape (batch_size, num_queries)\n        mask_weights = torch.stack(mask_weights_list, dim=0)\n\n        # classfication loss\n        # shape (batch_size * num_queries, )\n        cls_scores = cls_scores.flatten(0, 1)\n        labels = labels.flatten(0, 1)\n        label_weights = label_weights.flatten(0, 1)\n\n        class_weight = cls_scores.new_tensor(self.class_weight)\n        loss_cls = self.loss_cls(\n            cls_scores,\n            labels,\n            label_weights,\n            avg_factor=class_weight[labels].sum())\n\n        num_total_masks = reduce_mean(cls_scores.new_tensor([num_total_pos]))\n        num_total_masks = max(num_total_masks, 1)\n\n        # extract positive ones\n        # shape (batch_size, num_queries, h, w) -> (num_total_gts, h, w)\n        mask_preds = mask_preds[mask_weights > 0]\n        target_shape = mask_targets.shape[-2:]\n\n        if mask_targets.shape[0] == 0:\n            # zero match\n            loss_dice = mask_preds.sum()\n            loss_mask = mask_preds.sum()\n            return loss_cls, loss_mask, loss_dice\n\n        # upsample to shape of target\n        # shape (num_total_gts, h, w)\n        mask_preds = F.interpolate(\n            mask_preds.unsqueeze(1),\n            target_shape,\n            mode='bilinear',\n            align_corners=False).squeeze(1)\n\n        # dice loss\n        loss_dice = self.loss_dice(\n            mask_preds, mask_targets, avg_factor=num_total_masks)\n\n        # mask loss\n        # FocalLoss support input of shape (n, num_class)\n        h, w = mask_preds.shape[-2:]\n        # shape (num_total_gts, h, w) -> (num_total_gts * h * w, 1)\n        mask_preds = mask_preds.reshape(-1, 1)\n        # shape (num_total_gts, h, w) -> (num_total_gts * h * w)\n        mask_targets = mask_targets.reshape(-1)\n        # target is (1 - mask_targets) !!!\n        loss_mask = self.loss_mask(\n            mask_preds, 1 - mask_targets, avg_factor=num_total_masks * h * w)\n\n        return loss_cls, loss_mask, loss_dice\n\n    def forward(self, feats, img_metas):\n        \"\"\"Forward function.\n\n        Args:\n            feats (list[Tensor]): Features from the upstream network, each\n                is a 4D-tensor.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            
tuple: a tuple contains two elements.\n                - all_cls_scores (Tensor): Classification scores for each\\\n                    scale level. Each is a 4D-tensor with shape\\\n                    (num_decoder, batch_size, num_queries, cls_out_channels).\\\n                    Note `cls_out_channels` should includes background.\n                - all_mask_preds (Tensor): Mask scores for each decoder\\\n                    layer. Each with shape (num_decoder, batch_size,\\\n                    num_queries, h, w).\n        \"\"\"\n        batch_size = len(img_metas)\n        input_img_h, input_img_w = img_metas[0]['batch_input_shape']\n        padding_mask = feats[-1].new_ones(\n            (batch_size, input_img_h, input_img_w), dtype=torch.float32)\n        for i in range(batch_size):\n            img_h, img_w, _ = img_metas[i]['img_shape']\n            padding_mask[i, :img_h, :img_w] = 0\n        padding_mask = F.interpolate(\n            padding_mask.unsqueeze(1),\n            size=feats[-1].shape[-2:],\n            mode='nearest').to(torch.bool).squeeze(1)\n        # when backbone is swin, memory is output of last stage of swin.\n        # when backbone is r50, memory is output of tranformer encoder.\n        mask_features, memory = self.pixel_decoder(feats, img_metas)\n        pos_embed = self.decoder_pe(padding_mask)\n        memory = self.decoder_input_proj(memory)\n        # shape (batch_size, c, h, w) -> (h*w, batch_size, c)\n        memory = memory.flatten(2).permute(2, 0, 1)\n        pos_embed = pos_embed.flatten(2).permute(2, 0, 1)\n        # shape (batch_size, h * w)\n        padding_mask = padding_mask.flatten(1)\n        # shape = (num_queries, embed_dims)\n        query_embed = self.query_embed.weight\n        # shape = (num_queries, batch_size, embed_dims)\n        query_embed = query_embed.unsqueeze(1).repeat(1, batch_size, 1)\n        target = torch.zeros_like(query_embed)\n        # shape (num_decoder, num_queries, batch_size, embed_dims)\n        out_dec = self.transformer_decoder(\n            query=target,\n            key=memory,\n            value=memory,\n            key_pos=pos_embed,\n            query_pos=query_embed,\n            key_padding_mask=padding_mask)\n        # shape (num_decoder, batch_size, num_queries, embed_dims)\n        out_dec = out_dec.transpose(1, 2)\n\n        # cls_scores\n        all_cls_scores = self.cls_embed(out_dec)\n\n        # mask_preds\n        mask_embed = self.mask_embed(out_dec)\n        all_mask_preds = torch.einsum('lbqc,bchw->lbqhw', mask_embed,\n                                      mask_features)\n\n        return all_cls_scores, all_mask_preds\n\n    def forward_train(self,\n                      feats,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_masks,\n                      gt_semantic_seg,\n                      gt_bboxes_ignore=None):\n        \"\"\"Forward function for training mode.\n\n        Args:\n            feats (list[Tensor]): Multi-level features from the upstream\n                network, each is a 4D-tensor.\n            img_metas (list[Dict]): List of image information.\n            gt_bboxes (list[Tensor]): Each element is ground truth bboxes of\n                the image, shape (num_gts, 4). 
Not used here.\n            gt_labels (list[Tensor]): Each element is ground truth labels of\n                each box, shape (num_gts,).\n            gt_masks (list[BitmapMasks]): Each element is masks of instances\n                of a image, shape (num_gts, h, w).\n            gt_semantic_seg (list[tensor] | None): Each element is the ground\n                truth of semantic segmentation with the shape (N, H, W).\n                [0, num_thing_class - 1] means things,\n                [num_thing_class, num_class-1] means stuff,\n                255 means VOID. It's None when training instance segmentation.\n            gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be\n                ignored. Defaults to None.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        # not consider ignoring bboxes\n        assert gt_bboxes_ignore is None\n\n        # forward\n        all_cls_scores, all_mask_preds = self(feats, img_metas)\n\n        # preprocess ground truth\n        gt_labels, gt_masks = self.preprocess_gt(gt_labels, gt_masks,\n                                                 gt_semantic_seg, img_metas)\n\n        # loss\n        losses = self.loss(all_cls_scores, all_mask_preds, gt_labels, gt_masks,\n                           img_metas)\n\n        return losses\n\n    def simple_test(self, feats, img_metas, **kwargs):\n        \"\"\"Test without augmentaton.\n\n        Args:\n            feats (list[Tensor]): Multi-level features from the\n                upstream network, each is a 4D-tensor.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            tuple: A tuple contains two tensors.\n\n            - mask_cls_results (Tensor): Mask classification logits,\\\n                shape (batch_size, num_queries, cls_out_channels).\n                Note `cls_out_channels` should includes background.\n            - mask_pred_results (Tensor): Mask logits, shape \\\n                (batch_size, num_queries, h, w).\n        \"\"\"\n        all_cls_scores, all_mask_preds = self(feats, img_metas)\n        mask_cls_results = all_cls_scores[-1]\n        mask_pred_results = all_mask_preds[-1]\n\n        # upsample masks\n        img_shape = img_metas[0]['batch_input_shape']\n        mask_pred_results = F.interpolate(\n            mask_pred_results,\n            size=(img_shape[0], img_shape[1]),\n            mode='bilinear',\n            align_corners=False)\n\n        return mask_cls_results, mask_pred_results\n"
  },
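In the MaskFormerHead forward pass above, mask logits are produced by contracting per-query mask embeddings with the per-pixel mask features via `torch.einsum('lbqc,bchw->lbqhw', ...)`. Below is a tiny shape-only demo of that contraction with random tensors, just to make the layout of the result explicit.

```python
# Shape-only demo of the einsum MaskFormerHead uses to turn per-query
# embeddings and per-pixel features into mask logits (random values).
import torch

num_decoder, batch, num_queries, channels, h, w = 6, 2, 100, 256, 32, 32

mask_embed = torch.randn(num_decoder, batch, num_queries, channels)   # 'lbqc'
mask_features = torch.randn(batch, channels, h, w)                    # 'bchw'

# dot product over the channel dim for every (layer, image, query, pixel)
all_mask_preds = torch.einsum('lbqc,bchw->lbqhw', mask_embed, mask_features)
print(all_mask_preds.shape)  # torch.Size([6, 2, 100, 32, 32])
```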
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/nasfcos_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, Scale\n\nfrom mmdet.models.dense_heads.fcos_head import FCOSHead\nfrom ..builder import HEADS\n\n\n@HEADS.register_module()\nclass NASFCOSHead(FCOSHead):\n    \"\"\"Anchor-free head used in `NASFCOS <https://arxiv.org/abs/1906.04423>`_.\n\n    It is quite similar with FCOS head, except for the searched structure of\n    classification branch and bbox regression branch, where a structure of\n    \"dconv3x3, conv3x3, dconv3x3, conv1x1\" is utilized instead.\n    \"\"\"\n\n    def __init__(self, *args, init_cfg=None, **kwargs):\n        if init_cfg is None:\n            init_cfg = [\n                dict(type='Caffe2Xavier', layer=['ConvModule', 'Conv2d']),\n                dict(\n                    type='Normal',\n                    std=0.01,\n                    override=[\n                        dict(name='conv_reg'),\n                        dict(name='conv_centerness'),\n                        dict(\n                            name='conv_cls',\n                            type='Normal',\n                            std=0.01,\n                            bias_prob=0.01)\n                    ]),\n            ]\n        super(NASFCOSHead, self).__init__(*args, init_cfg=init_cfg, **kwargs)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        dconv3x3_config = dict(\n            type='DCNv2',\n            kernel_size=3,\n            use_bias=True,\n            deform_groups=2,\n            padding=1)\n        conv3x3_config = dict(type='Conv', kernel_size=3, padding=1)\n        conv1x1_config = dict(type='Conv', kernel_size=1)\n\n        self.arch_config = [\n            dconv3x3_config, conv3x3_config, dconv3x3_config, conv1x1_config\n        ]\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i, op_ in enumerate(self.arch_config):\n            op = copy.deepcopy(op_)\n            chn = self.in_channels if i == 0 else self.feat_channels\n            assert isinstance(op, dict)\n            use_bias = op.pop('use_bias', False)\n            padding = op.pop('padding', 0)\n            kernel_size = op.pop('kernel_size')\n            module = ConvModule(\n                chn,\n                self.feat_channels,\n                kernel_size,\n                stride=1,\n                padding=padding,\n                norm_cfg=self.norm_cfg,\n                bias=use_bias,\n                conv_cfg=op)\n\n            self.cls_convs.append(copy.deepcopy(module))\n            self.reg_convs.append(copy.deepcopy(module))\n\n        self.conv_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n        self.conv_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)\n        self.conv_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)\n\n        self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides])\n"
  },
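NASFCOSHead above builds each classification/regression branch from the searched structure "dconv3x3, conv3x3, dconv3x3, conv1x1". The sketch below approximates that branch with standard convolutions only; replacing the DCNv2 deformable convs with ordinary 3x3 convs is an assumption made purely for illustration, and the norm/activation choices here are likewise not taken from the repo config.

```python
# Rough plain-PyTorch approximation of the searched NAS-FCOS branch
# ("dconv3x3, conv3x3, dconv3x3, conv1x1"); DCNv2 layers are replaced by
# ordinary 3x3 convs here, which is a simplification for illustration.
import torch
import torch.nn as nn


def nasfcos_branch_sketch(in_channels: int = 256,
                          feat_channels: int = 256) -> nn.Sequential:
    arch = [dict(kernel_size=3, padding=1),   # stands in for DCNv2 3x3
            dict(kernel_size=3, padding=1),   # conv3x3
            dict(kernel_size=3, padding=1),   # stands in for DCNv2 3x3
            dict(kernel_size=1, padding=0)]   # conv1x1
    layers, chn = [], in_channels
    for op in arch:
        layers += [nn.Conv2d(chn, feat_channels, op['kernel_size'],
                             padding=op['padding'], bias=False),
                   nn.GroupNorm(32, feat_channels),
                   nn.ReLU(inplace=True)]
        chn = feat_channels
    return nn.Sequential(*layers)


if __name__ == '__main__':
    branch = nasfcos_branch_sketch()
    print(branch(torch.randn(1, 256, 64, 64)).shape)  # torch.Size([1, 256, 64, 64])
```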
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/paa_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import multi_apply, multiclass_nms\nfrom mmdet.core.bbox.iou_calculators import bbox_overlaps\nfrom mmdet.models import HEADS\nfrom mmdet.models.dense_heads import ATSSHead\n\nEPS = 1e-12\ntry:\n    import sklearn.mixture as skm\nexcept ImportError:\n    skm = None\n\n\ndef levels_to_images(mlvl_tensor):\n    \"\"\"Concat multi-level feature maps by image.\n\n    [feature_level0, feature_level1...] -> [feature_image0, feature_image1...]\n    Convert the shape of each element in mlvl_tensor from (N, C, H, W) to\n    (N, H*W , C), then split the element to N elements with shape (H*W, C), and\n    concat elements in same image of all level along first dimension.\n\n    Args:\n        mlvl_tensor (list[torch.Tensor]): list of Tensor which collect from\n            corresponding level. Each element is of shape (N, C, H, W)\n\n    Returns:\n        list[torch.Tensor]: A list that contains N tensors and each tensor is\n            of shape (num_elements, C)\n    \"\"\"\n    batch_size = mlvl_tensor[0].size(0)\n    batch_list = [[] for _ in range(batch_size)]\n    channels = mlvl_tensor[0].size(1)\n    for t in mlvl_tensor:\n        t = t.permute(0, 2, 3, 1)\n        t = t.view(batch_size, -1, channels).contiguous()\n        for img in range(batch_size):\n            batch_list[img].append(t[img])\n    return [torch.cat(item, 0) for item in batch_list]\n\n\n@HEADS.register_module()\nclass PAAHead(ATSSHead):\n    \"\"\"Head of PAAAssignment: Probabilistic Anchor Assignment with IoU\n    Prediction for Object Detection.\n\n    Code is modified from the `official github repo\n    <https://github.com/kkhoot/PAA/blob/master/paa_core\n    /modeling/rpn/paa/loss.py>`_.\n\n    More details can be found in the `paper\n    <https://arxiv.org/abs/2007.08103>`_ .\n\n    Args:\n        topk (int): Select topk samples with smallest loss in\n            each level.\n        score_voting (bool): Whether to use score voting in post-process.\n        covariance_type : String describing the type of covariance parameters\n            to be used in :class:`sklearn.mixture.GaussianMixture`.\n            It must be one of:\n\n            - 'full': each component has its own general covariance matrix\n            - 'tied': all components share the same general covariance matrix\n            - 'diag': each component has its own diagonal covariance matrix\n            - 'spherical': each component has its own single variance\n            Default: 'diag'. From 'full' to 'spherical', the gmm fitting\n            process is faster yet the performance could be influenced. 
For most\n            cases, 'diag' should be a good choice.\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 topk=9,\n                 score_voting=True,\n                 covariance_type='diag',\n                 **kwargs):\n        # topk used in paa reassign process\n        self.topk = topk\n        self.with_score_voting = score_voting\n        self.covariance_type = covariance_type\n        super(PAAHead, self).__init__(*args, **kwargs)\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'iou_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             iou_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            iou_preds (list[Tensor]): iou_preds for each scale\n                level with shape (N, num_anchors * 1, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): Specify which bounding\n                boxes can be ignored when are computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss gmm_assignment.\n        \"\"\"\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels,\n        )\n        (labels, labels_weight, bboxes_target, bboxes_weight, pos_inds,\n         pos_gt_index) = cls_reg_targets\n        cls_scores = levels_to_images(cls_scores)\n        cls_scores = [\n            item.reshape(-1, self.cls_out_channels) for item in cls_scores\n        ]\n        bbox_preds = levels_to_images(bbox_preds)\n        bbox_preds = [item.reshape(-1, 4) for item in bbox_preds]\n        iou_preds = levels_to_images(iou_preds)\n        iou_preds = [item.reshape(-1, 1) for item in iou_preds]\n        pos_losses_list, = multi_apply(self.get_pos_loss, anchor_list,\n                                       cls_scores, bbox_preds, labels,\n                                       labels_weight, bboxes_target,\n                                       bboxes_weight, pos_inds)\n\n        with torch.no_grad():\n            reassign_labels, reassign_label_weight, \\\n                reassign_bbox_weights, num_pos = multi_apply(\n                    self.paa_reassign,\n                    pos_losses_list,\n                    labels,\n           
         labels_weight,\n                    bboxes_weight,\n                    pos_inds,\n                    pos_gt_index,\n                    anchor_list)\n            num_pos = sum(num_pos)\n        # convert all tensor list to a flatten tensor\n        cls_scores = torch.cat(cls_scores, 0).view(-1, cls_scores[0].size(-1))\n        bbox_preds = torch.cat(bbox_preds, 0).view(-1, bbox_preds[0].size(-1))\n        iou_preds = torch.cat(iou_preds, 0).view(-1, iou_preds[0].size(-1))\n        labels = torch.cat(reassign_labels, 0).view(-1)\n        flatten_anchors = torch.cat(\n            [torch.cat(item, 0) for item in anchor_list])\n        labels_weight = torch.cat(reassign_label_weight, 0).view(-1)\n        bboxes_target = torch.cat(bboxes_target,\n                                  0).view(-1, bboxes_target[0].size(-1))\n\n        pos_inds_flatten = ((labels >= 0)\n                            &\n                            (labels < self.num_classes)).nonzero().reshape(-1)\n\n        losses_cls = self.loss_cls(\n            cls_scores,\n            labels,\n            labels_weight,\n            avg_factor=max(num_pos, len(img_metas)))  # avoid num_pos=0\n        if num_pos:\n            pos_bbox_pred = self.bbox_coder.decode(\n                flatten_anchors[pos_inds_flatten],\n                bbox_preds[pos_inds_flatten])\n            pos_bbox_target = bboxes_target[pos_inds_flatten]\n            iou_target = bbox_overlaps(\n                pos_bbox_pred.detach(), pos_bbox_target, is_aligned=True)\n            losses_iou = self.loss_centerness(\n                iou_preds[pos_inds_flatten],\n                iou_target.unsqueeze(-1),\n                avg_factor=num_pos)\n            losses_bbox = self.loss_bbox(\n                pos_bbox_pred,\n                pos_bbox_target,\n                iou_target.clamp(min=EPS),\n                avg_factor=iou_target.sum())\n        else:\n            losses_iou = iou_preds.sum() * 0\n            losses_bbox = bbox_preds.sum() * 0\n\n        return dict(\n            loss_cls=losses_cls, loss_bbox=losses_bbox, loss_iou=losses_iou)\n\n    def get_pos_loss(self, anchors, cls_score, bbox_pred, label, label_weight,\n                     bbox_target, bbox_weight, pos_inds):\n        \"\"\"Calculate loss of all potential positive samples obtained from first\n        match process.\n\n        Args:\n            anchors (list[Tensor]): Anchors of each scale.\n            cls_score (Tensor): Box scores of single image with shape\n                (num_anchors, num_classes)\n            bbox_pred (Tensor): Box energies / deltas of single image\n                with shape (num_anchors, 4)\n            label (Tensor): classification target of each anchor with\n                shape (num_anchors,)\n            label_weight (Tensor): Classification loss weight of each\n                anchor with shape (num_anchors).\n            bbox_target (dict): Regression target of each anchor with\n                shape (num_anchors, 4).\n            bbox_weight (Tensor): Bbox weight of each anchor with shape\n                (num_anchors, 4).\n            pos_inds (Tensor): Index of all positive samples got from\n                first assign process.\n\n        Returns:\n            Tensor: Losses of all positive samples in single image.\n        \"\"\"\n        if not len(pos_inds):\n            return cls_score.new([]),\n        anchors_all_level = torch.cat(anchors, 0)\n        pos_scores = cls_score[pos_inds]\n        pos_bbox_pred = bbox_pred[pos_inds]\n        
pos_label = label[pos_inds]\n        pos_label_weight = label_weight[pos_inds]\n        pos_bbox_target = bbox_target[pos_inds]\n        pos_bbox_weight = bbox_weight[pos_inds]\n        pos_anchors = anchors_all_level[pos_inds]\n        pos_bbox_pred = self.bbox_coder.decode(pos_anchors, pos_bbox_pred)\n\n        # to keep loss dimension\n        loss_cls = self.loss_cls(\n            pos_scores,\n            pos_label,\n            pos_label_weight,\n            avg_factor=1.0,\n            reduction_override='none')\n\n        loss_bbox = self.loss_bbox(\n            pos_bbox_pred,\n            pos_bbox_target,\n            pos_bbox_weight,\n            avg_factor=1.0,  # keep same loss weight before reassign\n            reduction_override='none')\n\n        loss_cls = loss_cls.sum(-1)\n        pos_loss = loss_bbox + loss_cls\n        return pos_loss,\n\n    def paa_reassign(self, pos_losses, label, label_weight, bbox_weight,\n                     pos_inds, pos_gt_inds, anchors):\n        \"\"\"Fit loss to GMM distribution and separate positive, ignore, negative\n        samples again with GMM model.\n\n        Args:\n            pos_losses (Tensor): Losses of all positive samples in\n                single image.\n            label (Tensor): classification target of each anchor with\n                shape (num_anchors,)\n            label_weight (Tensor): Classification loss weight of each\n                anchor with shape (num_anchors).\n            bbox_weight (Tensor): Bbox weight of each anchor with shape\n                (num_anchors, 4).\n            pos_inds (Tensor): Index of all positive samples got from\n                first assign process.\n            pos_gt_inds (Tensor): Gt_index of all positive samples got\n                from first assign process.\n            anchors (list[Tensor]): Anchors of each scale.\n\n        Returns:\n            tuple: Usually returns a tuple containing learning targets.\n\n                - label (Tensor): classification target of each anchor after\n                  paa assign, with shape (num_anchors,)\n                - label_weight (Tensor): Classification loss weight of each\n                  anchor after paa assign, with shape (num_anchors).\n                - bbox_weight (Tensor): Bbox weight of each anchor with shape\n                  (num_anchors, 4).\n                - num_pos (int): The number of positive samples after paa\n                  assign.\n        \"\"\"\n        if not len(pos_inds):\n            return label, label_weight, bbox_weight, 0\n        label = label.clone()\n        label_weight = label_weight.clone()\n        bbox_weight = bbox_weight.clone()\n        num_gt = pos_gt_inds.max() + 1\n        num_level = len(anchors)\n        num_anchors_each_level = [item.size(0) for item in anchors]\n        num_anchors_each_level.insert(0, 0)\n        inds_level_interval = np.cumsum(num_anchors_each_level)\n        pos_level_mask = []\n        for i in range(num_level):\n            mask = (pos_inds >= inds_level_interval[i]) & (\n                pos_inds < inds_level_interval[i + 1])\n            pos_level_mask.append(mask)\n        pos_inds_after_paa = [label.new_tensor([])]\n        ignore_inds_after_paa = [label.new_tensor([])]\n        for gt_ind in range(num_gt):\n            pos_inds_gmm = []\n            pos_loss_gmm = []\n            gt_mask = pos_gt_inds == gt_ind\n            for level in range(num_level):\n                level_mask = pos_level_mask[level]\n                level_gt_mask = level_mask & 
gt_mask\n                value, topk_inds = pos_losses[level_gt_mask].topk(\n                    min(level_gt_mask.sum(), self.topk), largest=False)\n                pos_inds_gmm.append(pos_inds[level_gt_mask][topk_inds])\n                pos_loss_gmm.append(value)\n            pos_inds_gmm = torch.cat(pos_inds_gmm)\n            pos_loss_gmm = torch.cat(pos_loss_gmm)\n            # fix gmm need at least two sample\n            if len(pos_inds_gmm) < 2:\n                continue\n            device = pos_inds_gmm.device\n            pos_loss_gmm, sort_inds = pos_loss_gmm.sort()\n            pos_inds_gmm = pos_inds_gmm[sort_inds]\n            pos_loss_gmm = pos_loss_gmm.view(-1, 1).cpu().numpy()\n            min_loss, max_loss = pos_loss_gmm.min(), pos_loss_gmm.max()\n            means_init = np.array([min_loss, max_loss]).reshape(2, 1)\n            weights_init = np.array([0.5, 0.5])\n            precisions_init = np.array([1.0, 1.0]).reshape(2, 1, 1)  # full\n            if self.covariance_type == 'spherical':\n                precisions_init = precisions_init.reshape(2)\n            elif self.covariance_type == 'diag':\n                precisions_init = precisions_init.reshape(2, 1)\n            elif self.covariance_type == 'tied':\n                precisions_init = np.array([[1.0]])\n            if skm is None:\n                raise ImportError('Please run \"pip install sklearn\" '\n                                  'to install sklearn first.')\n            gmm = skm.GaussianMixture(\n                2,\n                weights_init=weights_init,\n                means_init=means_init,\n                precisions_init=precisions_init,\n                covariance_type=self.covariance_type)\n            gmm.fit(pos_loss_gmm)\n            gmm_assignment = gmm.predict(pos_loss_gmm)\n            scores = gmm.score_samples(pos_loss_gmm)\n            gmm_assignment = torch.from_numpy(gmm_assignment).to(device)\n            scores = torch.from_numpy(scores).to(device)\n\n            pos_inds_temp, ignore_inds_temp = self.gmm_separation_scheme(\n                gmm_assignment, scores, pos_inds_gmm)\n            pos_inds_after_paa.append(pos_inds_temp)\n            ignore_inds_after_paa.append(ignore_inds_temp)\n\n        pos_inds_after_paa = torch.cat(pos_inds_after_paa)\n        ignore_inds_after_paa = torch.cat(ignore_inds_after_paa)\n        reassign_mask = (pos_inds.unsqueeze(1) != pos_inds_after_paa).all(1)\n        reassign_ids = pos_inds[reassign_mask]\n        label[reassign_ids] = self.num_classes\n        label_weight[ignore_inds_after_paa] = 0\n        bbox_weight[reassign_ids] = 0\n        num_pos = len(pos_inds_after_paa)\n        return label, label_weight, bbox_weight, num_pos\n\n    def gmm_separation_scheme(self, gmm_assignment, scores, pos_inds_gmm):\n        \"\"\"A general separation scheme for gmm model.\n\n        It separates a GMM distribution of candidate samples into three\n        parts, 0 1 and uncertain areas, and you can implement other\n        separation schemes by rewriting this function.\n\n        Args:\n            gmm_assignment (Tensor): The prediction of GMM which is of shape\n                (num_samples,). The 0/1 value indicates the distribution\n                that each sample comes from.\n            scores (Tensor): The probability of sample coming from the\n                fit GMM distribution. The tensor is of shape (num_samples,).\n            pos_inds_gmm (Tensor): All the indexes of samples which are used\n                to fit GMM model. 
The tensor is of shape (num_samples,)\n\n        Returns:\n            tuple[Tensor]: The indices of positive and ignored samples.\n\n                - pos_inds_temp (Tensor): Indices of positive samples.\n                - ignore_inds_temp (Tensor): Indices of ignore samples.\n        \"\"\"\n        # The implementation is (c) in Fig.3 in origin paper instead of (b).\n        # You can refer to issues such as\n        # https://github.com/kkhoot/PAA/issues/8 and\n        # https://github.com/kkhoot/PAA/issues/9.\n        fgs = gmm_assignment == 0\n        pos_inds_temp = fgs.new_tensor([], dtype=torch.long)\n        ignore_inds_temp = fgs.new_tensor([], dtype=torch.long)\n        if fgs.nonzero().numel():\n            _, pos_thr_ind = scores[fgs].topk(1)\n            pos_inds_temp = pos_inds_gmm[fgs][:pos_thr_ind + 1]\n            ignore_inds_temp = pos_inds_gmm.new_tensor([])\n        return pos_inds_temp, ignore_inds_temp\n\n    def get_targets(\n        self,\n        anchor_list,\n        valid_flag_list,\n        gt_bboxes_list,\n        img_metas,\n        gt_bboxes_ignore_list=None,\n        gt_labels_list=None,\n        label_channels=1,\n        unmap_outputs=True,\n    ):\n        \"\"\"Get targets for PAA head.\n\n        This method is almost the same as `AnchorHead.get_targets()`. We direct\n        return the results from _get_targets_single instead map it to levels\n        by images_to_levels function.\n\n        Args:\n            anchor_list (list[list[Tensor]]): Multi level anchors of each\n                image. The outer list indicates images, and the inner list\n                corresponds to feature levels of the image. Each element of\n                the inner list is a tensor of shape (num_anchors, 4).\n            valid_flag_list (list[list[Tensor]]): Multi level valid flags of\n                each image. The outer list indicates images, and the inner list\n                corresponds to feature levels of the image. 
Each element of\n                the inner list is a tensor of shape (num_anchors, )\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be\n                ignored.\n            gt_labels_list (list[Tensor]): Ground truth labels of each box.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple: Usually returns a tuple containing learning targets.\n\n                - labels (list[Tensor]): Labels of all anchors, each with\n                    shape (num_anchors,).\n                - label_weights (list[Tensor]): Label weights of all anchor.\n                    each with shape (num_anchors,).\n                - bbox_targets (list[Tensor]): BBox targets of all anchors.\n                    each with shape (num_anchors, 4).\n                - bbox_weights (list[Tensor]): BBox weights of all anchors.\n                    each with shape (num_anchors, 4).\n                - pos_inds (list[Tensor]): Contains all index of positive\n                    sample in all anchor.\n                - gt_inds (list[Tensor]): Contains all gt_index of positive\n                    sample in all anchor.\n        \"\"\"\n\n        num_imgs = len(img_metas)\n        assert len(anchor_list) == len(valid_flag_list) == num_imgs\n        concat_anchor_list = []\n        concat_valid_flag_list = []\n        for i in range(num_imgs):\n            assert len(anchor_list[i]) == len(valid_flag_list[i])\n            concat_anchor_list.append(torch.cat(anchor_list[i]))\n            concat_valid_flag_list.append(torch.cat(valid_flag_list[i]))\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        results = multi_apply(\n            self._get_targets_single,\n            concat_anchor_list,\n            concat_valid_flag_list,\n            gt_bboxes_list,\n            gt_bboxes_ignore_list,\n            gt_labels_list,\n            img_metas,\n            label_channels=label_channels,\n            unmap_outputs=unmap_outputs)\n\n        (labels, label_weights, bbox_targets, bbox_weights, valid_pos_inds,\n         valid_neg_inds, sampling_result) = results\n\n        # Due to valid flag of anchors, we have to calculate the real pos_inds\n        # in origin anchor set.\n        pos_inds = []\n        for i, single_labels in enumerate(labels):\n            pos_mask = (0 <= single_labels) & (\n                single_labels < self.num_classes)\n            pos_inds.append(pos_mask.nonzero().view(-1))\n\n        gt_inds = [item.pos_assigned_gt_inds for item in sampling_result]\n        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,\n                gt_inds)\n\n    def _get_targets_single(self,\n                            flat_anchors,\n                            valid_flags,\n                            gt_bboxes,\n                            gt_bboxes_ignore,\n                            gt_labels,\n                            img_meta,\n                            label_channels=1,\n                            unmap_outputs=True):\n        \"\"\"Compute regression and classification 
targets for anchors in a\n        single image.\n\n        This method is same as `AnchorHead._get_targets_single()`.\n        \"\"\"\n        assert unmap_outputs, 'We must map outputs back to the original' \\\n                              'set of anchors in PAAhead'\n        return super(ATSSHead, self)._get_targets_single(\n            flat_anchors,\n            valid_flags,\n            gt_bboxes,\n            gt_bboxes_ignore,\n            gt_labels,\n            img_meta,\n            label_channels=1,\n            unmap_outputs=True)\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   score_factors=None,\n                   img_metas=None,\n                   cfg=None,\n                   rescale=False,\n                   with_nms=True,\n                   **kwargs):\n        assert with_nms, 'PAA only supports \"with_nms=True\" now and it ' \\\n                         'means PAAHead does not support ' \\\n                         'test-time augmentation'\n        return super(ATSSHead, self).get_bboxes(cls_scores, bbox_preds,\n                                                score_factors, img_metas, cfg,\n                                                rescale, with_nms, **kwargs)\n\n    def _get_bboxes_single(self,\n                           cls_score_list,\n                           bbox_pred_list,\n                           score_factor_list,\n                           mlvl_priors,\n                           img_meta,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           **kwargs):\n        \"\"\"Transform outputs of a single image into bbox predictions.\n\n        Args:\n            cls_score_list (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_priors * num_classes, H, W).\n            bbox_pred_list (list[Tensor]): Box energies / deltas from\n                all scale levels of a single image, each item has shape\n                (num_priors * 4, H, W).\n            score_factor_list (list[Tensor]): Score factors from all scale\n                levels of a single image, each item has shape\n                (num_priors * 1, H, W).\n            mlvl_priors (list[Tensor]): Each element in the list is\n                the priors of a single level in feature pyramid, has shape\n                (num_priors, 4).\n            img_meta (dict): Image meta info.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels. If with_nms\n                is False and mlvl_score_factor is None, return mlvl_bboxes and\n                mlvl_scores, else return mlvl_bboxes, mlvl_scores and\n                mlvl_score_factor. Usually with_nms is False is used for aug\n                test. 
If with_nms is True, then return the following format\n\n                - det_bboxes (Tensor): Predicted bboxes with shape \\\n                    [num_bboxes, 5], where the first 4 columns are bounding \\\n                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \\\n                    column are scores between 0 and 1.\n                - det_labels (Tensor): Predicted labels of the corresponding \\\n                    box with shape [num_bboxes].\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        img_shape = img_meta['img_shape']\n        nms_pre = cfg.get('nms_pre', -1)\n\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_score_factors = []\n        for level_idx, (cls_score, bbox_pred, score_factor, priors) in \\\n                enumerate(zip(cls_score_list, bbox_pred_list,\n                              score_factor_list, mlvl_priors)):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n\n            scores = cls_score.permute(1, 2, 0).reshape(\n                -1, self.cls_out_channels).sigmoid()\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n            score_factor = score_factor.permute(1, 2, 0).reshape(-1).sigmoid()\n\n            if 0 < nms_pre < scores.shape[0]:\n                max_scores, _ = (scores *\n                                 score_factor[:, None]).sqrt().max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                priors = priors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n                score_factor = score_factor[topk_inds]\n\n            bboxes = self.bbox_coder.decode(\n                priors, bbox_pred, max_shape=img_shape)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_score_factors.append(score_factor)\n\n        return self._bbox_post_process(mlvl_scores, mlvl_bboxes,\n                                       img_meta['scale_factor'], cfg, rescale,\n                                       with_nms, mlvl_score_factors, **kwargs)\n\n    def _bbox_post_process(self,\n                           mlvl_scores,\n                           mlvl_bboxes,\n                           scale_factor,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           mlvl_score_factors=None,\n                           **kwargs):\n        \"\"\"bbox post-processing method.\n\n        The boxes would be rescaled to the original image scale and do\n        the nms operation. 
Usually with_nms is False is used for aug test.\n\n        Args:\n            mlvl_scores (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_bboxes, num_class).\n            mlvl_bboxes (list[Tensor]): Decoded bboxes from all scale\n                levels of a single image, each item has shape (num_bboxes, 4).\n            scale_factor (ndarray, optional): Scale factor of the image arange\n                as (w_scale, h_scale, w_scale, h_scale).\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n            mlvl_score_factors (list[Tensor], optional): Score factor from\n                all scale levels of a single image, each item has shape\n                (num_bboxes, ). Default: None.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels. If with_nms\n                is False and mlvl_score_factor is None, return mlvl_bboxes and\n                mlvl_scores, else return mlvl_bboxes, mlvl_scores and\n                mlvl_score_factor. Usually with_nms is False is used for aug\n                test. If with_nms is True, then return the following format\n\n                - det_bboxes (Tensor): Predicted bboxes with shape \\\n                    [num_bboxes, 5], where the first 4 columns are bounding \\\n                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \\\n                    column are scores between 0 and 1.\n                - det_labels (Tensor): Predicted labels of the corresponding \\\n                    box with shape [num_bboxes].\n        \"\"\"\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        if rescale:\n            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)\n        mlvl_scores = torch.cat(mlvl_scores)\n        # Add a dummy background class to the backend when using sigmoid\n        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0\n        # BG cat_id: num_class\n        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n\n        mlvl_iou_preds = torch.cat(mlvl_score_factors)\n        mlvl_nms_scores = (mlvl_scores * mlvl_iou_preds[:, None]).sqrt()\n        det_bboxes, det_labels = multiclass_nms(\n            mlvl_bboxes,\n            mlvl_nms_scores,\n            cfg.score_thr,\n            cfg.nms,\n            cfg.max_per_img,\n            score_factors=None)\n        if self.with_score_voting and len(det_bboxes) > 0:\n            det_bboxes, det_labels = self.score_voting(det_bboxes, det_labels,\n                                                       mlvl_bboxes,\n                                                       mlvl_nms_scores,\n                                                       cfg.score_thr)\n\n        return det_bboxes, det_labels\n\n    def score_voting(self, det_bboxes, det_labels, mlvl_bboxes,\n                     mlvl_nms_scores, score_thr):\n        \"\"\"Implementation of score voting method works on each remaining boxes\n        after NMS procedure.\n\n        Args:\n            det_bboxes (Tensor): Remaining boxes after NMS procedure,\n                with shape (k, 5), each dimension means\n                (x1, y1, x2, y2, score).\n            
det_labels (Tensor): The label of remaining boxes, with shape\n                (k, 1),Labels are 0-based.\n            mlvl_bboxes (Tensor): All boxes before the NMS procedure,\n                with shape (num_anchors,4).\n            mlvl_nms_scores (Tensor): The scores of all boxes which is used\n                in the NMS procedure, with shape (num_anchors, num_class)\n            score_thr (float): The score threshold of bboxes.\n\n        Returns:\n            tuple: Usually returns a tuple containing voting results.\n\n                - det_bboxes_voted (Tensor): Remaining boxes after\n                    score voting procedure, with shape (k, 5), each\n                    dimension means (x1, y1, x2, y2, score).\n                - det_labels_voted (Tensor): Label of remaining bboxes\n                    after voting, with shape (num_anchors,).\n        \"\"\"\n        candidate_mask = mlvl_nms_scores > score_thr\n        candidate_mask_nonzeros = candidate_mask.nonzero(as_tuple=False)\n        candidate_inds = candidate_mask_nonzeros[:, 0]\n        candidate_labels = candidate_mask_nonzeros[:, 1]\n        candidate_bboxes = mlvl_bboxes[candidate_inds]\n        candidate_scores = mlvl_nms_scores[candidate_mask]\n        det_bboxes_voted = []\n        det_labels_voted = []\n        for cls in range(self.cls_out_channels):\n            candidate_cls_mask = candidate_labels == cls\n            if not candidate_cls_mask.any():\n                continue\n            candidate_cls_scores = candidate_scores[candidate_cls_mask]\n            candidate_cls_bboxes = candidate_bboxes[candidate_cls_mask]\n            det_cls_mask = det_labels == cls\n            det_cls_bboxes = det_bboxes[det_cls_mask].view(\n                -1, det_bboxes.size(-1))\n            det_candidate_ious = bbox_overlaps(det_cls_bboxes[:, :4],\n                                               candidate_cls_bboxes)\n            for det_ind in range(len(det_cls_bboxes)):\n                single_det_ious = det_candidate_ious[det_ind]\n                pos_ious_mask = single_det_ious > 0.01\n                pos_ious = single_det_ious[pos_ious_mask]\n                pos_bboxes = candidate_cls_bboxes[pos_ious_mask]\n                pos_scores = candidate_cls_scores[pos_ious_mask]\n                pis = (torch.exp(-(1 - pos_ious)**2 / 0.025) *\n                       pos_scores)[:, None]\n                voted_box = torch.sum(\n                    pis * pos_bboxes, dim=0) / torch.sum(\n                        pis, dim=0)\n                voted_score = det_cls_bboxes[det_ind][-1:][None, :]\n                det_bboxes_voted.append(\n                    torch.cat((voted_box[None, :], voted_score), dim=1))\n                det_labels_voted.append(cls)\n\n        det_bboxes_voted = torch.cat(det_bboxes_voted, dim=0)\n        det_labels_voted = det_labels.new_tensor(det_labels_voted)\n        return det_bboxes_voted, det_labels_voted\n"
  },
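The `paa_reassign` method in the file above separates each ground-truth box's candidate anchors into positives and negatives by fitting a two-component Gaussian Mixture Model to their losses and keeping the low-loss component. The following standalone sketch (not part of the repository; the helper name `split_pos_neg_by_gmm` and the loss values are invented for illustration) reproduces that core step in isolation with scikit-learn, using the same `covariance_type='diag'` initialisation as the head:

# --- Illustrative sketch, not part of the repository -------------------------
import numpy as np
import sklearn.mixture as skm


def split_pos_neg_by_gmm(losses, covariance_type='diag'):
    """Split 1-D loss values into a low-loss (positive) and a high-loss group."""
    losses = np.sort(np.asarray(losses, dtype=np.float64))
    if len(losses) < 2:                      # GMM fitting needs >= 2 samples
        return losses, losses[:0]
    x = losses.reshape(-1, 1)
    # initialise one component at the minimum loss and one at the maximum,
    # mirroring the means_init / weights_init / precisions_init setup above
    means_init = np.array([losses.min(), losses.max()]).reshape(2, 1)
    weights_init = np.array([0.5, 0.5])
    precisions_init = np.ones((2, 1))        # shape required for 'diag'
    gmm = skm.GaussianMixture(
        2,
        weights_init=weights_init,
        means_init=means_init,
        precisions_init=precisions_init,
        covariance_type=covariance_type)
    gmm.fit(x)
    assignment = gmm.predict(x)
    # component 0 is initialised at the minimum loss, which is the same
    # assumption gmm_separation_scheme makes when it treats label 0 as positive
    return losses[assignment == 0], losses[assignment == 1]


pos, neg = split_pos_neg_by_gmm([0.1, 0.15, 0.2, 1.4, 1.6, 1.8])
print('low-loss (positive):', pos, '  high-loss (negative):', neg)
# ------------------------------------------------------------------------------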
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/pisa_retinanet_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import images_to_levels\nfrom ..builder import HEADS\nfrom ..losses import carl_loss, isr_p\nfrom .retina_head import RetinaHead\n\n\n@HEADS.register_module()\nclass PISARetinaHead(RetinaHead):\n    \"\"\"PISA Retinanet Head.\n\n    The head owns the same structure with Retinanet Head, but differs in two\n        aspects:\n        1. Importance-based Sample Reweighting Positive (ISR-P) is applied to\n            change the positive loss weights.\n        2. Classification-aware regression loss is adopted as a third loss.\n    \"\"\"\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes of each image\n                with shape (num_obj, 4).\n            gt_labels (list[Tensor]): Ground truth labels of each image\n                with shape (num_obj, 4).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor]): Ignored gt bboxes of each image.\n                Default: None.\n\n        Returns:\n            dict: Loss dict, comprise classification loss, regression loss and\n                carl loss.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels,\n            return_sampling_results=True)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg, sampling_results_list) = cls_reg_targets\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n        # concat all level anchors and flags to a single tensor\n        concat_anchor_list = []\n        for i in range(len(anchor_list)):\n            concat_anchor_list.append(torch.cat(anchor_list[i]))\n        all_anchor_list = images_to_levels(concat_anchor_list,\n                                           num_level_anchors)\n\n        num_imgs = len(img_metas)\n        flatten_cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, label_channels)\n            for cls_score in cls_scores\n        
]\n        flatten_cls_scores = torch.cat(\n            flatten_cls_scores, dim=1).reshape(-1,\n                                               flatten_cls_scores[0].size(-1))\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)\n            for bbox_pred in bbox_preds\n        ]\n        flatten_bbox_preds = torch.cat(\n            flatten_bbox_preds, dim=1).view(-1, flatten_bbox_preds[0].size(-1))\n        flatten_labels = torch.cat(labels_list, dim=1).reshape(-1)\n        flatten_label_weights = torch.cat(\n            label_weights_list, dim=1).reshape(-1)\n        flatten_anchors = torch.cat(all_anchor_list, dim=1).reshape(-1, 4)\n        flatten_bbox_targets = torch.cat(\n            bbox_targets_list, dim=1).reshape(-1, 4)\n        flatten_bbox_weights = torch.cat(\n            bbox_weights_list, dim=1).reshape(-1, 4)\n\n        # Apply ISR-P\n        isr_cfg = self.train_cfg.get('isr', None)\n        if isr_cfg is not None:\n            all_targets = (flatten_labels, flatten_label_weights,\n                           flatten_bbox_targets, flatten_bbox_weights)\n            with torch.no_grad():\n                all_targets = isr_p(\n                    flatten_cls_scores,\n                    flatten_bbox_preds,\n                    all_targets,\n                    flatten_anchors,\n                    sampling_results_list,\n                    bbox_coder=self.bbox_coder,\n                    loss_cls=self.loss_cls,\n                    num_class=self.num_classes,\n                    **self.train_cfg.isr)\n            (flatten_labels, flatten_label_weights, flatten_bbox_targets,\n             flatten_bbox_weights) = all_targets\n\n        # For convenience we compute loss once instead separating by fpn level,\n        # so that we don't need to separate the weights by level again.\n        # The result should be the same\n        losses_cls = self.loss_cls(\n            flatten_cls_scores,\n            flatten_labels,\n            flatten_label_weights,\n            avg_factor=num_total_samples)\n        losses_bbox = self.loss_bbox(\n            flatten_bbox_preds,\n            flatten_bbox_targets,\n            flatten_bbox_weights,\n            avg_factor=num_total_samples)\n        loss_dict = dict(loss_cls=losses_cls, loss_bbox=losses_bbox)\n\n        # CARL Loss\n        carl_cfg = self.train_cfg.get('carl', None)\n        if carl_cfg is not None:\n            loss_carl = carl_loss(\n                flatten_cls_scores,\n                flatten_labels,\n                flatten_bbox_preds,\n                flatten_bbox_targets,\n                self.loss_bbox,\n                **self.train_cfg.carl,\n                avg_factor=num_total_pos,\n                sigmoid=True,\n                num_class=self.num_classes)\n            loss_dict.update(loss_carl)\n\n        return loss_dict\n"
  },
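`PISARetinaHead.loss` in the entry above flattens each FPN level's score map from `(N, num_anchors * num_classes, H, W)` to `(N, H * W * num_anchors, num_classes)` and concatenates the levels so that ISR-P and the losses can operate on one flat tensor. A minimal sketch of that reshaping with dummy tensors (the sizes below are invented for illustration):

# --- Illustrative sketch, not part of the repository -------------------------
import torch

num_imgs, num_anchors, num_classes = 2, 9, 80
level_shapes = [(32, 32), (16, 16)]          # dummy FPN feature-map sizes

cls_scores = [
    torch.randn(num_imgs, num_anchors * num_classes, h, w)
    for h, w in level_shapes
]

# same permute/reshape pattern as in PISARetinaHead.loss
flat = [
    s.permute(0, 2, 3, 1).reshape(num_imgs, -1, num_classes)
    for s in cls_scores
]
flat = torch.cat(flat, dim=1).reshape(-1, num_classes)

# one row per (image, location, anchor) triple across all levels
expected = num_imgs * sum(h * w * num_anchors for h, w in level_shapes)
assert flat.shape == (expected, num_classes)
print(flat.shape)
# ------------------------------------------------------------------------------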
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/pisa_ssd_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core import multi_apply\nfrom ..builder import HEADS\nfrom ..losses import CrossEntropyLoss, SmoothL1Loss, carl_loss, isr_p\nfrom .ssd_head import SSDHead\n\n\n# TODO: add loss evaluator for SSD\n@HEADS.register_module()\nclass PISASSDHead(SSDHead):\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes of each image\n                with shape (num_obj, 4).\n            gt_labels (list[Tensor]): Ground truth labels of each image\n                with shape (num_obj, 4).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor]): Ignored gt bboxes of each image.\n                Default: None.\n\n        Returns:\n            dict: Loss dict, comprise classification loss regression loss and\n                carl loss.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=1,\n            unmap_outputs=False,\n            return_sampling_results=True)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg, sampling_results_list) = cls_reg_targets\n\n        num_images = len(img_metas)\n        all_cls_scores = torch.cat([\n            s.permute(0, 2, 3, 1).reshape(\n                num_images, -1, self.cls_out_channels) for s in cls_scores\n        ], 1)\n        all_labels = torch.cat(labels_list, -1).view(num_images, -1)\n        all_label_weights = torch.cat(label_weights_list,\n                                      -1).view(num_images, -1)\n        all_bbox_preds = torch.cat([\n            b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)\n            for b in bbox_preds\n        ], -2)\n        all_bbox_targets = torch.cat(bbox_targets_list,\n                                     -2).view(num_images, -1, 4)\n        all_bbox_weights = torch.cat(bbox_weights_list,\n                                     -2).view(num_images, -1, 4)\n\n        # concat all level anchors to a single tensor\n        all_anchors = []\n        for i in range(num_images):\n            all_anchors.append(torch.cat(anchor_list[i]))\n\n        isr_cfg = self.train_cfg.get('isr', None)\n        all_targets = (all_labels.view(-1), all_label_weights.view(-1),\n                       all_bbox_targets.view(-1,\n                                             4), 
all_bbox_weights.view(-1, 4))\n        # apply ISR-P\n        if isr_cfg is not None:\n            all_targets = isr_p(\n                all_cls_scores.view(-1, all_cls_scores.size(-1)),\n                all_bbox_preds.view(-1, 4),\n                all_targets,\n                torch.cat(all_anchors),\n                sampling_results_list,\n                loss_cls=CrossEntropyLoss(),\n                bbox_coder=self.bbox_coder,\n                **self.train_cfg.isr,\n                num_class=self.num_classes)\n            (new_labels, new_label_weights, new_bbox_targets,\n             new_bbox_weights) = all_targets\n            all_labels = new_labels.view(all_labels.shape)\n            all_label_weights = new_label_weights.view(all_label_weights.shape)\n            all_bbox_targets = new_bbox_targets.view(all_bbox_targets.shape)\n            all_bbox_weights = new_bbox_weights.view(all_bbox_weights.shape)\n\n        # add CARL loss\n        carl_loss_cfg = self.train_cfg.get('carl', None)\n        if carl_loss_cfg is not None:\n            loss_carl = carl_loss(\n                all_cls_scores.view(-1, all_cls_scores.size(-1)),\n                all_targets[0],\n                all_bbox_preds.view(-1, 4),\n                all_targets[2],\n                SmoothL1Loss(beta=1.),\n                **self.train_cfg.carl,\n                avg_factor=num_total_pos,\n                num_class=self.num_classes)\n\n        # check NaN and Inf\n        assert torch.isfinite(all_cls_scores).all().item(), \\\n            'classification scores become infinite or NaN!'\n        assert torch.isfinite(all_bbox_preds).all().item(), \\\n            'bbox predications become infinite or NaN!'\n\n        losses_cls, losses_bbox = multi_apply(\n            self.loss_single,\n            all_cls_scores,\n            all_bbox_preds,\n            all_anchors,\n            all_labels,\n            all_label_weights,\n            all_bbox_targets,\n            all_bbox_weights,\n            num_total_samples=num_total_pos)\n        loss_dict = dict(loss_cls=losses_cls, loss_bbox=losses_bbox)\n        if carl_loss_cfg is not None:\n            loss_dict.update(loss_carl)\n        return loss_dict\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/reppoints_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops import DeformConv2d\n\nfrom mmdet.core import (build_assigner, build_sampler, images_to_levels,\n                        multi_apply, unmap)\nfrom mmdet.core.anchor.point_generator import MlvlPointGenerator\nfrom mmdet.core.utils import filter_scores_and_topk\nfrom ..builder import HEADS, build_loss\nfrom .anchor_free_head import AnchorFreeHead\n\n\n@HEADS.register_module()\nclass RepPointsHead(AnchorFreeHead):\n    \"\"\"RepPoint head.\n\n    Args:\n        point_feat_channels (int): Number of channels of points features.\n        gradient_mul (float): The multiplier to gradients from\n            points refinement and recognition.\n        point_strides (Iterable): points strides.\n        point_base_scale (int): bbox scale for assigning labels.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox_init (dict): Config of initial points loss.\n        loss_bbox_refine (dict): Config of points loss in refinement.\n        use_grid_points (bool): If we use bounding box representation, the\n        reppoints is represented as grid points on the bounding box.\n        center_init (bool): Whether to use center point assignment.\n        transform_method (str): The methods to transform RepPoints to bbox.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 point_feat_channels=256,\n                 num_points=9,\n                 gradient_mul=0.1,\n                 point_strides=[8, 16, 32, 64, 128],\n                 point_base_scale=4,\n                 loss_cls=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 loss_bbox_init=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.5),\n                 loss_bbox_refine=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),\n                 use_grid_points=False,\n                 center_init=True,\n                 transform_method='moment',\n                 moment_mul=0.01,\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='reppoints_cls_out',\n                         std=0.01,\n                         bias_prob=0.01)),\n                 **kwargs):\n        self.num_points = num_points\n        self.point_feat_channels = point_feat_channels\n        self.use_grid_points = use_grid_points\n        self.center_init = center_init\n\n        # we use deform conv to extract points features\n        self.dcn_kernel = int(np.sqrt(num_points))\n        self.dcn_pad = int((self.dcn_kernel - 1) / 2)\n        assert self.dcn_kernel * self.dcn_kernel == num_points, \\\n            'The points number should be a square number.'\n        assert self.dcn_kernel % 2 == 1, \\\n            'The points number should be an odd square number.'\n        dcn_base = np.arange(-self.dcn_pad,\n                             self.dcn_pad + 1).astype(np.float64)\n        dcn_base_y = np.repeat(dcn_base, self.dcn_kernel)\n  
      dcn_base_x = np.tile(dcn_base, self.dcn_kernel)\n        dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape(\n            (-1))\n        self.dcn_base_offset = torch.tensor(dcn_base_offset).view(1, -1, 1, 1)\n\n        super().__init__(\n            num_classes,\n            in_channels,\n            loss_cls=loss_cls,\n            init_cfg=init_cfg,\n            **kwargs)\n\n        self.gradient_mul = gradient_mul\n        self.point_base_scale = point_base_scale\n        self.point_strides = point_strides\n        self.prior_generator = MlvlPointGenerator(\n            self.point_strides, offset=0.)\n\n        self.sampling = loss_cls['type'] not in ['FocalLoss']\n        if self.train_cfg:\n            self.init_assigner = build_assigner(self.train_cfg.init.assigner)\n            self.refine_assigner = build_assigner(\n                self.train_cfg.refine.assigner)\n            # use PseudoSampler when sampling is False\n            if self.sampling and hasattr(self.train_cfg, 'sampler'):\n                sampler_cfg = self.train_cfg.sampler\n            else:\n                sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n        self.transform_method = transform_method\n        if self.transform_method == 'moment':\n            self.moment_transfer = nn.Parameter(\n                data=torch.zeros(2), requires_grad=True)\n            self.moment_mul = moment_mul\n\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n        if self.use_sigmoid_cls:\n            self.cls_out_channels = self.num_classes\n        else:\n            self.cls_out_channels = self.num_classes + 1\n        self.loss_bbox_init = build_loss(loss_bbox_init)\n        self.loss_bbox_refine = build_loss(loss_bbox_refine)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        pts_out_dim = 4 if self.use_grid_points else 2 * self.num_points\n        self.reppoints_cls_conv = DeformConv2d(self.feat_channels,\n                                               self.point_feat_channels,\n                                               self.dcn_kernel, 1,\n                                               self.dcn_pad)\n        self.reppoints_cls_out = nn.Conv2d(self.point_feat_channels,\n                                           self.cls_out_channels, 1, 1, 0)\n        self.reppoints_pts_init_conv = nn.Conv2d(self.feat_channels,\n                                                 self.point_feat_channels, 3,\n                                                 1, 1)\n        self.reppoints_pts_init_out = nn.Conv2d(self.point_feat_channels,\n       
                                         pts_out_dim, 1, 1, 0)\n        self.reppoints_pts_refine_conv = DeformConv2d(self.feat_channels,\n                                                      self.point_feat_channels,\n                                                      self.dcn_kernel, 1,\n                                                      self.dcn_pad)\n        self.reppoints_pts_refine_out = nn.Conv2d(self.point_feat_channels,\n                                                  pts_out_dim, 1, 1, 0)\n\n    def points2bbox(self, pts, y_first=True):\n        \"\"\"Converting the points set into bounding box.\n\n        :param pts: the input points sets (fields), each points\n            set (fields) is represented as 2n scalar.\n        :param y_first: if y_first=True, the point set is represented as\n            [y1, x1, y2, x2 ... yn, xn], otherwise the point set is\n            represented as [x1, y1, x2, y2 ... xn, yn].\n        :return: each points set is converting to a bbox [x1, y1, x2, y2].\n        \"\"\"\n        pts_reshape = pts.view(pts.shape[0], -1, 2, *pts.shape[2:])\n        pts_y = pts_reshape[:, :, 0, ...] if y_first else pts_reshape[:, :, 1,\n                                                                      ...]\n        pts_x = pts_reshape[:, :, 1, ...] if y_first else pts_reshape[:, :, 0,\n                                                                      ...]\n        if self.transform_method == 'minmax':\n            bbox_left = pts_x.min(dim=1, keepdim=True)[0]\n            bbox_right = pts_x.max(dim=1, keepdim=True)[0]\n            bbox_up = pts_y.min(dim=1, keepdim=True)[0]\n            bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]\n            bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],\n                             dim=1)\n        elif self.transform_method == 'partial_minmax':\n            pts_y = pts_y[:, :4, ...]\n            pts_x = pts_x[:, :4, ...]\n            bbox_left = pts_x.min(dim=1, keepdim=True)[0]\n            bbox_right = pts_x.max(dim=1, keepdim=True)[0]\n            bbox_up = pts_y.min(dim=1, keepdim=True)[0]\n            bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]\n            bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],\n                             dim=1)\n        elif self.transform_method == 'moment':\n            pts_y_mean = pts_y.mean(dim=1, keepdim=True)\n            pts_x_mean = pts_x.mean(dim=1, keepdim=True)\n            pts_y_std = torch.std(pts_y - pts_y_mean, dim=1, keepdim=True)\n            pts_x_std = torch.std(pts_x - pts_x_mean, dim=1, keepdim=True)\n            moment_transfer = (self.moment_transfer * self.moment_mul) + (\n                self.moment_transfer.detach() * (1 - self.moment_mul))\n            moment_width_transfer = moment_transfer[0]\n            moment_height_transfer = moment_transfer[1]\n            half_width = pts_x_std * torch.exp(moment_width_transfer)\n            half_height = pts_y_std * torch.exp(moment_height_transfer)\n            bbox = torch.cat([\n                pts_x_mean - half_width, pts_y_mean - half_height,\n                pts_x_mean + half_width, pts_y_mean + half_height\n            ],\n                             dim=1)\n        else:\n            raise NotImplementedError\n        return bbox\n\n    def gen_grid_from_reg(self, reg, previous_boxes):\n        \"\"\"Base on the previous bboxes and regression values, we compute the\n        regressed bboxes and generate the grids on the bboxes.\n\n        :param reg: the 
regression value to previous bboxes.\n        :param previous_boxes: previous bboxes.\n        :return: generate grids on the regressed bboxes.\n        \"\"\"\n        b, _, h, w = reg.shape\n        bxy = (previous_boxes[:, :2, ...] + previous_boxes[:, 2:, ...]) / 2.\n        bwh = (previous_boxes[:, 2:, ...] -\n               previous_boxes[:, :2, ...]).clamp(min=1e-6)\n        grid_topleft = bxy + bwh * reg[:, :2, ...] - 0.5 * bwh * torch.exp(\n            reg[:, 2:, ...])\n        grid_wh = bwh * torch.exp(reg[:, 2:, ...])\n        grid_left = grid_topleft[:, [0], ...]\n        grid_top = grid_topleft[:, [1], ...]\n        grid_width = grid_wh[:, [0], ...]\n        grid_height = grid_wh[:, [1], ...]\n        intervel = torch.linspace(0., 1., self.dcn_kernel).view(\n            1, self.dcn_kernel, 1, 1).type_as(reg)\n        grid_x = grid_left + grid_width * intervel\n        grid_x = grid_x.unsqueeze(1).repeat(1, self.dcn_kernel, 1, 1, 1)\n        grid_x = grid_x.view(b, -1, h, w)\n        grid_y = grid_top + grid_height * intervel\n        grid_y = grid_y.unsqueeze(2).repeat(1, 1, self.dcn_kernel, 1, 1)\n        grid_y = grid_y.view(b, -1, h, w)\n        grid_yx = torch.stack([grid_y, grid_x], dim=2)\n        grid_yx = grid_yx.view(b, -1, h, w)\n        regressed_bbox = torch.cat([\n            grid_left, grid_top, grid_left + grid_width, grid_top + grid_height\n        ], 1)\n        return grid_yx, regressed_bbox\n\n    def forward(self, feats):\n        return multi_apply(self.forward_single, feats)\n\n    def forward_single(self, x):\n        \"\"\"Forward feature map of a single FPN level.\"\"\"\n        dcn_base_offset = self.dcn_base_offset.type_as(x)\n        # If we use center_init, the initial reppoints is from center points.\n        # If we use bounding bbox representation, the initial reppoints is\n        #   from regular grid placed on a pre-defined bbox.\n        if self.use_grid_points or not self.center_init:\n            scale = self.point_base_scale / 2\n            points_init = dcn_base_offset / dcn_base_offset.max() * scale\n            bbox_init = x.new_tensor([-scale, -scale, scale,\n                                      scale]).view(1, 4, 1, 1)\n        else:\n            points_init = 0\n        cls_feat = x\n        pts_feat = x\n        for cls_conv in self.cls_convs:\n            cls_feat = cls_conv(cls_feat)\n        for reg_conv in self.reg_convs:\n            pts_feat = reg_conv(pts_feat)\n        # initialize reppoints\n        pts_out_init = self.reppoints_pts_init_out(\n            self.relu(self.reppoints_pts_init_conv(pts_feat)))\n        if self.use_grid_points:\n            pts_out_init, bbox_out_init = self.gen_grid_from_reg(\n                pts_out_init, bbox_init.detach())\n        else:\n            pts_out_init = pts_out_init + points_init\n        # refine and classify reppoints\n        pts_out_init_grad_mul = (1 - self.gradient_mul) * pts_out_init.detach(\n        ) + self.gradient_mul * pts_out_init\n        dcn_offset = pts_out_init_grad_mul - dcn_base_offset\n        cls_out = self.reppoints_cls_out(\n            self.relu(self.reppoints_cls_conv(cls_feat, dcn_offset)))\n        pts_out_refine = self.reppoints_pts_refine_out(\n            self.relu(self.reppoints_pts_refine_conv(pts_feat, dcn_offset)))\n        if self.use_grid_points:\n            pts_out_refine, bbox_out_refine = self.gen_grid_from_reg(\n                pts_out_refine, bbox_out_init.detach())\n        else:\n            pts_out_refine = pts_out_refine + 
pts_out_init.detach()\n\n        if self.training:\n            return cls_out, pts_out_init, pts_out_refine\n        else:\n            return cls_out, self.points2bbox(pts_out_refine)\n\n    def get_points(self, featmap_sizes, img_metas, device):\n        \"\"\"Get points according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            img_metas (list[dict]): Image meta info.\n\n        Returns:\n            tuple: points of each image, valid flags of each image\n        \"\"\"\n        num_imgs = len(img_metas)\n\n        # since feature map sizes of all images are the same, we only compute\n        # points center for one time\n        multi_level_points = self.prior_generator.grid_priors(\n            featmap_sizes, device=device, with_stride=True)\n        points_list = [[point.clone() for point in multi_level_points]\n                       for _ in range(num_imgs)]\n\n        # for each image, we compute valid flags of multi level grids\n        valid_flag_list = []\n        for img_id, img_meta in enumerate(img_metas):\n            multi_level_flags = self.prior_generator.valid_flags(\n                featmap_sizes, img_meta['pad_shape'])\n            valid_flag_list.append(multi_level_flags)\n\n        return points_list, valid_flag_list\n\n    def centers_to_bboxes(self, point_list):\n        \"\"\"Get bboxes according to center points.\n\n        Only used in :class:`MaxIoUAssigner`.\n        \"\"\"\n        bbox_list = []\n        for i_img, point in enumerate(point_list):\n            bbox = []\n            for i_lvl in range(len(self.point_strides)):\n                scale = self.point_base_scale * self.point_strides[i_lvl] * 0.5\n                bbox_shift = torch.Tensor([-scale, -scale, scale,\n                                           scale]).view(1, 4).type_as(point[0])\n                bbox_center = torch.cat(\n                    [point[i_lvl][:, :2], point[i_lvl][:, :2]], dim=1)\n                bbox.append(bbox_center + bbox_shift)\n            bbox_list.append(bbox)\n        return bbox_list\n\n    def offset_to_pts(self, center_list, pred_list):\n        \"\"\"Change from point offset to point coordinate.\"\"\"\n        pts_list = []\n        for i_lvl in range(len(self.point_strides)):\n            pts_lvl = []\n            for i_img in range(len(center_list)):\n                pts_center = center_list[i_img][i_lvl][:, :2].repeat(\n                    1, self.num_points)\n                pts_shift = pred_list[i_lvl][i_img]\n                yx_pts_shift = pts_shift.permute(1, 2, 0).view(\n                    -1, 2 * self.num_points)\n                y_pts_shift = yx_pts_shift[..., 0::2]\n                x_pts_shift = yx_pts_shift[..., 1::2]\n                xy_pts_shift = torch.stack([x_pts_shift, y_pts_shift], -1)\n                xy_pts_shift = xy_pts_shift.view(*yx_pts_shift.shape[:-1], -1)\n                pts = xy_pts_shift * self.point_strides[i_lvl] + pts_center\n                pts_lvl.append(pts)\n            pts_lvl = torch.stack(pts_lvl, 0)\n            pts_list.append(pts_lvl)\n        return pts_list\n\n    def _point_target_single(self,\n                             flat_proposals,\n                             valid_flags,\n                             gt_bboxes,\n                             gt_bboxes_ignore,\n                             gt_labels,\n                             stage='init',\n                             unmap_outputs=True):\n        inside_flags = valid_flags\n 
       if not inside_flags.any():\n            return (None, ) * 7\n        # assign gt and sample proposals\n        proposals = flat_proposals[inside_flags, :]\n\n        if stage == 'init':\n            assigner = self.init_assigner\n            pos_weight = self.train_cfg.init.pos_weight\n        else:\n            assigner = self.refine_assigner\n            pos_weight = self.train_cfg.refine.pos_weight\n        assign_result = assigner.assign(proposals, gt_bboxes, gt_bboxes_ignore,\n                                        None if self.sampling else gt_labels)\n        sampling_result = self.sampler.sample(assign_result, proposals,\n                                              gt_bboxes)\n\n        num_valid_proposals = proposals.shape[0]\n        bbox_gt = proposals.new_zeros([num_valid_proposals, 4])\n        pos_proposals = torch.zeros_like(proposals)\n        proposals_weights = proposals.new_zeros([num_valid_proposals, 4])\n        labels = proposals.new_full((num_valid_proposals, ),\n                                    self.num_classes,\n                                    dtype=torch.long)\n        label_weights = proposals.new_zeros(\n            num_valid_proposals, dtype=torch.float)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            pos_gt_bboxes = sampling_result.pos_gt_bboxes\n            bbox_gt[pos_inds, :] = pos_gt_bboxes\n            pos_proposals[pos_inds, :] = proposals[pos_inds, :]\n            proposals_weights[pos_inds, :] = 1.0\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = pos_weight\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # map up to original set of proposals\n        if unmap_outputs:\n            num_total_proposals = flat_proposals.size(0)\n            labels = unmap(labels, num_total_proposals, inside_flags)\n            label_weights = unmap(label_weights, num_total_proposals,\n                                  inside_flags)\n            bbox_gt = unmap(bbox_gt, num_total_proposals, inside_flags)\n            pos_proposals = unmap(pos_proposals, num_total_proposals,\n                                  inside_flags)\n            proposals_weights = unmap(proposals_weights, num_total_proposals,\n                                      inside_flags)\n\n        return (labels, label_weights, bbox_gt, pos_proposals,\n                proposals_weights, pos_inds, neg_inds)\n\n    def get_targets(self,\n                    proposals_list,\n                    valid_flag_list,\n                    gt_bboxes_list,\n                    img_metas,\n                    gt_bboxes_ignore_list=None,\n                    gt_labels_list=None,\n                    stage='init',\n                    label_channels=1,\n                    unmap_outputs=True):\n        \"\"\"Compute corresponding GT box and classification targets for\n        proposals.\n\n        Args:\n            proposals_list (list[list]): Multi level points/bboxes of each\n                image.\n            valid_flag_list (list[list]): Multi level valid flags of each\n                
 image.\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be\n                ignored.\n            gt_labels_list (list[Tensor]): Ground truth labels of each box.\n            stage (str): `init` or `refine`. Generate target for init stage or\n                refine stage.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple:\n                - labels_list (list[Tensor]): Labels of each level.\n                - label_weights_list (list[Tensor]): Label weights of each level.  # noqa: E501\n                - bbox_gt_list (list[Tensor]): Ground truth bbox of each level.\n                - proposal_list (list[Tensor]): Proposals(points/bboxes) of each level.  # noqa: E501\n                - proposal_weights_list (list[Tensor]): Proposal weights of each level.  # noqa: E501\n                - num_total_pos (int): Number of positive samples in all images.  # noqa: E501\n                - num_total_neg (int): Number of negative samples in all images.  # noqa: E501\n        \"\"\"\n        assert stage in ['init', 'refine']\n        num_imgs = len(img_metas)\n        assert len(proposals_list) == len(valid_flag_list) == num_imgs\n\n        # points number of multi levels\n        num_level_proposals = [points.size(0) for points in proposals_list[0]]\n\n        # concat all level points and flags to a single tensor\n        for i in range(num_imgs):\n            assert len(proposals_list[i]) == len(valid_flag_list[i])\n            proposals_list[i] = torch.cat(proposals_list[i])\n            valid_flag_list[i] = torch.cat(valid_flag_list[i])\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        (all_labels, all_label_weights, all_bbox_gt, all_proposals,\n         all_proposal_weights, pos_inds_list, neg_inds_list) = multi_apply(\n             self._point_target_single,\n             proposals_list,\n             valid_flag_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             gt_labels_list,\n             stage=stage,\n             unmap_outputs=unmap_outputs)\n        # no valid points\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled points of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        labels_list = images_to_levels(all_labels, num_level_proposals)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_proposals)\n        bbox_gt_list = images_to_levels(all_bbox_gt, num_level_proposals)\n        proposals_list = images_to_levels(all_proposals, num_level_proposals)\n        proposal_weights_list = images_to_levels(all_proposal_weights,\n                                                 num_level_proposals)\n        return (labels_list, label_weights_list, bbox_gt_list, proposals_list,\n                proposal_weights_list, num_total_pos, num_total_neg)\n\n    def loss_single(self, cls_score, 
pts_pred_init, pts_pred_refine, labels,\n                    label_weights, bbox_gt_init, bbox_weights_init,\n                    bbox_gt_refine, bbox_weights_refine, stride,\n                    num_total_samples_init, num_total_samples_refine):\n        # classification loss\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        cls_score = cls_score.permute(0, 2, 3,\n                                      1).reshape(-1, self.cls_out_channels)\n        cls_score = cls_score.contiguous()\n        loss_cls = self.loss_cls(\n            cls_score,\n            labels,\n            label_weights,\n            avg_factor=num_total_samples_refine)\n\n        # points loss\n        bbox_gt_init = bbox_gt_init.reshape(-1, 4)\n        bbox_weights_init = bbox_weights_init.reshape(-1, 4)\n        bbox_pred_init = self.points2bbox(\n            pts_pred_init.reshape(-1, 2 * self.num_points), y_first=False)\n        bbox_gt_refine = bbox_gt_refine.reshape(-1, 4)\n        bbox_weights_refine = bbox_weights_refine.reshape(-1, 4)\n        bbox_pred_refine = self.points2bbox(\n            pts_pred_refine.reshape(-1, 2 * self.num_points), y_first=False)\n        normalize_term = self.point_base_scale * stride\n        loss_pts_init = self.loss_bbox_init(\n            bbox_pred_init / normalize_term,\n            bbox_gt_init / normalize_term,\n            bbox_weights_init,\n            avg_factor=num_total_samples_init)\n        loss_pts_refine = self.loss_bbox_refine(\n            bbox_pred_refine / normalize_term,\n            bbox_gt_refine / normalize_term,\n            bbox_weights_refine,\n            avg_factor=num_total_samples_refine)\n        return loss_cls, loss_pts_init, loss_pts_refine\n\n    def loss(self,\n             cls_scores,\n             pts_preds_init,\n             pts_preds_refine,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        device = cls_scores[0].device\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n\n        # target for initial stage\n        center_list, valid_flag_list = self.get_points(featmap_sizes,\n                                                       img_metas, device)\n        pts_coordinate_preds_init = self.offset_to_pts(center_list,\n                                                       pts_preds_init)\n        if self.train_cfg.init.assigner['type'] == 'PointAssigner':\n            # Assign target for center list\n            candidate_list = center_list\n        else:\n            # transform center list to bbox list and\n            #   assign target for bbox list\n            bbox_list = self.centers_to_bboxes(center_list)\n            candidate_list = bbox_list\n        cls_reg_targets_init = self.get_targets(\n            candidate_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            stage='init',\n            label_channels=label_channels)\n        (*_, bbox_gt_list_init, candidate_list_init, bbox_weights_list_init,\n         num_total_pos_init, num_total_neg_init) = cls_reg_targets_init\n        num_total_samples_init = (\n            num_total_pos_init +\n            num_total_neg_init if self.sampling else num_total_pos_init)\n\n        # target for refinement stage\n        center_list, 
valid_flag_list = self.get_points(featmap_sizes,\n                                                       img_metas, device)\n        pts_coordinate_preds_refine = self.offset_to_pts(\n            center_list, pts_preds_refine)\n        bbox_list = []\n        for i_img, center in enumerate(center_list):\n            bbox = []\n            for i_lvl in range(len(pts_preds_refine)):\n                bbox_preds_init = self.points2bbox(\n                    pts_preds_init[i_lvl].detach())\n                bbox_shift = bbox_preds_init * self.point_strides[i_lvl]\n                bbox_center = torch.cat(\n                    [center[i_lvl][:, :2], center[i_lvl][:, :2]], dim=1)\n                bbox.append(bbox_center +\n                            bbox_shift[i_img].permute(1, 2, 0).reshape(-1, 4))\n            bbox_list.append(bbox)\n        cls_reg_targets_refine = self.get_targets(\n            bbox_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            stage='refine',\n            label_channels=label_channels)\n        (labels_list, label_weights_list, bbox_gt_list_refine,\n         candidate_list_refine, bbox_weights_list_refine, num_total_pos_refine,\n         num_total_neg_refine) = cls_reg_targets_refine\n        num_total_samples_refine = (\n            num_total_pos_refine +\n            num_total_neg_refine if self.sampling else num_total_pos_refine)\n\n        # compute loss\n        losses_cls, losses_pts_init, losses_pts_refine = multi_apply(\n            self.loss_single,\n            cls_scores,\n            pts_coordinate_preds_init,\n            pts_coordinate_preds_refine,\n            labels_list,\n            label_weights_list,\n            bbox_gt_list_init,\n            bbox_weights_list_init,\n            bbox_gt_list_refine,\n            bbox_weights_list_refine,\n            self.point_strides,\n            num_total_samples_init=num_total_samples_init,\n            num_total_samples_refine=num_total_samples_refine)\n        loss_dict_all = {\n            'loss_cls': losses_cls,\n            'loss_pts_init': losses_pts_init,\n            'loss_pts_refine': losses_pts_refine\n        }\n        return loss_dict_all\n\n    # Same as base_dense_head/_get_bboxes_single except self._bbox_decode\n    def _get_bboxes_single(self,\n                           cls_score_list,\n                           bbox_pred_list,\n                           score_factor_list,\n                           mlvl_priors,\n                           img_meta,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           **kwargs):\n        \"\"\"Transform outputs of a single image into bbox predictions.\n\n        Args:\n            cls_score_list (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_priors * num_classes, H, W).\n            bbox_pred_list (list[Tensor]): Box energies / deltas from\n                all scale levels of a single image, each item has shape\n                (num_priors * 4, H, W).\n            score_factor_list (list[Tensor]): Score factor from all scale\n                levels of a single image. 
RepPoints head does not need\n                this value.\n            mlvl_priors (list[Tensor]): Each element in the list is\n                the priors of a single level in feature pyramid, has shape\n                (num_priors, 2).\n            img_meta (dict): Image meta info.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels. If with_nms\n                is False and mlvl_score_factor is None, return mlvl_bboxes and\n                mlvl_scores, else return mlvl_bboxes, mlvl_scores and\n                mlvl_score_factor. Usually with_nms is False is used for aug\n                test. If with_nms is True, then return the following format\n\n                - det_bboxes (Tensor): Predicted bboxes with shape \\\n                    [num_bboxes, 5], where the first 4 columns are bounding \\\n                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \\\n                    column are scores between 0 and 1.\n                - det_labels (Tensor): Predicted labels of the corresponding \\\n                    box with shape [num_bboxes].\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_score_list) == len(bbox_pred_list)\n        img_shape = img_meta['img_shape']\n        nms_pre = cfg.get('nms_pre', -1)\n\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_labels = []\n        for level_idx, (cls_score, bbox_pred, priors) in enumerate(\n                zip(cls_score_list, bbox_pred_list, mlvl_priors)):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.cls_out_channels)\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)[:, :-1]\n\n            # After https://github.com/open-mmlab/mmdetection/pull/6268/,\n            # this operation keeps fewer bboxes under the same `nms_pre`.\n            # There is no difference in performance for most models. 
If you\n            # find a slight drop in performance, you can set a larger\n            # `nms_pre` than before.\n            results = filter_scores_and_topk(\n                scores, cfg.score_thr, nms_pre,\n                dict(bbox_pred=bbox_pred, priors=priors))\n            scores, labels, _, filtered_results = results\n\n            bbox_pred = filtered_results['bbox_pred']\n            priors = filtered_results['priors']\n\n            bboxes = self._bbox_decode(priors, bbox_pred,\n                                       self.point_strides[level_idx],\n                                       img_shape)\n\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_labels.append(labels)\n\n        return self._bbox_post_process(\n            mlvl_scores,\n            mlvl_labels,\n            mlvl_bboxes,\n            img_meta['scale_factor'],\n            cfg,\n            rescale=rescale,\n            with_nms=with_nms)\n\n    def _bbox_decode(self, points, bbox_pred, stride, max_shape):\n        bbox_pos_center = torch.cat([points[:, :2], points[:, :2]], dim=1)\n        bboxes = bbox_pred * stride + bbox_pos_center\n        x1 = bboxes[:, 0].clamp(min=0, max=max_shape[1])\n        y1 = bboxes[:, 1].clamp(min=0, max=max_shape[0])\n        x2 = bboxes[:, 2].clamp(min=0, max=max_shape[1])\n        y2 = bboxes[:, 3].clamp(min=0, max=max_shape[0])\n        decoded_bboxes = torch.stack([x1, y1, x2, y2], dim=-1)\n        return decoded_bboxes\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/retina_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\n\nfrom ..builder import HEADS\nfrom .anchor_head import AnchorHead\n\n\n@HEADS.register_module()\nclass RetinaHead(AnchorHead):\n    r\"\"\"An anchor-based head used in `RetinaNet\n    <https://arxiv.org/pdf/1708.02002.pdf>`_.\n\n    The head contains two subnetworks. The first classifies anchor boxes and\n    the second regresses deltas for the anchors.\n\n    Example:\n        >>> import torch\n        >>> self = RetinaHead(11, 7)\n        >>> x = torch.rand(1, 7, 32, 32)\n        >>> cls_score, bbox_pred = self.forward_single(x)\n        >>> # Each anchor predicts a score for each class except background\n        >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors\n        >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors\n        >>> assert cls_per_anchor == (self.num_classes)\n        >>> assert box_per_anchor == 4\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 stacked_convs=4,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 anchor_generator=dict(\n                     type='AnchorGenerator',\n                     octave_base_scale=4,\n                     scales_per_octave=3,\n                     ratios=[0.5, 1.0, 2.0],\n                     strides=[8, 16, 32, 64, 128]),\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='retina_cls',\n                         std=0.01,\n                         bias_prob=0.01)),\n                 **kwargs):\n        self.stacked_convs = stacked_convs\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        super(RetinaHead, self).__init__(\n            num_classes,\n            in_channels,\n            anchor_generator=anchor_generator,\n            init_cfg=init_cfg,\n            **kwargs)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        self.retina_cls = nn.Conv2d(\n            self.feat_channels,\n            self.num_base_priors * self.cls_out_channels,\n            3,\n            padding=1)\n        self.retina_reg = nn.Conv2d(\n            self.feat_channels, self.num_base_priors * 4, 3, padding=1)\n\n    def forward_single(self, x):\n        \"\"\"Forward feature of a single scale level.\n\n        Args:\n            x (Tensor): Features of a single scale level.\n\n        Returns:\n            
tuple:\n                cls_score (Tensor): Cls scores for a single scale level\n                    the channels number is num_anchors * num_classes.\n                bbox_pred (Tensor): Box energies / deltas for a single scale\n                    level, the channels number is num_anchors * 4.\n        \"\"\"\n        cls_feat = x\n        reg_feat = x\n        for cls_conv in self.cls_convs:\n            cls_feat = cls_conv(cls_feat)\n        for reg_conv in self.reg_convs:\n            reg_feat = reg_conv(reg_feat)\n        cls_score = self.retina_cls(cls_feat)\n        bbox_pred = self.retina_reg(reg_feat)\n        return cls_score, bbox_pred\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/retina_sepbn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, bias_init_with_prob, normal_init\n\nfrom ..builder import HEADS\nfrom .anchor_head import AnchorHead\n\n\n@HEADS.register_module()\nclass RetinaSepBNHead(AnchorHead):\n    \"\"\"\"RetinaHead with separate BN.\n\n    In RetinaHead, conv/norm layers are shared across different FPN levels,\n    while in RetinaSepBNHead, conv layers are shared across different FPN\n    levels, but BN layers are separated.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 num_ins,\n                 in_channels,\n                 stacked_convs=4,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 init_cfg=None,\n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        self.stacked_convs = stacked_convs\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.num_ins = num_ins\n        super(RetinaSepBNHead, self).__init__(\n            num_classes, in_channels, init_cfg=init_cfg, **kwargs)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.num_ins):\n            cls_convs = nn.ModuleList()\n            reg_convs = nn.ModuleList()\n            for i in range(self.stacked_convs):\n                chn = self.in_channels if i == 0 else self.feat_channels\n                cls_convs.append(\n                    ConvModule(\n                        chn,\n                        self.feat_channels,\n                        3,\n                        stride=1,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg))\n                reg_convs.append(\n                    ConvModule(\n                        chn,\n                        self.feat_channels,\n                        3,\n                        stride=1,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg))\n            self.cls_convs.append(cls_convs)\n            self.reg_convs.append(reg_convs)\n        for i in range(self.stacked_convs):\n            for j in range(1, self.num_ins):\n                self.cls_convs[j][i].conv = self.cls_convs[0][i].conv\n                self.reg_convs[j][i].conv = self.reg_convs[0][i].conv\n        self.retina_cls = nn.Conv2d(\n            self.feat_channels,\n            self.num_base_priors * self.cls_out_channels,\n            3,\n            padding=1)\n        self.retina_reg = nn.Conv2d(\n            self.feat_channels, self.num_base_priors * 4, 3, padding=1)\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\"\"\"\n        super(RetinaSepBNHead, self).init_weights()\n        for m in self.cls_convs[0]:\n            normal_init(m.conv, std=0.01)\n        for m in self.reg_convs[0]:\n            normal_init(m.conv, std=0.01)\n        bias_cls = bias_init_with_prob(0.01)\n        normal_init(self.retina_cls, std=0.01, bias=bias_cls)\n        normal_init(self.retina_reg, std=0.01)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats 
(tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple: Usually a tuple of classification scores and bbox prediction\n                cls_scores (list[Tensor]): Classification scores for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_anchors * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_anchors * 4.\n        \"\"\"\n        cls_scores = []\n        bbox_preds = []\n        for i, x in enumerate(feats):\n            cls_feat = feats[i]\n            reg_feat = feats[i]\n            for cls_conv in self.cls_convs[i]:\n                cls_feat = cls_conv(cls_feat)\n            for reg_conv in self.reg_convs[i]:\n                reg_feat = reg_conv(reg_feat)\n            cls_score = self.retina_cls(cls_feat)\n            bbox_pred = self.retina_reg(reg_feat)\n            cls_scores.append(cls_score)\n            bbox_preds.append(bbox_pred)\n        return cls_scores, bbox_preds\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/rpn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops import batched_nms\n\nfrom ..builder import HEADS\nfrom .anchor_head import AnchorHead\n\n\n@HEADS.register_module()\nclass RPNHead(AnchorHead):\n    \"\"\"RPN head.\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n        num_convs (int): Number of convolution layers in the head. Default 1.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 in_channels,\n                 init_cfg=dict(type='Normal', layer='Conv2d', std=0.01),\n                 num_convs=1,\n                 **kwargs):\n        self.num_convs = num_convs\n        super(RPNHead, self).__init__(\n            1, in_channels, init_cfg=init_cfg, **kwargs)\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        if self.num_convs > 1:\n            rpn_convs = []\n            for i in range(self.num_convs):\n                if i == 0:\n                    in_channels = self.in_channels\n                else:\n                    in_channels = self.feat_channels\n                # use ``inplace=False`` to avoid error: one of the variables\n                # needed for gradient computation has been modified by an\n                # inplace operation.\n                rpn_convs.append(\n                    ConvModule(\n                        in_channels,\n                        self.feat_channels,\n                        3,\n                        padding=1,\n                        inplace=False))\n            self.rpn_conv = nn.Sequential(*rpn_convs)\n        else:\n            self.rpn_conv = nn.Conv2d(\n                self.in_channels, self.feat_channels, 3, padding=1)\n        self.rpn_cls = nn.Conv2d(self.feat_channels,\n                                 self.num_base_priors * self.cls_out_channels,\n                                 1)\n        self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_base_priors * 4,\n                                 1)\n\n    def forward_single(self, x):\n        \"\"\"Forward feature map of a single scale level.\"\"\"\n        x = self.rpn_conv(x)\n        x = F.relu(x, inplace=False)\n        rpn_cls_score = self.rpn_cls(x)\n        rpn_bbox_pred = self.rpn_reg(x)\n        return rpn_cls_score, rpn_bbox_pred\n\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        losses = 
super(RPNHead, self).loss(\n            cls_scores,\n            bbox_preds,\n            gt_bboxes,\n            None,\n            img_metas,\n            gt_bboxes_ignore=gt_bboxes_ignore)\n        return dict(\n            loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])\n\n    def _get_bboxes_single(self,\n                           cls_score_list,\n                           bbox_pred_list,\n                           score_factor_list,\n                           mlvl_anchors,\n                           img_meta,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           **kwargs):\n        \"\"\"Transform outputs of a single image into bbox predictions.\n\n        Args:\n            cls_score_list (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_anchors * num_classes, H, W).\n            bbox_pred_list (list[Tensor]): Box energies / deltas from\n                all scale levels of a single image, each item has\n                shape (num_anchors * 4, H, W).\n            score_factor_list (list[Tensor]): Score factor from all scale\n                levels of a single image. RPN head does not need this value.\n            mlvl_anchors (list[Tensor]): Anchors of all scale level\n                each item has shape (num_anchors, 4).\n            img_meta (dict): Image meta info.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns\n                are bounding box positions (tl_x, tl_y, br_x, br_y) and the\n                5-th column is a score between 0 and 1.\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        cfg = copy.deepcopy(cfg)\n        img_shape = img_meta['img_shape']\n\n        # bboxes from different level should be independent during NMS,\n        # level_ids are used as labels for batched NMS to separate them\n        level_ids = []\n        mlvl_scores = []\n        mlvl_bbox_preds = []\n        mlvl_valid_anchors = []\n        nms_pre = cfg.get('nms_pre', -1)\n        for level_idx in range(len(cls_score_list)):\n            rpn_cls_score = cls_score_list[level_idx]\n            rpn_bbox_pred = bbox_pred_list[level_idx]\n            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]\n            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)\n            if self.use_sigmoid_cls:\n                rpn_cls_score = rpn_cls_score.reshape(-1)\n                scores = rpn_cls_score.sigmoid()\n            else:\n                rpn_cls_score = rpn_cls_score.reshape(-1, 2)\n                # We set FG labels to [0, num_class-1] and BG label to\n                # num_class in RPN head since mmdet v2.5, which is unified to\n                # be consistent with other head since mmdet v2.0. 
In mmdet v2.0\n                # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.\n                scores = rpn_cls_score.softmax(dim=1)[:, 0]\n            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n\n            anchors = mlvl_anchors[level_idx]\n            if 0 < nms_pre < scores.shape[0]:\n                # sort is faster than topk\n                # _, topk_inds = scores.topk(cfg.nms_pre)\n                ranked_scores, rank_inds = scores.sort(descending=True)\n                topk_inds = rank_inds[:nms_pre]\n                scores = ranked_scores[:nms_pre]\n                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]\n                anchors = anchors[topk_inds, :]\n\n            mlvl_scores.append(scores)\n            mlvl_bbox_preds.append(rpn_bbox_pred)\n            mlvl_valid_anchors.append(anchors)\n            level_ids.append(\n                scores.new_full((scores.size(0), ),\n                                level_idx,\n                                dtype=torch.long))\n\n        return self._bbox_post_process(mlvl_scores, mlvl_bbox_preds,\n                                       mlvl_valid_anchors, level_ids, cfg,\n                                       img_shape)\n\n    def _bbox_post_process(self, mlvl_scores, mlvl_bboxes, mlvl_valid_anchors,\n                           level_ids, cfg, img_shape, **kwargs):\n        \"\"\"bbox post-processing method.\n\n        Do the nms operation for bboxes in same level.\n\n        Args:\n            mlvl_scores (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_bboxes, ).\n            mlvl_bboxes (list[Tensor]): Decoded bboxes from all scale\n                levels of a single image, each item has shape (num_bboxes, 4).\n            mlvl_valid_anchors (list[Tensor]): Anchors of all scale level\n                each item has shape (num_bboxes, 4).\n            level_ids (list[Tensor]): Indexes from all scale levels of a\n                single image, each item has shape (num_bboxes, ).\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, `self.test_cfg` would be used.\n            img_shape (tuple(int)): The shape of model's input image.\n\n        Returns:\n            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns\n                are bounding box positions (tl_x, tl_y, br_x, br_y) and the\n                5-th column is a score between 0 and 1.\n        \"\"\"\n        scores = torch.cat(mlvl_scores)\n        anchors = torch.cat(mlvl_valid_anchors)\n        rpn_bbox_pred = torch.cat(mlvl_bboxes)\n        proposals = self.bbox_coder.decode(\n            anchors, rpn_bbox_pred, max_shape=img_shape)\n        ids = torch.cat(level_ids)\n\n        if cfg.min_bbox_size >= 0:\n            w = proposals[:, 2] - proposals[:, 0]\n            h = proposals[:, 3] - proposals[:, 1]\n            valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size)\n            if not valid_mask.all():\n                proposals = proposals[valid_mask]\n                scores = scores[valid_mask]\n                ids = ids[valid_mask]\n\n        if proposals.numel() > 0:\n            dets, _ = batched_nms(proposals, scores, ids, cfg.nms)\n        else:\n            return proposals.new_zeros(0, 5)\n\n        return dets[:cfg.max_per_img]\n\n    def onnx_export(self, x, img_metas):\n        \"\"\"Test without augmentation.\n\n        Args:\n            x (tuple[Tensor]): Features from the 
upstream network, each is\n                a 4D-tensor.\n            img_metas (list[dict]): Meta info of each image.\n        Returns:\n            Tensor: dets of shape [N, num_det, 5].\n        \"\"\"\n        cls_scores, bbox_preds = self(x)\n\n        assert len(cls_scores) == len(bbox_preds)\n\n        batch_bboxes, batch_scores = super(RPNHead, self).onnx_export(\n            cls_scores, bbox_preds, img_metas=img_metas, with_nms=False)\n        # Use ONNX::NonMaxSuppression in deployment\n        from mmdet.core.export import add_dummy_nms_for_onnx\n        cfg = copy.deepcopy(self.test_cfg)\n        score_threshold = cfg.nms.get('score_thr', 0.0)\n        nms_pre = cfg.get('deploy_nms_pre', -1)\n        # Different from the normal forward doing NMS level by level,\n        # we do NMS across all levels when exporting ONNX.\n        dets, _ = add_dummy_nms_for_onnx(batch_bboxes, batch_scores,\n                                         cfg.max_per_img,\n                                         cfg.nms.iou_threshold,\n                                         score_threshold, nms_pre,\n                                         cfg.max_per_img)\n        return dets\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/sabl_retina_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (build_assigner, build_bbox_coder,\n                        build_prior_generator, build_sampler, images_to_levels,\n                        multi_apply, unmap)\nfrom mmdet.core.utils import filter_scores_and_topk\nfrom ..builder import HEADS, build_loss\nfrom .base_dense_head import BaseDenseHead\nfrom .dense_test_mixins import BBoxTestMixin\nfrom .guided_anchor_head import GuidedAnchorHead\n\n\n@HEADS.register_module()\nclass SABLRetinaHead(BaseDenseHead, BBoxTestMixin):\n    \"\"\"Side-Aware Boundary Localization (SABL) for RetinaNet.\n\n    The anchor generation, assigning and sampling in SABLRetinaHead\n    are the same as GuidedAnchorHead for guided anchoring.\n\n    Please refer to https://arxiv.org/abs/1912.04260 for more details.\n\n    Args:\n        num_classes (int): Number of classes.\n        in_channels (int): Number of channels in the input feature map.\n        stacked_convs (int): Number of Convs for classification \\\n            and regression branches. Defaults to 4.\n        feat_channels (int): Number of hidden channels. \\\n            Defaults to 256.\n        approx_anchor_generator (dict): Config dict for approx generator.\n        square_anchor_generator (dict): Config dict for square generator.\n        conv_cfg (dict): Config dict for ConvModule. Defaults to None.\n        norm_cfg (dict): Config dict for Norm Layer. Defaults to None.\n        bbox_coder (dict): Config dict for bbox coder.\n        reg_decoded_bbox (bool): If true, the regression loss would be\n            applied directly on decoded bounding boxes, converting both\n            the predicted boxes and regression targets to absolute\n            coordinates format. Default False. 
It should be `True` when\n            using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.\n        train_cfg (dict): Training config of SABLRetinaHead.\n        test_cfg (dict): Testing config of SABLRetinaHead.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox_cls (dict): Config of classification loss for bbox branch.\n        loss_bbox_reg (dict): Config of regression loss for bbox branch.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 stacked_convs=4,\n                 feat_channels=256,\n                 approx_anchor_generator=dict(\n                     type='AnchorGenerator',\n                     octave_base_scale=4,\n                     scales_per_octave=3,\n                     ratios=[0.5, 1.0, 2.0],\n                     strides=[8, 16, 32, 64, 128]),\n                 square_anchor_generator=dict(\n                     type='AnchorGenerator',\n                     ratios=[1.0],\n                     scales=[4],\n                     strides=[8, 16, 32, 64, 128]),\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 bbox_coder=dict(\n                     type='BucketingBBoxCoder',\n                     num_buckets=14,\n                     scale_factor=3.0),\n                 reg_decoded_bbox=False,\n                 train_cfg=None,\n                 test_cfg=None,\n                 loss_cls=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 loss_bbox_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.5),\n                 loss_bbox_reg=dict(\n                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5),\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='retina_cls',\n                         std=0.01,\n                         bias_prob=0.01))):\n        super(SABLRetinaHead, self).__init__(init_cfg)\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.feat_channels = feat_channels\n        self.num_buckets = bbox_coder['num_buckets']\n        self.side_num = int(np.ceil(self.num_buckets / 2))\n\n        assert (approx_anchor_generator['octave_base_scale'] ==\n                square_anchor_generator['scales'][0])\n        assert (approx_anchor_generator['strides'] ==\n                square_anchor_generator['strides'])\n\n        self.approx_anchor_generator = build_prior_generator(\n            approx_anchor_generator)\n        self.square_anchor_generator = build_prior_generator(\n            square_anchor_generator)\n        self.approxs_per_octave = (\n            self.approx_anchor_generator.num_base_priors[0])\n\n        # one anchor per location\n        self.num_base_priors = self.square_anchor_generator.num_base_priors[0]\n\n        self.stacked_convs = stacked_convs\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        self.reg_decoded_bbox = reg_decoded_bbox\n\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n        
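# losses such as FocalLoss handle the positive/negative imbalance\n        # themselves, so explicit sampling is disabled for them and a\n        # PseudoSampler is used instead\n        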
self.sampling = loss_cls['type'] not in [\n            'FocalLoss', 'GHMC', 'QualityFocalLoss'\n        ]\n        if self.use_sigmoid_cls:\n            self.cls_out_channels = num_classes\n        else:\n            self.cls_out_channels = num_classes + 1\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox_cls = build_loss(loss_bbox_cls)\n        self.loss_bbox_reg = build_loss(loss_bbox_reg)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            # use PseudoSampler when sampling is False\n            if self.sampling and hasattr(self.train_cfg, 'sampler'):\n                sampler_cfg = self.train_cfg.sampler\n            else:\n                sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n\n        self.fp16_enabled = False\n        self._init_layers()\n\n    @property\n    def num_anchors(self):\n        warnings.warn('DeprecationWarning: `num_anchors` is deprecated, '\n                      'please use \"num_base_priors\" instead')\n        return self.square_anchor_generator.num_base_priors[0]\n\n    def _init_layers(self):\n        self.relu = nn.ReLU(inplace=True)\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n            self.reg_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        self.retina_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n        self.retina_bbox_reg = nn.Conv2d(\n            self.feat_channels, self.side_num * 4, 3, padding=1)\n        self.retina_bbox_cls = nn.Conv2d(\n            self.feat_channels, self.side_num * 4, 3, padding=1)\n\n    def forward_single(self, x):\n        cls_feat = x\n        reg_feat = x\n        for cls_conv in self.cls_convs:\n            cls_feat = cls_conv(cls_feat)\n        for reg_conv in self.reg_convs:\n            reg_feat = reg_conv(reg_feat)\n        cls_score = self.retina_cls(cls_feat)\n        bbox_cls_pred = self.retina_bbox_cls(reg_feat)\n        bbox_reg_pred = self.retina_bbox_reg(reg_feat)\n        bbox_pred = (bbox_cls_pred, bbox_reg_pred)\n        return cls_score, bbox_pred\n\n    def forward(self, feats):\n        return multi_apply(self.forward_single, feats)\n\n    def get_anchors(self, featmap_sizes, img_metas, device='cuda'):\n        \"\"\"Get squares according to feature map sizes and guided anchors.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            img_metas (list[dict]): Image meta info.\n            device (torch.device | str): device for returned tensors\n\n        Returns:\n            tuple: square approxs of each image\n        \"\"\"\n        num_imgs 
= len(img_metas)\n\n        # since feature map sizes of all images are the same, we only compute\n        # squares for one time\n        multi_level_squares = self.square_anchor_generator.grid_priors(\n            featmap_sizes, device=device)\n        squares_list = [multi_level_squares for _ in range(num_imgs)]\n\n        return squares_list\n\n    def get_target(self,\n                   approx_list,\n                   inside_flag_list,\n                   square_list,\n                   gt_bboxes_list,\n                   img_metas,\n                   gt_bboxes_ignore_list=None,\n                   gt_labels_list=None,\n                   label_channels=None,\n                   sampling=True,\n                   unmap_outputs=True):\n        \"\"\"Compute bucketing targets.\n        Args:\n            approx_list (list[list]): Multi level approxs of each image.\n            inside_flag_list (list[list]): Multi level inside flags of each\n                image.\n            square_list (list[list]): Multi level squares of each image.\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes.\n            gt_labels_list (list[Tensor]): Gt labels of each image.\n            label_channels (int): Channel of label.\n            sampling (bool): Sample Anchors or not.\n            unmap_outputs (bool): unmap outputs or not.\n\n        Returns:\n            tuple: Returns a tuple containing learning targets.\n\n                - labels_list (list[Tensor]): Labels of each level.\n                - label_weights_list (list[Tensor]): Label weights of each \\\n                    level.\n                - bbox_cls_targets_list (list[Tensor]): BBox cls targets of \\\n                    each level.\n                - bbox_cls_weights_list (list[Tensor]): BBox cls weights of \\\n                    each level.\n                - bbox_reg_targets_list (list[Tensor]): BBox reg targets of \\\n                    each level.\n                - bbox_reg_weights_list (list[Tensor]): BBox reg weights of \\\n                    each level.\n                - num_total_pos (int): Number of positive samples in all \\\n                    images.\n                - num_total_neg (int): Number of negative samples in all \\\n                    images.\n        \"\"\"\n        num_imgs = len(img_metas)\n        assert len(approx_list) == len(inside_flag_list) == len(\n            square_list) == num_imgs\n        # anchor number of multi levels\n        num_level_squares = [squares.size(0) for squares in square_list[0]]\n        # concat all level anchors and flags to a single tensor\n        inside_flag_flat_list = []\n        approx_flat_list = []\n        square_flat_list = []\n        for i in range(num_imgs):\n            assert len(square_list[i]) == len(inside_flag_list[i])\n            inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))\n            approx_flat_list.append(torch.cat(approx_list[i]))\n            square_flat_list.append(torch.cat(square_list[i]))\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        (all_labels, all_label_weights, all_bbox_cls_targets,\n         all_bbox_cls_weights, all_bbox_reg_targets, 
all_bbox_reg_weights,\n         pos_inds_list, neg_inds_list) = multi_apply(\n             self._get_target_single,\n             approx_flat_list,\n             inside_flag_flat_list,\n             square_flat_list,\n             gt_bboxes_list,\n             gt_bboxes_ignore_list,\n             gt_labels_list,\n             img_metas,\n             label_channels=label_channels,\n             sampling=sampling,\n             unmap_outputs=unmap_outputs)\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n        # split targets to a list w.r.t. multiple levels\n        labels_list = images_to_levels(all_labels, num_level_squares)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_squares)\n        bbox_cls_targets_list = images_to_levels(all_bbox_cls_targets,\n                                                 num_level_squares)\n        bbox_cls_weights_list = images_to_levels(all_bbox_cls_weights,\n                                                 num_level_squares)\n        bbox_reg_targets_list = images_to_levels(all_bbox_reg_targets,\n                                                 num_level_squares)\n        bbox_reg_weights_list = images_to_levels(all_bbox_reg_weights,\n                                                 num_level_squares)\n        return (labels_list, label_weights_list, bbox_cls_targets_list,\n                bbox_cls_weights_list, bbox_reg_targets_list,\n                bbox_reg_weights_list, num_total_pos, num_total_neg)\n\n    def _get_target_single(self,\n                           flat_approxs,\n                           inside_flags,\n                           flat_squares,\n                           gt_bboxes,\n                           gt_bboxes_ignore,\n                           gt_labels,\n                           img_meta,\n                           label_channels=None,\n                           sampling=True,\n                           unmap_outputs=True):\n        \"\"\"Compute regression and classification targets for anchors in a\n        single image.\n\n        Args:\n            flat_approxs (Tensor): flat approxs of a single image,\n                shape (n, 4)\n            inside_flags (Tensor): inside flags of a single image,\n                shape (n, ).\n            flat_squares (Tensor): flat squares of a single image,\n                shape (approxs_per_octave * n, 4)\n            gt_bboxes (Tensor): Ground truth bboxes of a single image, \\\n                shape (num_gts, 4).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            img_meta (dict): Meta info of the image.\n            label_channels (int): Channel of label.\n            sampling (bool): Sample Anchors or not.\n            unmap_outputs (bool): unmap outputs or not.\n\n        Returns:\n            tuple:\n\n                - labels_list (Tensor): Labels in a single image\n                - label_weights (Tensor): Label weights in a single image\n                - bbox_cls_targets (Tensor): BBox cls targets in a single image\n                - bbox_cls_weights 
(Tensor): BBox cls weights in a single image\n                - bbox_reg_targets (Tensor): BBox reg targets in a single image\n                - bbox_reg_weights (Tensor): BBox reg weights in a single image\n                - num_total_pos (int): Number of positive samples \\\n                    in a single image\n                - num_total_neg (int): Number of negative samples \\\n                    in a single image\n        \"\"\"\n        if not inside_flags.any():\n            return (None, ) * 8\n        # assign gt and sample anchors\n        expand_inside_flags = inside_flags[:, None].expand(\n            -1, self.approxs_per_octave).reshape(-1)\n        approxs = flat_approxs[expand_inside_flags, :]\n        squares = flat_squares[inside_flags, :]\n\n        assign_result = self.assigner.assign(approxs, squares,\n                                             self.approxs_per_octave,\n                                             gt_bboxes, gt_bboxes_ignore)\n        sampling_result = self.sampler.sample(assign_result, squares,\n                                              gt_bboxes)\n\n        num_valid_squares = squares.shape[0]\n        bbox_cls_targets = squares.new_zeros(\n            (num_valid_squares, self.side_num * 4))\n        bbox_cls_weights = squares.new_zeros(\n            (num_valid_squares, self.side_num * 4))\n        bbox_reg_targets = squares.new_zeros(\n            (num_valid_squares, self.side_num * 4))\n        bbox_reg_weights = squares.new_zeros(\n            (num_valid_squares, self.side_num * 4))\n        labels = squares.new_full((num_valid_squares, ),\n                                  self.num_classes,\n                                  dtype=torch.long)\n        label_weights = squares.new_zeros(num_valid_squares, dtype=torch.float)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            (pos_bbox_reg_targets, pos_bbox_reg_weights, pos_bbox_cls_targets,\n             pos_bbox_cls_weights) = self.bbox_coder.encode(\n                 sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)\n\n            bbox_cls_targets[pos_inds, :] = pos_bbox_cls_targets\n            bbox_reg_targets[pos_inds, :] = pos_bbox_reg_targets\n            bbox_cls_weights[pos_inds, :] = pos_bbox_cls_weights\n            bbox_reg_weights[pos_inds, :] = pos_bbox_reg_weights\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_squares.size(0)\n            labels = unmap(\n                labels, num_total_anchors, inside_flags, fill=self.num_classes)\n            label_weights = unmap(label_weights, num_total_anchors,\n                                  inside_flags)\n            bbox_cls_targets = unmap(bbox_cls_targets, num_total_anchors,\n                                     inside_flags)\n            bbox_cls_weights = unmap(bbox_cls_weights, num_total_anchors,\n                            
         inside_flags)\n            bbox_reg_targets = unmap(bbox_reg_targets, num_total_anchors,\n                                     inside_flags)\n            bbox_reg_weights = unmap(bbox_reg_weights, num_total_anchors,\n                                     inside_flags)\n        return (labels, label_weights, bbox_cls_targets, bbox_cls_weights,\n                bbox_reg_targets, bbox_reg_weights, pos_inds, neg_inds)\n\n    def loss_single(self, cls_score, bbox_pred, labels, label_weights,\n                    bbox_cls_targets, bbox_cls_weights, bbox_reg_targets,\n                    bbox_reg_weights, num_total_samples):\n        # classification loss\n        labels = labels.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        cls_score = cls_score.permute(0, 2, 3,\n                                      1).reshape(-1, self.cls_out_channels)\n        loss_cls = self.loss_cls(\n            cls_score, labels, label_weights, avg_factor=num_total_samples)\n        # regression loss\n        bbox_cls_targets = bbox_cls_targets.reshape(-1, self.side_num * 4)\n        bbox_cls_weights = bbox_cls_weights.reshape(-1, self.side_num * 4)\n        bbox_reg_targets = bbox_reg_targets.reshape(-1, self.side_num * 4)\n        bbox_reg_weights = bbox_reg_weights.reshape(-1, self.side_num * 4)\n        (bbox_cls_pred, bbox_reg_pred) = bbox_pred\n        bbox_cls_pred = bbox_cls_pred.permute(0, 2, 3, 1).reshape(\n            -1, self.side_num * 4)\n        bbox_reg_pred = bbox_reg_pred.permute(0, 2, 3, 1).reshape(\n            -1, self.side_num * 4)\n        loss_bbox_cls = self.loss_bbox_cls(\n            bbox_cls_pred,\n            bbox_cls_targets.long(),\n            bbox_cls_weights,\n            avg_factor=num_total_samples * 4 * self.side_num)\n        loss_bbox_reg = self.loss_bbox_reg(\n            bbox_reg_pred,\n            bbox_reg_targets,\n            bbox_reg_weights,\n            avg_factor=num_total_samples * 4 * self.bbox_coder.offset_topk)\n        return loss_cls, loss_bbox_cls, loss_bbox_reg\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.approx_anchor_generator.num_levels\n\n        device = cls_scores[0].device\n\n        # get sampled approxes\n        approxs_list, inside_flag_list = GuidedAnchorHead.get_sampled_approxs(\n            self, featmap_sizes, img_metas, device=device)\n\n        square_list = self.get_anchors(featmap_sizes, img_metas, device=device)\n\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n\n        cls_reg_targets = self.get_target(\n            approxs_list,\n            inside_flag_list,\n            square_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels,\n            sampling=self.sampling)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_cls_targets_list,\n         bbox_cls_weights_list, bbox_reg_targets_list, bbox_reg_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n        num_total_samples = (\n            num_total_pos + num_total_neg if self.sampling else num_total_pos)\n 
       losses_cls, losses_bbox_cls, losses_bbox_reg = multi_apply(\n            self.loss_single,\n            cls_scores,\n            bbox_preds,\n            labels_list,\n            label_weights_list,\n            bbox_cls_targets_list,\n            bbox_cls_weights_list,\n            bbox_reg_targets_list,\n            bbox_reg_weights_list,\n            num_total_samples=num_total_samples)\n        return dict(\n            loss_cls=losses_cls,\n            loss_bbox_cls=losses_bbox_cls,\n            loss_bbox_reg=losses_bbox_reg)\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   img_metas,\n                   cfg=None,\n                   rescale=False):\n        assert len(cls_scores) == len(bbox_preds)\n        num_levels = len(cls_scores)\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n\n        device = cls_scores[0].device\n        mlvl_anchors = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_score_list = [\n                cls_scores[i][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_cls_pred_list = [\n                bbox_preds[i][0][img_id].detach() for i in range(num_levels)\n            ]\n            bbox_reg_pred_list = [\n                bbox_preds[i][1][img_id].detach() for i in range(num_levels)\n            ]\n            img_shape = img_metas[img_id]['img_shape']\n            scale_factor = img_metas[img_id]['scale_factor']\n            proposals = self._get_bboxes_single(\n                cls_score_list, bbox_cls_pred_list, bbox_reg_pred_list,\n                mlvl_anchors[img_id], img_shape, scale_factor, cfg, rescale)\n            result_list.append(proposals)\n        return result_list\n\n    def _get_bboxes_single(self,\n                           cls_scores,\n                           bbox_cls_preds,\n                           bbox_reg_preds,\n                           mlvl_anchors,\n                           img_shape,\n                           scale_factor,\n                           cfg,\n                           rescale=False):\n        cfg = self.test_cfg if cfg is None else cfg\n        nms_pre = cfg.get('nms_pre', -1)\n\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_confids = []\n        mlvl_labels = []\n        assert len(cls_scores) == len(bbox_cls_preds) == len(\n            bbox_reg_preds) == len(mlvl_anchors)\n        for cls_score, bbox_cls_pred, bbox_reg_pred, anchors in zip(\n                cls_scores, bbox_cls_preds, bbox_reg_preds, mlvl_anchors):\n            assert cls_score.size()[-2:] == bbox_cls_pred.size(\n            )[-2:] == bbox_reg_pred.size()[-2::]\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.cls_out_channels)\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)[:, :-1]\n            bbox_cls_pred = bbox_cls_pred.permute(1, 2, 0).reshape(\n                -1, self.side_num * 4)\n            bbox_reg_pred = bbox_reg_pred.permute(1, 2, 0).reshape(\n                -1, self.side_num * 4)\n\n            # After https://github.com/open-mmlab/mmdetection/pull/6268/,\n            # this operation keeps fewer bboxes under the same `nms_pre`.\n            # There is 
no difference in performance for most models. If you\n            # find a slight drop in performance, you can set a larger\n            # `nms_pre` than before.\n            results = filter_scores_and_topk(\n                scores, cfg.score_thr, nms_pre,\n                dict(\n                    anchors=anchors,\n                    bbox_cls_pred=bbox_cls_pred,\n                    bbox_reg_pred=bbox_reg_pred))\n            scores, labels, _, filtered_results = results\n\n            anchors = filtered_results['anchors']\n            bbox_cls_pred = filtered_results['bbox_cls_pred']\n            bbox_reg_pred = filtered_results['bbox_reg_pred']\n\n            bbox_preds = [\n                bbox_cls_pred.contiguous(),\n                bbox_reg_pred.contiguous()\n            ]\n            bboxes, confids = self.bbox_coder.decode(\n                anchors.contiguous(), bbox_preds, max_shape=img_shape)\n\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_confids.append(confids)\n            mlvl_labels.append(labels)\n        return self._bbox_post_process(mlvl_scores, mlvl_labels, mlvl_bboxes,\n                                       scale_factor, cfg, rescale, True,\n                                       mlvl_confids)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/solo_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\n\nfrom mmdet.core import InstanceData, mask_matrix_nms, multi_apply\nfrom mmdet.core.utils import center_of_mass, generate_coordinate\nfrom mmdet.models.builder import HEADS, build_loss\nfrom mmdet.utils.misc import floordiv\nfrom .base_mask_head import BaseMaskHead\n\n\n@HEADS.register_module()\nclass SOLOHead(BaseMaskHead):\n    \"\"\"SOLO mask head used in `SOLO: Segmenting Objects by Locations.\n\n    <https://arxiv.org/abs/1912.04488>`_\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int): Number of hidden channels. Used in child classes.\n            Default: 256.\n        stacked_convs (int): Number of stacking convs of the head.\n            Default: 4.\n        strides (tuple): Downsample factor of each feature map.\n        scale_ranges (tuple[tuple[int, int]]): Area range of multiple\n            level masks, in the format [(min1, max1), (min2, max2), ...].\n            A range of (16, 64) means the area range between (16, 64).\n        pos_scale (float): Constant scale factor to control the center region.\n        num_grids (list[int]): Divided image into a uniform grids, each\n            feature map has a different grid value. The number of output\n            channels is grid ** 2. Default: [40, 36, 24, 16, 12].\n        cls_down_index (int): The index of downsample operation in\n            classification branch. Default: 0.\n        loss_mask (dict): Config of mask loss.\n        loss_cls (dict): Config of classification loss.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: norm_cfg=dict(type='GN', num_groups=32,\n                                   requires_grad=True).\n        train_cfg (dict): Training config of head.\n        test_cfg (dict): Testing config of head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        in_channels,\n        feat_channels=256,\n        stacked_convs=4,\n        strides=(4, 8, 16, 32, 64),\n        scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256), (128, 512)),\n        pos_scale=0.2,\n        num_grids=[40, 36, 24, 16, 12],\n        cls_down_index=0,\n        loss_mask=None,\n        loss_cls=None,\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n        train_cfg=None,\n        test_cfg=None,\n        init_cfg=[\n            dict(type='Normal', layer='Conv2d', std=0.01),\n            dict(\n                type='Normal',\n                std=0.01,\n                bias_prob=0.01,\n                override=dict(name='conv_mask_list')),\n            dict(\n                type='Normal',\n                std=0.01,\n                bias_prob=0.01,\n                override=dict(name='conv_cls'))\n        ],\n    ):\n        super(SOLOHead, self).__init__(init_cfg)\n        self.num_classes = num_classes\n        self.cls_out_channels = self.num_classes\n        self.in_channels = in_channels\n        self.feat_channels = feat_channels\n        self.stacked_convs = stacked_convs\n        self.strides = strides\n        self.num_grids = num_grids\n        # number of FPN feats\n        self.num_levels = 
len(strides)\n        assert self.num_levels == len(scale_ranges) == len(num_grids)\n        self.scale_ranges = scale_ranges\n        self.pos_scale = pos_scale\n\n        self.cls_down_index = cls_down_index\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_mask = build_loss(loss_mask)\n        self.norm_cfg = norm_cfg\n        self.init_cfg = init_cfg\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        self._init_layers()\n\n    def _init_layers(self):\n        self.mask_convs = nn.ModuleList()\n        self.cls_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            chn = self.in_channels + 2 if i == 0 else self.feat_channels\n            self.mask_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    norm_cfg=self.norm_cfg))\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    norm_cfg=self.norm_cfg))\n        self.conv_mask_list = nn.ModuleList()\n        for num_grid in self.num_grids:\n            self.conv_mask_list.append(\n                nn.Conv2d(self.feat_channels, num_grid**2, 1))\n\n        self.conv_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n\n    def resize_feats(self, feats):\n        \"\"\"Downsample the first feat and upsample last feat in feats.\"\"\"\n        out = []\n        for i in range(len(feats)):\n            if i == 0:\n                out.append(\n                    F.interpolate(\n                        feats[0],\n                        size=feats[i + 1].shape[-2:],\n                        mode='bilinear',\n                        align_corners=False))\n            elif i == len(feats) - 1:\n                out.append(\n                    F.interpolate(\n                        feats[i],\n                        size=feats[i - 1].shape[-2:],\n                        mode='bilinear',\n                        align_corners=False))\n            else:\n                out.append(feats[i])\n        return out\n\n    def forward(self, feats):\n        assert len(feats) == self.num_levels\n        feats = self.resize_feats(feats)\n        mlvl_mask_preds = []\n        mlvl_cls_preds = []\n        for i in range(self.num_levels):\n            x = feats[i]\n            mask_feat = x\n            cls_feat = x\n            # generate and concat the coordinate\n            coord_feat = generate_coordinate(mask_feat.size(),\n                                             mask_feat.device)\n            mask_feat = torch.cat([mask_feat, coord_feat], 1)\n\n            for mask_layer in (self.mask_convs):\n                mask_feat = mask_layer(mask_feat)\n\n            mask_feat = F.interpolate(\n                mask_feat, scale_factor=2, mode='bilinear')\n            mask_pred = self.conv_mask_list[i](mask_feat)\n\n            # cls branch\n            for j, cls_layer in enumerate(self.cls_convs):\n                if j == self.cls_down_index:\n                    num_grid = self.num_grids[i]\n                    cls_feat = F.interpolate(\n                        cls_feat, size=num_grid, mode='bilinear')\n                cls_feat = 
cls_layer(cls_feat)\n\n            cls_pred = self.conv_cls(cls_feat)\n\n            if not self.training:\n                feat_wh = feats[0].size()[-2:]\n                upsampled_size = (feat_wh[0] * 2, feat_wh[1] * 2)\n                mask_pred = F.interpolate(\n                    mask_pred.sigmoid(), size=upsampled_size, mode='bilinear')\n                cls_pred = cls_pred.sigmoid()\n                # get local maximum\n                local_max = F.max_pool2d(cls_pred, 2, stride=1, padding=1)\n                keep_mask = local_max[:, :, :-1, :-1] == cls_pred\n                cls_pred = cls_pred * keep_mask\n\n            mlvl_mask_preds.append(mask_pred)\n            mlvl_cls_preds.append(cls_pred)\n        return mlvl_mask_preds, mlvl_cls_preds\n\n    def loss(self,\n             mlvl_mask_preds,\n             mlvl_cls_preds,\n             gt_labels,\n             gt_masks,\n             img_metas,\n             gt_bboxes=None,\n             **kwargs):\n        \"\"\"Calculate the loss of total batch.\n\n        Args:\n            mlvl_mask_preds (list[Tensor]): Multi-level mask prediction.\n                Each element in the list has shape\n                (batch_size, num_grids**2 ,h ,w).\n            mlvl_cls_preds (list[Tensor]): Multi-level scores. Each element\n                in the list has shape\n                (batch_size, num_classes, num_grids ,num_grids).\n            gt_labels (list[Tensor]): Labels of multiple images.\n            gt_masks (list[Tensor]): Ground truth masks of multiple images.\n                Each has shape (num_instances, h, w).\n            img_metas (list[dict]): Meta information of multiple images.\n            gt_bboxes (list[Tensor]): Ground truth bboxes of multiple\n                images. Default: None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        num_levels = self.num_levels\n        num_imgs = len(gt_labels)\n\n        featmap_sizes = [featmap.size()[-2:] for featmap in mlvl_mask_preds]\n\n        # `BoolTensor` in `pos_masks` represent\n        # whether the corresponding point is\n        # positive\n        pos_mask_targets, labels, pos_masks = multi_apply(\n            self._get_targets_single,\n            gt_bboxes,\n            gt_labels,\n            gt_masks,\n            featmap_sizes=featmap_sizes)\n\n        # change from the outside list meaning multi images\n        # to the outside list meaning multi levels\n        mlvl_pos_mask_targets = [[] for _ in range(num_levels)]\n        mlvl_pos_mask_preds = [[] for _ in range(num_levels)]\n        mlvl_pos_masks = [[] for _ in range(num_levels)]\n        mlvl_labels = [[] for _ in range(num_levels)]\n        for img_id in range(num_imgs):\n            assert num_levels == len(pos_mask_targets[img_id])\n            for lvl in range(num_levels):\n                mlvl_pos_mask_targets[lvl].append(\n                    pos_mask_targets[img_id][lvl])\n                mlvl_pos_mask_preds[lvl].append(\n                    mlvl_mask_preds[lvl][img_id, pos_masks[img_id][lvl], ...])\n                mlvl_pos_masks[lvl].append(pos_masks[img_id][lvl].flatten())\n                mlvl_labels[lvl].append(labels[img_id][lvl].flatten())\n\n        # cat multiple image\n        temp_mlvl_cls_preds = []\n        for lvl in range(num_levels):\n            mlvl_pos_mask_targets[lvl] = torch.cat(\n                mlvl_pos_mask_targets[lvl], dim=0)\n            mlvl_pos_mask_preds[lvl] = torch.cat(\n                
mlvl_pos_mask_preds[lvl], dim=0)\n            mlvl_pos_masks[lvl] = torch.cat(mlvl_pos_masks[lvl], dim=0)\n            mlvl_labels[lvl] = torch.cat(mlvl_labels[lvl], dim=0)\n            temp_mlvl_cls_preds.append(mlvl_cls_preds[lvl].permute(\n                0, 2, 3, 1).reshape(-1, self.cls_out_channels))\n\n        num_pos = sum(item.sum() for item in mlvl_pos_masks)\n        # dice loss\n        loss_mask = []\n        for pred, target in zip(mlvl_pos_mask_preds, mlvl_pos_mask_targets):\n            if pred.size()[0] == 0:\n                loss_mask.append(pred.sum().unsqueeze(0))\n                continue\n            loss_mask.append(\n                self.loss_mask(pred, target, reduction_override='none'))\n        if num_pos > 0:\n            loss_mask = torch.cat(loss_mask).sum() / num_pos\n        else:\n            loss_mask = torch.cat(loss_mask).mean()\n\n        flatten_labels = torch.cat(mlvl_labels)\n        flatten_cls_preds = torch.cat(temp_mlvl_cls_preds)\n        loss_cls = self.loss_cls(\n            flatten_cls_preds, flatten_labels, avg_factor=num_pos + 1)\n        return dict(loss_mask=loss_mask, loss_cls=loss_cls)\n\n    def _get_targets_single(self,\n                            gt_bboxes,\n                            gt_labels,\n                            gt_masks,\n                            featmap_sizes=None):\n        \"\"\"Compute targets for predictions of single image.\n\n        Args:\n            gt_bboxes (Tensor): Ground truth bbox of each instance,\n                shape (num_gts, 4).\n            gt_labels (Tensor): Ground truth label of each instance,\n                shape (num_gts,).\n            gt_masks (Tensor): Ground truth mask of each instance,\n                shape (num_gts, h, w).\n            featmap_sizes (list[:obj:`torch.size`]): Size of each\n                feature map from feature pyramid, each element\n                means (feat_h, feat_w). 
Default: None.\n\n        Returns:\n            Tuple: Usually returns a tuple containing targets for predictions.\n\n                - mlvl_pos_mask_targets (list[Tensor]): Each element represent\n                  the binary mask targets for positive points in this\n                  level, has shape (num_pos, out_h, out_w).\n                - mlvl_labels (list[Tensor]): Each element is\n                  classification labels for all\n                  points in this level, has shape\n                  (num_grid, num_grid).\n                - mlvl_pos_masks (list[Tensor]): Each element is\n                  a `BoolTensor` to represent whether the\n                  corresponding point in single level\n                  is positive, has shape (num_grid **2).\n        \"\"\"\n        device = gt_labels.device\n        gt_areas = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0]) *\n                              (gt_bboxes[:, 3] - gt_bboxes[:, 1]))\n\n        mlvl_pos_mask_targets = []\n        mlvl_labels = []\n        mlvl_pos_masks = []\n        for (lower_bound, upper_bound), stride, featmap_size, num_grid \\\n                in zip(self.scale_ranges, self.strides,\n                       featmap_sizes, self.num_grids):\n\n            mask_target = torch.zeros(\n                [num_grid**2, featmap_size[0], featmap_size[1]],\n                dtype=torch.uint8,\n                device=device)\n            # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n            labels = torch.zeros([num_grid, num_grid],\n                                 dtype=torch.int64,\n                                 device=device) + self.num_classes\n            pos_mask = torch.zeros([num_grid**2],\n                                   dtype=torch.bool,\n                                   device=device)\n\n            gt_inds = ((gt_areas >= lower_bound) &\n                       (gt_areas <= upper_bound)).nonzero().flatten()\n            if len(gt_inds) == 0:\n                mlvl_pos_mask_targets.append(\n                    mask_target.new_zeros(0, featmap_size[0], featmap_size[1]))\n                mlvl_labels.append(labels)\n                mlvl_pos_masks.append(pos_mask)\n                continue\n            hit_gt_bboxes = gt_bboxes[gt_inds]\n            hit_gt_labels = gt_labels[gt_inds]\n            hit_gt_masks = gt_masks[gt_inds, ...]\n\n            pos_w_ranges = 0.5 * (hit_gt_bboxes[:, 2] -\n                                  hit_gt_bboxes[:, 0]) * self.pos_scale\n            pos_h_ranges = 0.5 * (hit_gt_bboxes[:, 3] -\n                                  hit_gt_bboxes[:, 1]) * self.pos_scale\n\n            # Make sure hit_gt_masks has a value\n            valid_mask_flags = hit_gt_masks.sum(dim=-1).sum(dim=-1) > 0\n            output_stride = stride / 2\n\n            for gt_mask, gt_label, pos_h_range, pos_w_range, \\\n                valid_mask_flag in \\\n                    zip(hit_gt_masks, hit_gt_labels, pos_h_ranges,\n                        pos_w_ranges, valid_mask_flags):\n                if not valid_mask_flag:\n                    continue\n                upsampled_size = (featmap_sizes[0][0] * 4,\n                                  featmap_sizes[0][1] * 4)\n                center_h, center_w = center_of_mass(gt_mask)\n\n                coord_w = int(\n                    floordiv((center_w / upsampled_size[1]), (1. / num_grid),\n                             rounding_mode='trunc'))\n                coord_h = int(\n                    floordiv((center_h / upsampled_size[0]), (1. 
/ num_grid),\n                             rounding_mode='trunc'))\n\n                # left, top, right, down\n                top_box = max(\n                    0,\n                    int(\n                        floordiv(\n                            (center_h - pos_h_range) / upsampled_size[0],\n                            (1. / num_grid),\n                            rounding_mode='trunc')))\n                down_box = min(\n                    num_grid - 1,\n                    int(\n                        floordiv(\n                            (center_h + pos_h_range) / upsampled_size[0],\n                            (1. / num_grid),\n                            rounding_mode='trunc')))\n                left_box = max(\n                    0,\n                    int(\n                        floordiv(\n                            (center_w - pos_w_range) / upsampled_size[1],\n                            (1. / num_grid),\n                            rounding_mode='trunc')))\n                right_box = min(\n                    num_grid - 1,\n                    int(\n                        floordiv(\n                            (center_w + pos_w_range) / upsampled_size[1],\n                            (1. / num_grid),\n                            rounding_mode='trunc')))\n\n                top = max(top_box, coord_h - 1)\n                down = min(down_box, coord_h + 1)\n                left = max(coord_w - 1, left_box)\n                right = min(right_box, coord_w + 1)\n\n                labels[top:(down + 1), left:(right + 1)] = gt_label\n                # ins\n                gt_mask = np.uint8(gt_mask.cpu().numpy())\n                # Follow the original implementation, F.interpolate is\n                # different from cv2 and opencv\n                gt_mask = mmcv.imrescale(gt_mask, scale=1. / output_stride)\n                gt_mask = torch.from_numpy(gt_mask).to(device=device)\n\n                for i in range(top, down + 1):\n                    for j in range(left, right + 1):\n                        index = int(i * num_grid + j)\n                        mask_target[index, :gt_mask.shape[0], :gt_mask.\n                                    shape[1]] = gt_mask\n                        pos_mask[index] = True\n            mlvl_pos_mask_targets.append(mask_target[pos_mask])\n            mlvl_labels.append(labels)\n            mlvl_pos_masks.append(pos_mask)\n        return mlvl_pos_mask_targets, mlvl_labels, mlvl_pos_masks\n\n    def get_results(self, mlvl_mask_preds, mlvl_cls_scores, img_metas,\n                    **kwargs):\n        \"\"\"Get multi-image mask results.\n\n        Args:\n            mlvl_mask_preds (list[Tensor]): Multi-level mask prediction.\n                Each element in the list has shape\n                (batch_size, num_grids**2 ,h ,w).\n            mlvl_cls_scores (list[Tensor]): Multi-level scores. 
Each element\n                in the list has shape\n                (batch_size, num_classes, num_grids ,num_grids).\n            img_metas (list[dict]): Meta information of all images.\n\n        Returns:\n            list[:obj:`InstanceData`]: Processed results of multiple\n            images.Each :obj:`InstanceData` usually contains\n            following keys.\n\n                - scores (Tensor): Classification scores, has shape\n                  (num_instance,).\n                - labels (Tensor): Has shape (num_instances,).\n                - masks (Tensor): Processed mask results, has\n                  shape (num_instances, h, w).\n        \"\"\"\n        mlvl_cls_scores = [\n            item.permute(0, 2, 3, 1) for item in mlvl_cls_scores\n        ]\n        assert len(mlvl_mask_preds) == len(mlvl_cls_scores)\n        num_levels = len(mlvl_cls_scores)\n\n        results_list = []\n        for img_id in range(len(img_metas)):\n            cls_pred_list = [\n                mlvl_cls_scores[lvl][img_id].view(-1, self.cls_out_channels)\n                for lvl in range(num_levels)\n            ]\n            mask_pred_list = [\n                mlvl_mask_preds[lvl][img_id] for lvl in range(num_levels)\n            ]\n\n            cls_pred_list = torch.cat(cls_pred_list, dim=0)\n            mask_pred_list = torch.cat(mask_pred_list, dim=0)\n\n            results = self._get_results_single(\n                cls_pred_list, mask_pred_list, img_meta=img_metas[img_id])\n            results_list.append(results)\n\n        return results_list\n\n    def _get_results_single(self, cls_scores, mask_preds, img_meta, cfg=None):\n        \"\"\"Get processed mask related results of single image.\n\n        Args:\n            cls_scores (Tensor): Classification score of all points\n                in single image, has shape (num_points, num_classes).\n            mask_preds (Tensor): Mask prediction of all points in\n                single image, has shape (num_points, feat_h, feat_w).\n            img_meta (dict): Meta information of corresponding image.\n            cfg (dict, optional): Config used in test phase.\n                Default: None.\n\n        Returns:\n            :obj:`InstanceData`: Processed results of single image.\n             it usually contains following keys.\n\n                - scores (Tensor): Classification scores, has shape\n                  (num_instance,).\n                - labels (Tensor): Has shape (num_instances,).\n                - masks (Tensor): Processed mask results, has\n                  shape (num_instances, h, w).\n        \"\"\"\n\n        def empty_results(results, cls_scores):\n            \"\"\"Generate a empty results.\"\"\"\n            results.scores = cls_scores.new_ones(0)\n            results.masks = cls_scores.new_zeros(0, *results.ori_shape[:2])\n            results.labels = cls_scores.new_ones(0)\n            return results\n\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_scores) == len(mask_preds)\n        results = InstanceData(img_meta)\n\n        featmap_size = mask_preds.size()[-2:]\n\n        img_shape = results.img_shape\n        ori_shape = results.ori_shape\n\n        h, w, _ = img_shape\n        upsampled_size = (featmap_size[0] * 4, featmap_size[1] * 4)\n\n        score_mask = (cls_scores > cfg.score_thr)\n        cls_scores = cls_scores[score_mask]\n        if len(cls_scores) == 0:\n            return empty_results(results, cls_scores)\n\n        inds = score_mask.nonzero()\n        cls_labels = 
inds[:, 1]\n\n        # Filter out masks whose area is smaller than the\n        # stride of the corresponding feature level\n        lvl_interval = cls_labels.new_tensor(self.num_grids).pow(2).cumsum(0)\n        strides = cls_scores.new_ones(lvl_interval[-1])\n        strides[:lvl_interval[0]] *= self.strides[0]\n        for lvl in range(1, self.num_levels):\n            strides[lvl_interval[lvl -\n                                 1]:lvl_interval[lvl]] *= self.strides[lvl]\n        strides = strides[inds[:, 0]]\n        mask_preds = mask_preds[inds[:, 0]]\n\n        masks = mask_preds > cfg.mask_thr\n        sum_masks = masks.sum((1, 2)).float()\n        keep = sum_masks > strides\n        if keep.sum() == 0:\n            return empty_results(results, cls_scores)\n        masks = masks[keep]\n        mask_preds = mask_preds[keep]\n        sum_masks = sum_masks[keep]\n        cls_scores = cls_scores[keep]\n        cls_labels = cls_labels[keep]\n\n        # maskness.\n        mask_scores = (mask_preds * masks).sum((1, 2)) / sum_masks\n        cls_scores *= mask_scores\n\n        scores, labels, _, keep_inds = mask_matrix_nms(\n            masks,\n            cls_labels,\n            cls_scores,\n            mask_area=sum_masks,\n            nms_pre=cfg.nms_pre,\n            max_num=cfg.max_per_img,\n            kernel=cfg.kernel,\n            sigma=cfg.sigma,\n            filter_thr=cfg.filter_thr)\n        mask_preds = mask_preds[keep_inds]\n        mask_preds = F.interpolate(\n            mask_preds.unsqueeze(0), size=upsampled_size,\n            mode='bilinear')[:, :, :h, :w]\n        mask_preds = F.interpolate(\n            mask_preds, size=ori_shape[:2], mode='bilinear').squeeze(0)\n        masks = mask_preds > cfg.mask_thr\n\n        results.masks = masks\n        results.labels = labels\n        results.scores = scores\n\n        return results\n\n\n@HEADS.register_module()\nclass DecoupledSOLOHead(SOLOHead):\n    \"\"\"Decoupled SOLO mask head used in `SOLO: Segmenting Objects by Locations.\n\n    <https://arxiv.org/abs/1912.04488>`_\n\n    Args:\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 init_cfg=[\n                     dict(type='Normal', layer='Conv2d', std=0.01),\n                     dict(\n                         type='Normal',\n                         std=0.01,\n                         bias_prob=0.01,\n                         override=dict(name='conv_mask_list_x')),\n                     dict(\n                         type='Normal',\n                         std=0.01,\n                         bias_prob=0.01,\n                         override=dict(name='conv_mask_list_y')),\n                     dict(\n                         type='Normal',\n                         std=0.01,\n                         bias_prob=0.01,\n                         override=dict(name='conv_cls'))\n                 ],\n                 **kwargs):\n        super(DecoupledSOLOHead, self).__init__(\n            *args, init_cfg=init_cfg, **kwargs)\n\n    def _init_layers(self):\n        self.mask_convs_x = nn.ModuleList()\n        self.mask_convs_y = nn.ModuleList()\n        self.cls_convs = nn.ModuleList()\n\n        for i in range(self.stacked_convs):\n            chn = self.in_channels + 1 if i == 0 else self.feat_channels\n            self.mask_convs_x.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n  
                  stride=1,\n                    padding=1,\n                    norm_cfg=self.norm_cfg))\n            self.mask_convs_y.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    norm_cfg=self.norm_cfg))\n\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    norm_cfg=self.norm_cfg))\n\n        self.conv_mask_list_x = nn.ModuleList()\n        self.conv_mask_list_y = nn.ModuleList()\n        for num_grid in self.num_grids:\n            self.conv_mask_list_x.append(\n                nn.Conv2d(self.feat_channels, num_grid, 3, padding=1))\n            self.conv_mask_list_y.append(\n                nn.Conv2d(self.feat_channels, num_grid, 3, padding=1))\n        self.conv_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n\n    def forward(self, feats):\n        assert len(feats) == self.num_levels\n        feats = self.resize_feats(feats)\n        mask_preds_x = []\n        mask_preds_y = []\n        cls_preds = []\n        for i in range(self.num_levels):\n            x = feats[i]\n            mask_feat = x\n            cls_feat = x\n            # generate and concat the coordinate\n            coord_feat = generate_coordinate(mask_feat.size(),\n                                             mask_feat.device)\n            mask_feat_x = torch.cat([mask_feat, coord_feat[:, 0:1, ...]], 1)\n            mask_feat_y = torch.cat([mask_feat, coord_feat[:, 1:2, ...]], 1)\n\n            for mask_layer_x, mask_layer_y in \\\n                    zip(self.mask_convs_x, self.mask_convs_y):\n                mask_feat_x = mask_layer_x(mask_feat_x)\n                mask_feat_y = mask_layer_y(mask_feat_y)\n\n            mask_feat_x = F.interpolate(\n                mask_feat_x, scale_factor=2, mode='bilinear')\n            mask_feat_y = F.interpolate(\n                mask_feat_y, scale_factor=2, mode='bilinear')\n\n            mask_pred_x = self.conv_mask_list_x[i](mask_feat_x)\n            mask_pred_y = self.conv_mask_list_y[i](mask_feat_y)\n\n            # cls branch\n            for j, cls_layer in enumerate(self.cls_convs):\n                if j == self.cls_down_index:\n                    num_grid = self.num_grids[i]\n                    cls_feat = F.interpolate(\n                        cls_feat, size=num_grid, mode='bilinear')\n                cls_feat = cls_layer(cls_feat)\n\n            cls_pred = self.conv_cls(cls_feat)\n\n            if not self.training:\n                feat_wh = feats[0].size()[-2:]\n                upsampled_size = (feat_wh[0] * 2, feat_wh[1] * 2)\n                mask_pred_x = F.interpolate(\n                    mask_pred_x.sigmoid(),\n                    size=upsampled_size,\n                    mode='bilinear')\n                mask_pred_y = F.interpolate(\n                    mask_pred_y.sigmoid(),\n                    size=upsampled_size,\n                    mode='bilinear')\n                cls_pred = cls_pred.sigmoid()\n                # get local maximum\n                local_max = F.max_pool2d(cls_pred, 2, stride=1, padding=1)\n                keep_mask = local_max[:, :, :-1, :-1] == cls_pred\n                
cls_pred = cls_pred * keep_mask\n\n            mask_preds_x.append(mask_pred_x)\n            mask_preds_y.append(mask_pred_y)\n            cls_preds.append(cls_pred)\n        return mask_preds_x, mask_preds_y, cls_preds\n\n    def loss(self,\n             mlvl_mask_preds_x,\n             mlvl_mask_preds_y,\n             mlvl_cls_preds,\n             gt_labels,\n             gt_masks,\n             img_metas,\n             gt_bboxes=None,\n             **kwargs):\n        \"\"\"Calculate the loss of total batch.\n\n        Args:\n            mlvl_mask_preds_x (list[Tensor]): Multi-level mask prediction\n                from x branch. Each element in the list has shape\n                (batch_size, num_grids ,h ,w).\n            mlvl_mask_preds_y (list[Tensor]): Multi-level mask prediction\n                from y branch. Each element in the list has shape\n                (batch_size, num_grids ,h ,w).\n            mlvl_cls_preds (list[Tensor]): Multi-level scores. Each element\n                in the list has shape\n                (batch_size, num_classes, num_grids ,num_grids).\n            gt_labels (list[Tensor]): Labels of multiple images.\n            gt_masks (list[Tensor]): Ground truth masks of multiple images.\n                Each has shape (num_instances, h, w).\n            img_metas (list[dict]): Meta information of multiple images.\n            gt_bboxes (list[Tensor]): Ground truth bboxes of multiple\n                images. Default: None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        num_levels = self.num_levels\n        num_imgs = len(gt_labels)\n        featmap_sizes = [featmap.size()[-2:] for featmap in mlvl_mask_preds_x]\n\n        pos_mask_targets, labels, \\\n            xy_pos_indexes = \\\n            multi_apply(self._get_targets_single,\n                        gt_bboxes,\n                        gt_labels,\n                        gt_masks,\n                        featmap_sizes=featmap_sizes)\n\n        # change from the outside list meaning multi images\n        # to the outside list meaning multi levels\n        mlvl_pos_mask_targets = [[] for _ in range(num_levels)]\n        mlvl_pos_mask_preds_x = [[] for _ in range(num_levels)]\n        mlvl_pos_mask_preds_y = [[] for _ in range(num_levels)]\n        mlvl_labels = [[] for _ in range(num_levels)]\n        for img_id in range(num_imgs):\n\n            for lvl in range(num_levels):\n                mlvl_pos_mask_targets[lvl].append(\n                    pos_mask_targets[img_id][lvl])\n                mlvl_pos_mask_preds_x[lvl].append(\n                    mlvl_mask_preds_x[lvl][img_id,\n                                           xy_pos_indexes[img_id][lvl][:, 1]])\n                mlvl_pos_mask_preds_y[lvl].append(\n                    mlvl_mask_preds_y[lvl][img_id,\n                                           xy_pos_indexes[img_id][lvl][:, 0]])\n                mlvl_labels[lvl].append(labels[img_id][lvl].flatten())\n\n        # concatenate over multiple images\n        temp_mlvl_cls_preds = []\n        for lvl in range(num_levels):\n            mlvl_pos_mask_targets[lvl] = torch.cat(\n                mlvl_pos_mask_targets[lvl], dim=0)\n            mlvl_pos_mask_preds_x[lvl] = torch.cat(\n                mlvl_pos_mask_preds_x[lvl], dim=0)\n            mlvl_pos_mask_preds_y[lvl] = torch.cat(\n                mlvl_pos_mask_preds_y[lvl], dim=0)\n            mlvl_labels[lvl] = torch.cat(mlvl_labels[lvl], dim=0)\n
            temp_mlvl_cls_preds.append(mlvl_cls_preds[lvl].permute(\n                0, 2, 3, 1).reshape(-1, self.cls_out_channels))\n\n        num_pos = 0.\n        # dice loss\n        loss_mask = []\n        for pred_x, pred_y, target in \\\n                zip(mlvl_pos_mask_preds_x,\n                    mlvl_pos_mask_preds_y, mlvl_pos_mask_targets):\n            num_masks = pred_x.size(0)\n            if num_masks == 0:\n                # make sure gradients can still be computed\n                loss_mask.append((pred_x.sum() + pred_y.sum()).unsqueeze(0))\n                continue\n            num_pos += num_masks\n            pred_mask = pred_y.sigmoid() * pred_x.sigmoid()\n            loss_mask.append(\n                self.loss_mask(pred_mask, target, reduction_override='none'))\n        if num_pos > 0:\n            loss_mask = torch.cat(loss_mask).sum() / num_pos\n        else:\n            loss_mask = torch.cat(loss_mask).mean()\n\n        # cate\n        flatten_labels = torch.cat(mlvl_labels)\n        flatten_cls_preds = torch.cat(temp_mlvl_cls_preds)\n\n        loss_cls = self.loss_cls(\n            flatten_cls_preds, flatten_labels, avg_factor=num_pos + 1)\n        return dict(loss_mask=loss_mask, loss_cls=loss_cls)\n\n    def _get_targets_single(self,\n                            gt_bboxes,\n                            gt_labels,\n                            gt_masks,\n                            featmap_sizes=None):\n        \"\"\"Compute targets for predictions of a single image.\n\n        Args:\n            gt_bboxes (Tensor): Ground truth bbox of each instance,\n                shape (num_gts, 4).\n            gt_labels (Tensor): Ground truth label of each instance,\n                shape (num_gts,).\n            gt_masks (Tensor): Ground truth mask of each instance,\n                shape (num_gts, h, w).\n            featmap_sizes (list[:obj:`torch.size`]): Size of each\n                feature map from feature pyramid, each element\n                means (feat_h, feat_w). 
Default: None.\n\n        Returns:\n            Tuple: Usually returns a tuple containing targets for predictions.\n\n                - mlvl_pos_mask_targets (list[Tensor]): Each element represent\n                  the binary mask targets for positive points in this\n                  level, has shape (num_pos, out_h, out_w).\n                - mlvl_labels (list[Tensor]): Each element is\n                  classification labels for all\n                  points in this level, has shape\n                  (num_grid, num_grid).\n                - mlvl_xy_pos_indexes (list[Tensor]): Each element\n                  in the list contains the index of positive samples in\n                  corresponding level, has shape (num_pos, 2), last\n                  dimension 2 present (index_x, index_y).\n        \"\"\"\n        mlvl_pos_mask_targets, mlvl_labels, \\\n            mlvl_pos_masks = \\\n            super()._get_targets_single(gt_bboxes, gt_labels, gt_masks,\n                                        featmap_sizes=featmap_sizes)\n\n        mlvl_xy_pos_indexes = [(item - self.num_classes).nonzero()\n                               for item in mlvl_labels]\n\n        return mlvl_pos_mask_targets, mlvl_labels, mlvl_xy_pos_indexes\n\n    def get_results(self,\n                    mlvl_mask_preds_x,\n                    mlvl_mask_preds_y,\n                    mlvl_cls_scores,\n                    img_metas,\n                    rescale=None,\n                    **kwargs):\n        \"\"\"Get multi-image mask results.\n\n        Args:\n            mlvl_mask_preds_x (list[Tensor]): Multi-level mask prediction\n                from x branch. Each element in the list has shape\n                (batch_size, num_grids ,h ,w).\n            mlvl_mask_preds_y (list[Tensor]): Multi-level mask prediction\n                from y branch. Each element in the list has shape\n                (batch_size, num_grids ,h ,w).\n            mlvl_cls_scores (list[Tensor]): Multi-level scores. 
Each element\n                in the list has shape\n                (batch_size, num_classes ,num_grids ,num_grids).\n            img_metas (list[dict]): Meta information of all images.\n\n        Returns:\n            list[:obj:`InstanceData`]: Processed results of multiple\n            images.Each :obj:`InstanceData` usually contains\n            following keys.\n\n                - scores (Tensor): Classification scores, has shape\n                  (num_instance,).\n                - labels (Tensor): Has shape (num_instances,).\n                - masks (Tensor): Processed mask results, has\n                  shape (num_instances, h, w).\n        \"\"\"\n        mlvl_cls_scores = [\n            item.permute(0, 2, 3, 1) for item in mlvl_cls_scores\n        ]\n        assert len(mlvl_mask_preds_x) == len(mlvl_cls_scores)\n        num_levels = len(mlvl_cls_scores)\n\n        results_list = []\n        for img_id in range(len(img_metas)):\n            cls_pred_list = [\n                mlvl_cls_scores[i][img_id].view(\n                    -1, self.cls_out_channels).detach()\n                for i in range(num_levels)\n            ]\n            mask_pred_list_x = [\n                mlvl_mask_preds_x[i][img_id] for i in range(num_levels)\n            ]\n            mask_pred_list_y = [\n                mlvl_mask_preds_y[i][img_id] for i in range(num_levels)\n            ]\n\n            cls_pred_list = torch.cat(cls_pred_list, dim=0)\n            mask_pred_list_x = torch.cat(mask_pred_list_x, dim=0)\n            mask_pred_list_y = torch.cat(mask_pred_list_y, dim=0)\n\n            results = self._get_results_single(\n                cls_pred_list,\n                mask_pred_list_x,\n                mask_pred_list_y,\n                img_meta=img_metas[img_id],\n                cfg=self.test_cfg)\n            results_list.append(results)\n        return results_list\n\n    def _get_results_single(self, cls_scores, mask_preds_x, mask_preds_y,\n                            img_meta, cfg):\n        \"\"\"Get processed mask related results of single image.\n\n        Args:\n            cls_scores (Tensor): Classification score of all points\n                in single image, has shape (num_points, num_classes).\n            mask_preds_x (Tensor): Mask prediction of x branch of\n                all points in single image, has shape\n                (sum_num_grids, feat_h, feat_w).\n            mask_preds_y (Tensor): Mask prediction of y branch of\n                all points in single image, has shape\n                (sum_num_grids, feat_h, feat_w).\n            img_meta (dict): Meta information of corresponding image.\n            cfg (dict): Config used in test phase.\n\n        Returns:\n            :obj:`InstanceData`: Processed results of single image.\n             it usually contains following keys.\n\n                - scores (Tensor): Classification scores, has shape\n                  (num_instance,).\n                - labels (Tensor): Has shape (num_instances,).\n                - masks (Tensor): Processed mask results, has\n                  shape (num_instances, h, w).\n        \"\"\"\n\n        def empty_results(results, cls_scores):\n            \"\"\"Generate a empty results.\"\"\"\n            results.scores = cls_scores.new_ones(0)\n            results.masks = cls_scores.new_zeros(0, *results.ori_shape[:2])\n            results.labels = cls_scores.new_ones(0)\n            return results\n\n        cfg = self.test_cfg if cfg is None else cfg\n\n        results = 
InstanceData(img_meta)\n        img_shape = results.img_shape\n        ori_shape = results.ori_shape\n        h, w, _ = img_shape\n        featmap_size = mask_preds_x.size()[-2:]\n        upsampled_size = (featmap_size[0] * 4, featmap_size[1] * 4)\n\n        score_mask = (cls_scores > cfg.score_thr)\n        cls_scores = cls_scores[score_mask]\n        inds = score_mask.nonzero()\n        lvl_interval = inds.new_tensor(self.num_grids).pow(2).cumsum(0)\n        num_all_points = lvl_interval[-1]\n        lvl_start_index = inds.new_ones(num_all_points)\n        num_grids = inds.new_ones(num_all_points)\n        seg_size = inds.new_tensor(self.num_grids).cumsum(0)\n        mask_lvl_start_index = inds.new_ones(num_all_points)\n        strides = inds.new_ones(num_all_points)\n\n        lvl_start_index[:lvl_interval[0]] *= 0\n        mask_lvl_start_index[:lvl_interval[0]] *= 0\n        num_grids[:lvl_interval[0]] *= self.num_grids[0]\n        strides[:lvl_interval[0]] *= self.strides[0]\n\n        for lvl in range(1, self.num_levels):\n            lvl_start_index[lvl_interval[lvl - 1]:lvl_interval[lvl]] *= \\\n                lvl_interval[lvl - 1]\n            mask_lvl_start_index[lvl_interval[lvl - 1]:lvl_interval[lvl]] *= \\\n                seg_size[lvl - 1]\n            num_grids[lvl_interval[lvl - 1]:lvl_interval[lvl]] *= \\\n                self.num_grids[lvl]\n            strides[lvl_interval[lvl - 1]:lvl_interval[lvl]] *= \\\n                self.strides[lvl]\n\n        lvl_start_index = lvl_start_index[inds[:, 0]]\n        mask_lvl_start_index = mask_lvl_start_index[inds[:, 0]]\n        num_grids = num_grids[inds[:, 0]]\n        strides = strides[inds[:, 0]]\n\n        y_lvl_offset = (inds[:, 0] - lvl_start_index) // num_grids\n        x_lvl_offset = (inds[:, 0] - lvl_start_index) % num_grids\n        y_inds = mask_lvl_start_index + y_lvl_offset\n        x_inds = mask_lvl_start_index + x_lvl_offset\n\n        cls_labels = inds[:, 1]\n        mask_preds = mask_preds_x[x_inds, ...] 
* mask_preds_y[y_inds, ...]\n\n        masks = mask_preds > cfg.mask_thr\n        sum_masks = masks.sum((1, 2)).float()\n        keep = sum_masks > strides\n        if keep.sum() == 0:\n            return empty_results(results, cls_scores)\n\n        masks = masks[keep]\n        mask_preds = mask_preds[keep]\n        sum_masks = sum_masks[keep]\n        cls_scores = cls_scores[keep]\n        cls_labels = cls_labels[keep]\n\n        # maskness.\n        mask_scores = (mask_preds * masks).sum((1, 2)) / sum_masks\n        cls_scores *= mask_scores\n\n        scores, labels, _, keep_inds = mask_matrix_nms(\n            masks,\n            cls_labels,\n            cls_scores,\n            mask_area=sum_masks,\n            nms_pre=cfg.nms_pre,\n            max_num=cfg.max_per_img,\n            kernel=cfg.kernel,\n            sigma=cfg.sigma,\n            filter_thr=cfg.filter_thr)\n        mask_preds = mask_preds[keep_inds]\n        mask_preds = F.interpolate(\n            mask_preds.unsqueeze(0), size=upsampled_size,\n            mode='bilinear')[:, :, :h, :w]\n        mask_preds = F.interpolate(\n            mask_preds, size=ori_shape[:2], mode='bilinear').squeeze(0)\n        masks = mask_preds > cfg.mask_thr\n\n        results.masks = masks\n        results.labels = labels\n        results.scores = scores\n\n        return results\n\n\n@HEADS.register_module()\nclass DecoupledSOLOLightHead(DecoupledSOLOHead):\n    \"\"\"Decoupled Light SOLO mask head used in `SOLO: Segmenting Objects by\n    Locations <https://arxiv.org/abs/1912.04488>`_\n\n    Args:\n        dcn_cfg (dict, optional): Config of DCN applied to the last conv layer\n            of mask_convs and cls_convs. Default: None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 dcn_cfg=None,\n                 init_cfg=[\n                     dict(type='Normal', layer='Conv2d', std=0.01),\n                     dict(\n                         type='Normal',\n                         std=0.01,\n                         bias_prob=0.01,\n                         override=dict(name='conv_mask_list_x')),\n                     dict(\n                         type='Normal',\n                         std=0.01,\n                         bias_prob=0.01,\n                         override=dict(name='conv_mask_list_y')),\n                     dict(\n                         type='Normal',\n                         std=0.01,\n                         bias_prob=0.01,\n                         override=dict(name='conv_cls'))\n                 ],\n                 **kwargs):\n        assert dcn_cfg is None or isinstance(dcn_cfg, dict)\n        self.dcn_cfg = dcn_cfg\n        super(DecoupledSOLOLightHead, self).__init__(\n            *args, init_cfg=init_cfg, **kwargs)\n\n    def _init_layers(self):\n        self.mask_convs = nn.ModuleList()\n        self.cls_convs = nn.ModuleList()\n\n        for i in range(self.stacked_convs):\n            if self.dcn_cfg is not None\\\n                    and i == self.stacked_convs - 1:\n                conv_cfg = self.dcn_cfg\n            else:\n                conv_cfg = None\n\n            chn = self.in_channels + 2 if i == 0 else self.feat_channels\n            self.mask_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    
norm_cfg=self.norm_cfg))\n\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg))\n\n        self.conv_mask_list_x = nn.ModuleList()\n        self.conv_mask_list_y = nn.ModuleList()\n        for num_grid in self.num_grids:\n            self.conv_mask_list_x.append(\n                nn.Conv2d(self.feat_channels, num_grid, 3, padding=1))\n            self.conv_mask_list_y.append(\n                nn.Conv2d(self.feat_channels, num_grid, 3, padding=1))\n        self.conv_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n\n    def forward(self, feats):\n        assert len(feats) == self.num_levels\n        feats = self.resize_feats(feats)\n        mask_preds_x = []\n        mask_preds_y = []\n        cls_preds = []\n        for i in range(self.num_levels):\n            x = feats[i]\n            mask_feat = x\n            cls_feat = x\n            # generate and concat the coordinate\n            coord_feat = generate_coordinate(mask_feat.size(),\n                                             mask_feat.device)\n            mask_feat = torch.cat([mask_feat, coord_feat], 1)\n\n            for mask_layer in self.mask_convs:\n                mask_feat = mask_layer(mask_feat)\n\n            mask_feat = F.interpolate(\n                mask_feat, scale_factor=2, mode='bilinear')\n\n            mask_pred_x = self.conv_mask_list_x[i](mask_feat)\n            mask_pred_y = self.conv_mask_list_y[i](mask_feat)\n\n            # cls branch\n            for j, cls_layer in enumerate(self.cls_convs):\n                if j == self.cls_down_index:\n                    num_grid = self.num_grids[i]\n                    cls_feat = F.interpolate(\n                        cls_feat, size=num_grid, mode='bilinear')\n                cls_feat = cls_layer(cls_feat)\n\n            cls_pred = self.conv_cls(cls_feat)\n\n            if not self.training:\n                feat_wh = feats[0].size()[-2:]\n                upsampled_size = (feat_wh[0] * 2, feat_wh[1] * 2)\n                mask_pred_x = F.interpolate(\n                    mask_pred_x.sigmoid(),\n                    size=upsampled_size,\n                    mode='bilinear')\n                mask_pred_y = F.interpolate(\n                    mask_pred_y.sigmoid(),\n                    size=upsampled_size,\n                    mode='bilinear')\n                cls_pred = cls_pred.sigmoid()\n                # get local maximum\n                local_max = F.max_pool2d(cls_pred, 2, stride=1, padding=1)\n                keep_mask = local_max[:, :, :-1, :-1] == cls_pred\n                cls_pred = cls_pred * keep_mask\n\n            mask_preds_x.append(mask_pred_x)\n            mask_preds_y.append(mask_pred_y)\n            cls_preds.append(cls_pred)\n        return mask_preds_x, mask_preds_y, cls_preds\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/solov2_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport mmcv\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, auto_fp16, force_fp32\n\nfrom mmdet.core import InstanceData, mask_matrix_nms, multi_apply\nfrom mmdet.core.utils import center_of_mass, generate_coordinate\nfrom mmdet.models.builder import HEADS\nfrom mmdet.utils.misc import floordiv\nfrom .solo_head import SOLOHead\n\n\nclass MaskFeatModule(BaseModule):\n    \"\"\"SOLOv2 mask feature map branch used in `SOLOv2: Dynamic and Fast\n    Instance Segmentation. <https://arxiv.org/pdf/2003.10152>`_\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int): Number of hidden channels of the mask feature\n             map branch.\n        start_level (int): The starting feature map level from RPN that\n             will be used to predict the mask feature map.\n        end_level (int): The ending feature map level from rpn that\n             will be used to predict the mask feature map.\n        out_channels (int): Number of output channels of the mask feature\n             map branch. This is the channel count of the mask\n             feature map that to be dynamically convolved with the predicted\n             kernel.\n        mask_stride (int): Downsample factor of the mask feature map output.\n            Default: 4.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Config dict for normalization layer. Default: None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 feat_channels,\n                 start_level,\n                 end_level,\n                 out_channels,\n                 mask_stride=4,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 init_cfg=[dict(type='Normal', layer='Conv2d', std=0.01)]):\n        super().__init__(init_cfg=init_cfg)\n\n        self.in_channels = in_channels\n        self.feat_channels = feat_channels\n        self.start_level = start_level\n        self.end_level = end_level\n        self.mask_stride = mask_stride\n        assert start_level >= 0 and end_level >= start_level\n        self.out_channels = out_channels\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self._init_layers()\n        self.fp16_enabled = False\n\n    def _init_layers(self):\n        self.convs_all_levels = nn.ModuleList()\n        for i in range(self.start_level, self.end_level + 1):\n            convs_per_level = nn.Sequential()\n            if i == 0:\n                convs_per_level.add_module(\n                    f'conv{i}',\n                    ConvModule(\n                        self.in_channels,\n                        self.feat_channels,\n                        3,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg,\n                        inplace=False))\n                self.convs_all_levels.append(convs_per_level)\n                continue\n\n            for j in range(i):\n                if j == 0:\n                    if i == self.end_level:\n                        chn = self.in_channels + 2\n                    else:\n                        chn = self.in_channels\n                    
convs_per_level.add_module(\n                        f'conv{j}',\n                        ConvModule(\n                            chn,\n                            self.feat_channels,\n                            3,\n                            padding=1,\n                            conv_cfg=self.conv_cfg,\n                            norm_cfg=self.norm_cfg,\n                            inplace=False))\n                    convs_per_level.add_module(\n                        f'upsample{j}',\n                        nn.Upsample(\n                            scale_factor=2,\n                            mode='bilinear',\n                            align_corners=False))\n                    continue\n\n                convs_per_level.add_module(\n                    f'conv{j}',\n                    ConvModule(\n                        self.feat_channels,\n                        self.feat_channels,\n                        3,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg,\n                        inplace=False))\n                convs_per_level.add_module(\n                    f'upsample{j}',\n                    nn.Upsample(\n                        scale_factor=2, mode='bilinear', align_corners=False))\n\n            self.convs_all_levels.append(convs_per_level)\n\n        self.conv_pred = ConvModule(\n            self.feat_channels,\n            self.out_channels,\n            1,\n            padding=0,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg)\n\n    @auto_fp16()\n    def forward(self, feats):\n        inputs = feats[self.start_level:self.end_level + 1]\n        assert len(inputs) == (self.end_level - self.start_level + 1)\n        feature_add_all_level = self.convs_all_levels[0](inputs[0])\n        for i in range(1, len(inputs)):\n            input_p = inputs[i]\n            if i == len(inputs) - 1:\n                coord_feat = generate_coordinate(input_p.size(),\n                                                 input_p.device)\n                input_p = torch.cat([input_p, coord_feat], 1)\n\n            # fix runtime error of \"+=\" inplace operation in PyTorch 1.10\n            feature_add_all_level = feature_add_all_level + \\\n                self.convs_all_levels[i](input_p)\n\n        feature_pred = self.conv_pred(feature_add_all_level)\n        return feature_pred\n\n\n@HEADS.register_module()\nclass SOLOV2Head(SOLOHead):\n    \"\"\"SOLOv2 mask head used in `SOLOv2: Dynamic and Fast Instance\n    Segmentation. <https://arxiv.org/pdf/2003.10152>`_\n\n    Args:\n        mask_feature_head (dict): Config of SOLOv2MaskFeatHead.\n        dynamic_conv_size (int): Dynamic Conv kernel size. Default: 1.\n        dcn_cfg (dict): Dcn conv configurations in kernel_convs and cls_conv.\n            default: None.\n        dcn_apply_to_all_conv (bool): Whether to use dcn in every layer of\n            kernel_convs and cls_convs, or only the last layer. It shall be set\n            `True` for the normal version of SOLOv2 and `False` for the\n            light-weight version. 
default: True.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 mask_feature_head,\n                 dynamic_conv_size=1,\n                 dcn_cfg=None,\n                 dcn_apply_to_all_conv=True,\n                 init_cfg=[\n                     dict(type='Normal', layer='Conv2d', std=0.01),\n                     dict(\n                         type='Normal',\n                         std=0.01,\n                         bias_prob=0.01,\n                         override=dict(name='conv_cls'))\n                 ],\n                 **kwargs):\n        assert dcn_cfg is None or isinstance(dcn_cfg, dict)\n        self.dcn_cfg = dcn_cfg\n        self.with_dcn = dcn_cfg is not None\n        self.dcn_apply_to_all_conv = dcn_apply_to_all_conv\n        self.dynamic_conv_size = dynamic_conv_size\n        mask_out_channels = mask_feature_head.get('out_channels')\n        self.kernel_out_channels = \\\n            mask_out_channels * self.dynamic_conv_size * self.dynamic_conv_size\n\n        super().__init__(*args, init_cfg=init_cfg, **kwargs)\n\n        # update the in_channels of mask_feature_head\n        if mask_feature_head.get('in_channels', None) is not None:\n            if mask_feature_head.in_channels != self.in_channels:\n                warnings.warn('The `in_channels` of SOLOv2MaskFeatHead and '\n                              'SOLOv2Head should be same, changing '\n                              'mask_feature_head.in_channels to '\n                              f'{self.in_channels}')\n                mask_feature_head.update(in_channels=self.in_channels)\n        else:\n            mask_feature_head.update(in_channels=self.in_channels)\n\n        self.mask_feature_head = MaskFeatModule(**mask_feature_head)\n        self.mask_stride = self.mask_feature_head.mask_stride\n        self.fp16_enabled = False\n\n    def _init_layers(self):\n        self.cls_convs = nn.ModuleList()\n        self.kernel_convs = nn.ModuleList()\n        conv_cfg = None\n        for i in range(self.stacked_convs):\n            if self.with_dcn:\n                if self.dcn_apply_to_all_conv:\n                    conv_cfg = self.dcn_cfg\n                elif i == self.stacked_convs - 1:\n                    # light head\n                    conv_cfg = self.dcn_cfg\n\n            chn = self.in_channels + 2 if i == 0 else self.feat_channels\n            self.kernel_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.norm_cfg is None))\n\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.cls_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.norm_cfg is None))\n\n        self.conv_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n\n        self.conv_kernel = nn.Conv2d(\n            self.feat_channels, self.kernel_out_channels, 3, padding=1)\n\n    @auto_fp16()\n    def forward(self, feats):\n        assert len(feats) == 
self.num_levels\n        mask_feats = self.mask_feature_head(feats)\n        feats = self.resize_feats(feats)\n        mlvl_kernel_preds = []\n        mlvl_cls_preds = []\n        for i in range(self.num_levels):\n            ins_kernel_feat = feats[i]\n            # ins branch\n            # concat coord\n            coord_feat = generate_coordinate(ins_kernel_feat.size(),\n                                             ins_kernel_feat.device)\n            ins_kernel_feat = torch.cat([ins_kernel_feat, coord_feat], 1)\n\n            # kernel branch\n            kernel_feat = ins_kernel_feat\n            kernel_feat = F.interpolate(\n                kernel_feat,\n                size=self.num_grids[i],\n                mode='bilinear',\n                align_corners=False)\n\n            cate_feat = kernel_feat[:, :-2, :, :]\n\n            kernel_feat = kernel_feat.contiguous()\n            for i, kernel_conv in enumerate(self.kernel_convs):\n                kernel_feat = kernel_conv(kernel_feat)\n            kernel_pred = self.conv_kernel(kernel_feat)\n\n            # cate branch\n            cate_feat = cate_feat.contiguous()\n            for i, cls_conv in enumerate(self.cls_convs):\n                cate_feat = cls_conv(cate_feat)\n            cate_pred = self.conv_cls(cate_feat)\n\n            mlvl_kernel_preds.append(kernel_pred)\n            mlvl_cls_preds.append(cate_pred)\n\n        return mlvl_kernel_preds, mlvl_cls_preds, mask_feats\n\n    def _get_targets_single(self,\n                            gt_bboxes,\n                            gt_labels,\n                            gt_masks,\n                            featmap_size=None):\n        \"\"\"Compute targets for predictions of single image.\n\n        Args:\n            gt_bboxes (Tensor): Ground truth bbox of each instance,\n                shape (num_gts, 4).\n            gt_labels (Tensor): Ground truth label of each instance,\n                shape (num_gts,).\n            gt_masks (Tensor): Ground truth mask of each instance,\n                shape (num_gts, h, w).\n            featmap_sizes (:obj:`torch.size`): Size of UNified mask\n                feature map used to generate instance segmentation\n                masks by dynamic convolution, each element means\n                (feat_h, feat_w). 
Default: None.\n\n        Returns:\n            Tuple: Usually returns a tuple containing targets for predictions.\n\n                - mlvl_pos_mask_targets (list[Tensor]): Each element represent\n                  the binary mask targets for positive points in this\n                  level, has shape (num_pos, out_h, out_w).\n                - mlvl_labels (list[Tensor]): Each element is\n                  classification labels for all\n                  points in this level, has shape\n                  (num_grid, num_grid).\n                - mlvl_pos_masks  (list[Tensor]): Each element is\n                  a `BoolTensor` to represent whether the\n                  corresponding point in single level\n                  is positive, has shape (num_grid **2).\n                - mlvl_pos_indexes  (list[list]): Each element\n                  in the list contains the positive index in\n                  corresponding level, has shape (num_pos).\n        \"\"\"\n\n        device = gt_labels.device\n        gt_areas = torch.sqrt((gt_bboxes[:, 2] - gt_bboxes[:, 0]) *\n                              (gt_bboxes[:, 3] - gt_bboxes[:, 1]))\n\n        mlvl_pos_mask_targets = []\n        mlvl_pos_indexes = []\n        mlvl_labels = []\n        mlvl_pos_masks = []\n        for (lower_bound, upper_bound), num_grid \\\n                in zip(self.scale_ranges, self.num_grids):\n            mask_target = []\n            # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n            pos_index = []\n            labels = torch.zeros([num_grid, num_grid],\n                                 dtype=torch.int64,\n                                 device=device) + self.num_classes\n            pos_mask = torch.zeros([num_grid**2],\n                                   dtype=torch.bool,\n                                   device=device)\n\n            gt_inds = ((gt_areas >= lower_bound) &\n                       (gt_areas <= upper_bound)).nonzero().flatten()\n            if len(gt_inds) == 0:\n                mlvl_pos_mask_targets.append(\n                    torch.zeros([0, featmap_size[0], featmap_size[1]],\n                                dtype=torch.uint8,\n                                device=device))\n                mlvl_labels.append(labels)\n                mlvl_pos_masks.append(pos_mask)\n                mlvl_pos_indexes.append([])\n                continue\n            hit_gt_bboxes = gt_bboxes[gt_inds]\n            hit_gt_labels = gt_labels[gt_inds]\n            hit_gt_masks = gt_masks[gt_inds, ...]\n\n            pos_w_ranges = 0.5 * (hit_gt_bboxes[:, 2] -\n                                  hit_gt_bboxes[:, 0]) * self.pos_scale\n            pos_h_ranges = 0.5 * (hit_gt_bboxes[:, 3] -\n                                  hit_gt_bboxes[:, 1]) * self.pos_scale\n\n            # Make sure hit_gt_masks has a value\n            valid_mask_flags = hit_gt_masks.sum(dim=-1).sum(dim=-1) > 0\n\n            for gt_mask, gt_label, pos_h_range, pos_w_range, \\\n                valid_mask_flag in \\\n                    zip(hit_gt_masks, hit_gt_labels, pos_h_ranges,\n                        pos_w_ranges, valid_mask_flags):\n                if not valid_mask_flag:\n                    continue\n                upsampled_size = (featmap_size[0] * self.mask_stride,\n                                  featmap_size[1] * self.mask_stride)\n                center_h, center_w = center_of_mass(gt_mask)\n\n                coord_w = int(\n                    floordiv((center_w / upsampled_size[1]), (1. 
/ num_grid),\n                             rounding_mode='trunc'))\n                coord_h = int(\n                    floordiv((center_h / upsampled_size[0]), (1. / num_grid),\n                             rounding_mode='trunc'))\n\n                # left, top, right, down\n                top_box = max(\n                    0,\n                    int(\n                        floordiv(\n                            (center_h - pos_h_range) / upsampled_size[0],\n                            (1. / num_grid),\n                            rounding_mode='trunc')))\n                down_box = min(\n                    num_grid - 1,\n                    int(\n                        floordiv(\n                            (center_h + pos_h_range) / upsampled_size[0],\n                            (1. / num_grid),\n                            rounding_mode='trunc')))\n                left_box = max(\n                    0,\n                    int(\n                        floordiv(\n                            (center_w - pos_w_range) / upsampled_size[1],\n                            (1. / num_grid),\n                            rounding_mode='trunc')))\n                right_box = min(\n                    num_grid - 1,\n                    int(\n                        floordiv(\n                            (center_w + pos_w_range) / upsampled_size[1],\n                            (1. / num_grid),\n                            rounding_mode='trunc')))\n\n                top = max(top_box, coord_h - 1)\n                down = min(down_box, coord_h + 1)\n                left = max(coord_w - 1, left_box)\n                right = min(right_box, coord_w + 1)\n\n                labels[top:(down + 1), left:(right + 1)] = gt_label\n                # ins\n                gt_mask = np.uint8(gt_mask.cpu().numpy())\n                # Follow the original implementation, F.interpolate is\n                # different from cv2 and opencv\n                gt_mask = mmcv.imrescale(gt_mask, scale=1. 
/ self.mask_stride)\n                gt_mask = torch.from_numpy(gt_mask).to(device=device)\n\n                for i in range(top, down + 1):\n                    for j in range(left, right + 1):\n                        index = int(i * num_grid + j)\n                        this_mask_target = torch.zeros(\n                            [featmap_size[0], featmap_size[1]],\n                            dtype=torch.uint8,\n                            device=device)\n                        this_mask_target[:gt_mask.shape[0], :gt_mask.\n                                         shape[1]] = gt_mask\n                        mask_target.append(this_mask_target)\n                        pos_mask[index] = True\n                        pos_index.append(index)\n            if len(mask_target) == 0:\n                mask_target = torch.zeros(\n                    [0, featmap_size[0], featmap_size[1]],\n                    dtype=torch.uint8,\n                    device=device)\n            else:\n                mask_target = torch.stack(mask_target, 0)\n            mlvl_pos_mask_targets.append(mask_target)\n            mlvl_labels.append(labels)\n            mlvl_pos_masks.append(pos_mask)\n            mlvl_pos_indexes.append(pos_index)\n        return (mlvl_pos_mask_targets, mlvl_labels, mlvl_pos_masks,\n                mlvl_pos_indexes)\n\n    @force_fp32(apply_to=('mlvl_kernel_preds', 'mlvl_cls_preds', 'mask_feats'))\n    def loss(self,\n             mlvl_kernel_preds,\n             mlvl_cls_preds,\n             mask_feats,\n             gt_labels,\n             gt_masks,\n             img_metas,\n             gt_bboxes=None,\n             **kwargs):\n        \"\"\"Calculate the loss of total batch.\n\n        Args:\n            mlvl_kernel_preds (list[Tensor]): Multi-level dynamic kernel\n                prediction. The kernel is used to generate instance\n                segmentation masks by dynamic convolution. Each element in the\n                list has shape\n                (batch_size, kernel_out_channels, num_grids, num_grids).\n            mlvl_cls_preds (list[Tensor]): Multi-level scores. Each element\n                in the list has shape\n                (batch_size, num_classes, num_grids, num_grids).\n            mask_feats (Tensor): Unified mask feature map used to generate\n                instance segmentation masks by dynamic convolution. Has shape\n                (batch_size, mask_out_channels, h, w).\n            gt_labels (list[Tensor]): Labels of multiple images.\n            gt_masks (list[Tensor]): Ground truth masks of multiple images.\n                Each has shape (num_instances, h, w).\n            img_metas (list[dict]): Meta information of multiple images.\n            gt_bboxes (list[Tensor]): Ground truth bboxes of multiple\n                images. 
Default: None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        featmap_size = mask_feats.size()[-2:]\n\n        pos_mask_targets, labels, pos_masks, pos_indexes = multi_apply(\n            self._get_targets_single,\n            gt_bboxes,\n            gt_labels,\n            gt_masks,\n            featmap_size=featmap_size)\n\n        mlvl_mask_targets = [\n            torch.cat(lvl_mask_targets, 0)\n            for lvl_mask_targets in zip(*pos_mask_targets)\n        ]\n\n        mlvl_pos_kernel_preds = []\n        for lvl_kernel_preds, lvl_pos_indexes in zip(mlvl_kernel_preds,\n                                                     zip(*pos_indexes)):\n            lvl_pos_kernel_preds = []\n            for img_lvl_kernel_preds, img_lvl_pos_indexes in zip(\n                    lvl_kernel_preds, lvl_pos_indexes):\n                img_lvl_pos_kernel_preds = img_lvl_kernel_preds.view(\n                    img_lvl_kernel_preds.shape[0], -1)[:, img_lvl_pos_indexes]\n                lvl_pos_kernel_preds.append(img_lvl_pos_kernel_preds)\n            mlvl_pos_kernel_preds.append(lvl_pos_kernel_preds)\n\n        # make multilevel mlvl_mask_pred\n        mlvl_mask_preds = []\n        for lvl_pos_kernel_preds in mlvl_pos_kernel_preds:\n            lvl_mask_preds = []\n            for img_id, img_lvl_pos_kernel_pred in enumerate(\n                    lvl_pos_kernel_preds):\n                if img_lvl_pos_kernel_pred.size()[-1] == 0:\n                    continue\n                img_mask_feats = mask_feats[[img_id]]\n                h, w = img_mask_feats.shape[-2:]\n                num_kernel = img_lvl_pos_kernel_pred.shape[1]\n                img_lvl_mask_pred = F.conv2d(\n                    img_mask_feats,\n                    img_lvl_pos_kernel_pred.permute(1, 0).view(\n                        num_kernel, -1, self.dynamic_conv_size,\n                        self.dynamic_conv_size),\n                    stride=1).view(-1, h, w)\n                lvl_mask_preds.append(img_lvl_mask_pred)\n            if len(lvl_mask_preds) == 0:\n                lvl_mask_preds = None\n            else:\n                lvl_mask_preds = torch.cat(lvl_mask_preds, 0)\n            mlvl_mask_preds.append(lvl_mask_preds)\n        # dice loss\n        num_pos = 0\n        for img_pos_masks in pos_masks:\n            for lvl_img_pos_masks in img_pos_masks:\n                num_pos += lvl_img_pos_masks.count_nonzero()\n\n        loss_mask = []\n        for lvl_mask_preds, lvl_mask_targets in zip(mlvl_mask_preds,\n                                                    mlvl_mask_targets):\n            if lvl_mask_preds is None:\n                continue\n            loss_mask.append(\n                self.loss_mask(\n                    lvl_mask_preds,\n                    lvl_mask_targets,\n                    reduction_override='none'))\n        if num_pos > 0:\n            loss_mask = torch.cat(loss_mask).sum() / num_pos\n        else:\n            loss_mask = mask_feats.sum() * 0\n\n        # cate\n        flatten_labels = [\n            torch.cat(\n                [img_lvl_labels.flatten() for img_lvl_labels in lvl_labels])\n            for lvl_labels in zip(*labels)\n        ]\n        flatten_labels = torch.cat(flatten_labels)\n\n        flatten_cls_preds = [\n            lvl_cls_preds.permute(0, 2, 3, 1).reshape(-1, self.num_classes)\n            for lvl_cls_preds in mlvl_cls_preds\n        ]\n        flatten_cls_preds = torch.cat(flatten_cls_preds)\n\n        
loss_cls = self.loss_cls(\n            flatten_cls_preds, flatten_labels, avg_factor=num_pos + 1)\n        return dict(loss_mask=loss_mask, loss_cls=loss_cls)\n\n    @force_fp32(\n        apply_to=('mlvl_kernel_preds', 'mlvl_cls_scores', 'mask_feats'))\n    def get_results(self, mlvl_kernel_preds, mlvl_cls_scores, mask_feats,\n                    img_metas, **kwargs):\n        \"\"\"Get multi-image mask results.\n\n        Args:\n            mlvl_kernel_preds (list[Tensor]): Multi-level dynamic kernel\n                prediction. The kernel is used to generate instance\n                segmentation masks by dynamic convolution. Each element in the\n                list has shape\n                (batch_size, kernel_out_channels, num_grids, num_grids).\n            mlvl_cls_scores (list[Tensor]): Multi-level scores. Each element\n                in the list has shape\n                (batch_size, num_classes, num_grids, num_grids).\n            mask_feats (Tensor): Unified mask feature map used to generate\n                instance segmentation masks by dynamic convolution. Has shape\n                (batch_size, mask_out_channels, h, w).\n            img_metas (list[dict]): Meta information of all images.\n\n        Returns:\n            list[:obj:`InstanceData`]: Processed results of multiple\n            images.Each :obj:`InstanceData` usually contains\n            following keys.\n\n                - scores (Tensor): Classification scores, has shape\n                  (num_instance,).\n                - labels (Tensor): Has shape (num_instances,).\n                - masks (Tensor): Processed mask results, has\n                  shape (num_instances, h, w).\n        \"\"\"\n        num_levels = len(mlvl_cls_scores)\n        assert len(mlvl_kernel_preds) == len(mlvl_cls_scores)\n\n        for lvl in range(num_levels):\n            cls_scores = mlvl_cls_scores[lvl]\n            cls_scores = cls_scores.sigmoid()\n            local_max = F.max_pool2d(cls_scores, 2, stride=1, padding=1)\n            keep_mask = local_max[:, :, :-1, :-1] == cls_scores\n            cls_scores = cls_scores * keep_mask\n            mlvl_cls_scores[lvl] = cls_scores.permute(0, 2, 3, 1)\n\n        result_list = []\n        for img_id in range(len(img_metas)):\n            img_cls_pred = [\n                mlvl_cls_scores[lvl][img_id].view(-1, self.cls_out_channels)\n                for lvl in range(num_levels)\n            ]\n            img_mask_feats = mask_feats[[img_id]]\n            img_kernel_pred = [\n                mlvl_kernel_preds[lvl][img_id].permute(1, 2, 0).view(\n                    -1, self.kernel_out_channels) for lvl in range(num_levels)\n            ]\n            img_cls_pred = torch.cat(img_cls_pred, dim=0)\n            img_kernel_pred = torch.cat(img_kernel_pred, dim=0)\n            result = self._get_results_single(\n                img_kernel_pred,\n                img_cls_pred,\n                img_mask_feats,\n                img_meta=img_metas[img_id])\n            result_list.append(result)\n        return result_list\n\n    def _get_results_single(self,\n                            kernel_preds,\n                            cls_scores,\n                            mask_feats,\n                            img_meta,\n                            cfg=None):\n        \"\"\"Get processed mask related results of single image.\n\n        Args:\n            kernel_preds (Tensor): Dynamic kernel prediction of all points\n                in single image, has shape\n                (num_points, 
kernel_out_channels).\n            cls_scores (Tensor): Classification score of all points\n                in single image, has shape (num_points, num_classes).\n            mask_preds (Tensor): Mask prediction of all points in\n                single image, has shape (num_points, feat_h, feat_w).\n            img_meta (dict): Meta information of corresponding image.\n            cfg (dict, optional): Config used in test phase.\n                Default: None.\n\n        Returns:\n            :obj:`InstanceData`: Processed results of single image.\n             it usually contains following keys.\n                - scores (Tensor): Classification scores, has shape\n                  (num_instance,).\n                - labels (Tensor): Has shape (num_instances,).\n                - masks (Tensor): Processed mask results, has\n                  shape (num_instances, h, w).\n        \"\"\"\n\n        def empty_results(results, cls_scores):\n            \"\"\"Generate a empty results.\"\"\"\n            results.scores = cls_scores.new_ones(0)\n            results.masks = cls_scores.new_zeros(0, *results.ori_shape[:2])\n            results.labels = cls_scores.new_ones(0)\n            return results\n\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(kernel_preds) == len(cls_scores)\n        results = InstanceData(img_meta)\n\n        featmap_size = mask_feats.size()[-2:]\n\n        img_shape = results.img_shape\n        ori_shape = results.ori_shape\n\n        # overall info\n        h, w, _ = img_shape\n        upsampled_size = (featmap_size[0] * self.mask_stride,\n                          featmap_size[1] * self.mask_stride)\n\n        # process.\n        score_mask = (cls_scores > cfg.score_thr)\n        cls_scores = cls_scores[score_mask]\n        if len(cls_scores) == 0:\n            return empty_results(results, cls_scores)\n\n        # cate_labels & kernel_preds\n        inds = score_mask.nonzero()\n        cls_labels = inds[:, 1]\n        kernel_preds = kernel_preds[inds[:, 0]]\n\n        # trans vector.\n        lvl_interval = cls_labels.new_tensor(self.num_grids).pow(2).cumsum(0)\n        strides = kernel_preds.new_ones(lvl_interval[-1])\n\n        strides[:lvl_interval[0]] *= self.strides[0]\n        for lvl in range(1, self.num_levels):\n            strides[lvl_interval[lvl -\n                                 1]:lvl_interval[lvl]] *= self.strides[lvl]\n        strides = strides[inds[:, 0]]\n\n        # mask encoding.\n        kernel_preds = kernel_preds.view(\n            kernel_preds.size(0), -1, self.dynamic_conv_size,\n            self.dynamic_conv_size)\n        mask_preds = F.conv2d(\n            mask_feats, kernel_preds, stride=1).squeeze(0).sigmoid()\n        # mask.\n        masks = mask_preds > cfg.mask_thr\n        sum_masks = masks.sum((1, 2)).float()\n        keep = sum_masks > strides\n        if keep.sum() == 0:\n            return empty_results(results, cls_scores)\n        masks = masks[keep]\n        mask_preds = mask_preds[keep]\n        sum_masks = sum_masks[keep]\n        cls_scores = cls_scores[keep]\n        cls_labels = cls_labels[keep]\n\n        # maskness.\n        mask_scores = (mask_preds * masks).sum((1, 2)) / sum_masks\n        cls_scores *= mask_scores\n\n        scores, labels, _, keep_inds = mask_matrix_nms(\n            masks,\n            cls_labels,\n            cls_scores,\n            mask_area=sum_masks,\n            nms_pre=cfg.nms_pre,\n            max_num=cfg.max_per_img,\n            kernel=cfg.kernel,\n            
sigma=cfg.sigma,\n            filter_thr=cfg.filter_thr)\n        mask_preds = mask_preds[keep_inds]\n        mask_preds = F.interpolate(\n            mask_preds.unsqueeze(0),\n            size=upsampled_size,\n            mode='bilinear',\n            align_corners=False)[:, :, :h, :w]\n        mask_preds = F.interpolate(\n            mask_preds,\n            size=ori_shape[:2],\n            mode='bilinear',\n            align_corners=False).squeeze(0)\n        masks = mask_preds > cfg.mask_thr\n\n        results.masks = masks\n        results.labels = labels\n        results.scores = scores\n\n        return results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/ssd_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule, DepthwiseSeparableConvModule\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (build_assigner, build_bbox_coder,\n                        build_prior_generator, build_sampler, multi_apply)\nfrom ..builder import HEADS\nfrom ..losses import smooth_l1_loss\nfrom .anchor_head import AnchorHead\n\n\n# TODO: add loss evaluator for SSD\n@HEADS.register_module()\nclass SSDHead(AnchorHead):\n    \"\"\"SSD head used in https://arxiv.org/abs/1512.02325.\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        stacked_convs (int): Number of conv layers in cls and reg tower.\n            Default: 0.\n        feat_channels (int): Number of hidden channels when stacked_convs\n            > 0. Default: 256.\n        use_depthwise (bool): Whether to use DepthwiseSeparableConv.\n            Default: False.\n        conv_cfg (dict): Dictionary to construct and config conv layer.\n            Default: None.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Default: None.\n        act_cfg (dict): Dictionary to construct and config activation layer.\n            Default: None.\n        anchor_generator (dict): Config dict for anchor generator\n        bbox_coder (dict): Config of bounding box coder.\n        reg_decoded_bbox (bool): If true, the regression loss would be\n            applied directly on decoded bounding boxes, converting both\n            the predicted boxes and regression targets to absolute\n            coordinates format. Default False. 
It should be `True` when\n            using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.\n        train_cfg (dict): Training config of anchor head.\n        test_cfg (dict): Testing config of anchor head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_classes=80,\n                 in_channels=(512, 1024, 512, 256, 256, 256),\n                 stacked_convs=0,\n                 feat_channels=256,\n                 use_depthwise=False,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 act_cfg=None,\n                 anchor_generator=dict(\n                     type='SSDAnchorGenerator',\n                     scale_major=False,\n                     input_size=300,\n                     strides=[8, 16, 32, 64, 100, 300],\n                     ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),\n                     basesize_ratio_range=(0.1, 0.9)),\n                 bbox_coder=dict(\n                     type='DeltaXYWHBBoxCoder',\n                     clip_border=True,\n                     target_means=[.0, .0, .0, .0],\n                     target_stds=[1.0, 1.0, 1.0, 1.0],\n                 ),\n                 reg_decoded_bbox=False,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=dict(\n                     type='Xavier',\n                     layer='Conv2d',\n                     distribution='uniform',\n                     bias=0)):\n        super(AnchorHead, self).__init__(init_cfg)\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.stacked_convs = stacked_convs\n        self.feat_channels = feat_channels\n        self.use_depthwise = use_depthwise\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n\n        self.cls_out_channels = num_classes + 1  # add background class\n        self.prior_generator = build_prior_generator(anchor_generator)\n\n        # Usually the numbers of anchors for each level are the same\n        # except SSD detectors. 
So it is an int in the most dense\n        # heads but a list of int in SSDHead\n        self.num_base_priors = self.prior_generator.num_base_priors\n\n        self._init_layers()\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.reg_decoded_bbox = reg_decoded_bbox\n        self.use_sigmoid_cls = False\n        self.cls_focal_loss = False\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        # set sampling=False for archor_target\n        self.sampling = False\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            # SSD sampling=False so use PseudoSampler\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n        self.fp16_enabled = False\n\n    @property\n    def num_anchors(self):\n        \"\"\"\n        Returns:\n            list[int]: Number of base_anchors on each point of each level.\n        \"\"\"\n        warnings.warn('DeprecationWarning: `num_anchors` is deprecated, '\n                      'please use \"num_base_priors\" instead')\n        return self.num_base_priors\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.cls_convs = nn.ModuleList()\n        self.reg_convs = nn.ModuleList()\n        # TODO: Use registry to choose ConvModule type\n        conv = DepthwiseSeparableConvModule \\\n            if self.use_depthwise else ConvModule\n\n        for channel, num_base_priors in zip(self.in_channels,\n                                            self.num_base_priors):\n            cls_layers = []\n            reg_layers = []\n            in_channel = channel\n            # build stacked conv tower, not used in default ssd\n            for i in range(self.stacked_convs):\n                cls_layers.append(\n                    conv(\n                        in_channel,\n                        self.feat_channels,\n                        3,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg,\n                        act_cfg=self.act_cfg))\n                reg_layers.append(\n                    conv(\n                        in_channel,\n                        self.feat_channels,\n                        3,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg,\n                        act_cfg=self.act_cfg))\n                in_channel = self.feat_channels\n            # SSD-Lite head\n            if self.use_depthwise:\n                cls_layers.append(\n                    ConvModule(\n                        in_channel,\n                        in_channel,\n                        3,\n                        padding=1,\n                        groups=in_channel,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg,\n                        act_cfg=self.act_cfg))\n                reg_layers.append(\n                    ConvModule(\n                        in_channel,\n                        in_channel,\n                        3,\n                        padding=1,\n                        groups=in_channel,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg,\n                        act_cfg=self.act_cfg))\n            cls_layers.append(\n                nn.Conv2d(\n                    in_channel,\n            
        num_base_priors * self.cls_out_channels,\n                    kernel_size=1 if self.use_depthwise else 3,\n                    padding=0 if self.use_depthwise else 1))\n            reg_layers.append(\n                nn.Conv2d(\n                    in_channel,\n                    num_base_priors * 4,\n                    kernel_size=1 if self.use_depthwise else 3,\n                    padding=0 if self.use_depthwise else 1))\n            self.cls_convs.append(nn.Sequential(*cls_layers))\n            self.reg_convs.append(nn.Sequential(*reg_layers))\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple:\n                cls_scores (list[Tensor]): Classification scores for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_anchors * num_classes.\n                bbox_preds (list[Tensor]): Box energies / deltas for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_anchors * 4.\n        \"\"\"\n        cls_scores = []\n        bbox_preds = []\n        for feat, reg_conv, cls_conv in zip(feats, self.reg_convs,\n                                            self.cls_convs):\n            cls_scores.append(cls_conv(feat))\n            bbox_preds.append(reg_conv(feat))\n        return cls_scores, bbox_preds\n\n    def loss_single(self, cls_score, bbox_pred, anchor, labels, label_weights,\n                    bbox_targets, bbox_weights, num_total_samples):\n        \"\"\"Compute loss of a single image.\n\n        Args:\n            cls_score (Tensor): Box scores for eachimage\n                Has shape (num_total_anchors, num_classes).\n            bbox_pred (Tensor): Box energies / deltas for each image\n                level with shape (num_total_anchors, 4).\n            anchors (Tensor): Box reference for each scale level with shape\n                (num_total_anchors, 4).\n            labels (Tensor): Labels of each anchors with shape\n                (num_total_anchors,).\n            label_weights (Tensor): Label weights of each anchor with shape\n                (num_total_anchors,)\n            bbox_targets (Tensor): BBox regression targets of each anchor\n                weight shape (num_total_anchors, 4).\n            bbox_weights (Tensor): BBox regression loss weights of each anchor\n                with shape (num_total_anchors, 4).\n            num_total_samples (int): If sampling, num total samples equal to\n                the number of total anchors; Otherwise, it is the number of\n                positive anchors.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n\n        loss_cls_all = F.cross_entropy(\n            cls_score, labels, reduction='none') * label_weights\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        pos_inds = ((labels >= 0) & (labels < self.num_classes)).nonzero(\n            as_tuple=False).reshape(-1)\n        neg_inds = (labels == self.num_classes).nonzero(\n            as_tuple=False).view(-1)\n\n        num_pos_samples = pos_inds.size(0)\n        num_neg_samples = self.train_cfg.neg_pos_ratio * num_pos_samples\n        if num_neg_samples > neg_inds.size(0):\n            num_neg_samples = neg_inds.size(0)\n        topk_loss_cls_neg, _ = 
loss_cls_all[neg_inds].topk(num_neg_samples)\n        loss_cls_pos = loss_cls_all[pos_inds].sum()\n        loss_cls_neg = topk_loss_cls_neg.sum()\n        loss_cls = (loss_cls_pos + loss_cls_neg) / num_total_samples\n\n        if self.reg_decoded_bbox:\n            # When the regression loss (e.g. `IouLoss`, `GIouLoss`)\n            # is applied directly on the decoded bounding boxes, it\n            # decodes the already encoded coordinates to absolute format.\n            bbox_pred = self.bbox_coder.decode(anchor, bbox_pred)\n\n        loss_bbox = smooth_l1_loss(\n            bbox_pred,\n            bbox_targets,\n            bbox_weights,\n            beta=self.train_cfg.smoothl1_beta,\n            avg_factor=num_total_samples)\n        return loss_cls[None], loss_bbox\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            gt_bboxes (list[Tensor]): each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=1,\n            unmap_outputs=True)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg) = cls_reg_targets\n\n        num_images = len(img_metas)\n        all_cls_scores = torch.cat([\n            s.permute(0, 2, 3, 1).reshape(\n                num_images, -1, self.cls_out_channels) for s in cls_scores\n        ], 1)\n        all_labels = torch.cat(labels_list, -1).view(num_images, -1)\n        all_label_weights = torch.cat(label_weights_list,\n                                      -1).view(num_images, -1)\n        all_bbox_preds = torch.cat([\n            b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)\n            for b in bbox_preds\n        ], -2)\n        all_bbox_targets = torch.cat(bbox_targets_list,\n                                     -2).view(num_images, -1, 4)\n        all_bbox_weights = torch.cat(bbox_weights_list,\n                                     -2).view(num_images, -1, 4)\n\n       
 # concat all level anchors to a single tensor\n        all_anchors = []\n        for i in range(num_images):\n            all_anchors.append(torch.cat(anchor_list[i]))\n\n        losses_cls, losses_bbox = multi_apply(\n            self.loss_single,\n            all_cls_scores,\n            all_bbox_preds,\n            all_anchors,\n            all_labels,\n            all_label_weights,\n            all_bbox_targets,\n            all_bbox_weights,\n            num_total_samples=num_total_pos)\n        return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/tood_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule, Scale, bias_init_with_prob, normal_init\nfrom mmcv.ops import deform_conv2d\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (anchor_inside_flags, build_assigner, distance2bbox,\n                        images_to_levels, multi_apply, reduce_mean, unmap)\nfrom mmdet.core.utils import filter_scores_and_topk\nfrom mmdet.models.utils import sigmoid_geometric_mean\nfrom ..builder import HEADS, build_loss\nfrom .atss_head import ATSSHead\n\n\nclass TaskDecomposition(nn.Module):\n    \"\"\"Task decomposition module in task-aligned predictor of TOOD.\n\n    Args:\n        feat_channels (int): Number of feature channels in TOOD head.\n        stacked_convs (int): Number of conv layers in TOOD head.\n        la_down_rate (int): Downsample rate of layer attention.\n        conv_cfg (dict): Config dict for convolution layer.\n        norm_cfg (dict): Config dict for normalization layer.\n    \"\"\"\n\n    def __init__(self,\n                 feat_channels,\n                 stacked_convs,\n                 la_down_rate=8,\n                 conv_cfg=None,\n                 norm_cfg=None):\n        super(TaskDecomposition, self).__init__()\n        self.feat_channels = feat_channels\n        self.stacked_convs = stacked_convs\n        self.in_channels = self.feat_channels * self.stacked_convs\n        self.norm_cfg = norm_cfg\n        self.layer_attention = nn.Sequential(\n            nn.Conv2d(self.in_channels, self.in_channels // la_down_rate, 1),\n            nn.ReLU(inplace=True),\n            nn.Conv2d(\n                self.in_channels // la_down_rate,\n                self.stacked_convs,\n                1,\n                padding=0), nn.Sigmoid())\n\n        self.reduction_conv = ConvModule(\n            self.in_channels,\n            self.feat_channels,\n            1,\n            stride=1,\n            padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            bias=norm_cfg is None)\n\n    def init_weights(self):\n        for m in self.layer_attention.modules():\n            if isinstance(m, nn.Conv2d):\n                normal_init(m, std=0.001)\n        normal_init(self.reduction_conv.conv, std=0.01)\n\n    def forward(self, feat, avg_feat=None):\n        b, c, h, w = feat.shape\n        if avg_feat is None:\n            avg_feat = F.adaptive_avg_pool2d(feat, (1, 1))\n        weight = self.layer_attention(avg_feat)\n\n        # here we first compute the product between layer attention weight and\n        # conv weight, and then compute the convolution between new conv weight\n        # and feature map, in order to save memory and FLOPs.\n        conv_weight = weight.reshape(\n            b, 1, self.stacked_convs,\n            1) * self.reduction_conv.conv.weight.reshape(\n                1, self.feat_channels, self.stacked_convs, self.feat_channels)\n        conv_weight = conv_weight.reshape(b, self.feat_channels,\n                                          self.in_channels)\n        feat = feat.reshape(b, self.in_channels, h * w)\n        feat = torch.bmm(conv_weight, feat).reshape(b, self.feat_channels, h,\n                                                    w)\n        if self.norm_cfg is not None:\n            feat = self.reduction_conv.norm(feat)\n        feat = self.reduction_conv.activate(feat)\n\n        return feat\n\n\n@HEADS.register_module()\nclass TOODHead(ATSSHead):\n    
\"\"\"TOODHead used in `TOOD: Task-aligned One-stage Object Detection.\n\n    <https://arxiv.org/abs/2108.07755>`_.\n\n    TOOD uses Task-aligned head (T-head) and is optimized by Task Alignment\n    Learning (TAL).\n\n    Args:\n        num_dcn (int): Number of deformable convolution in the head.\n            Default: 0.\n        anchor_type (str): If set to `anchor_free`, the head will use centers\n            to regress bboxes. If set to `anchor_based`, the head will\n            regress bboxes based on anchors. Default: `anchor_free`.\n        initial_loss_cls (dict): Config of initial loss.\n\n    Example:\n        >>> self = TOODHead(11, 7)\n        >>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]]\n        >>> cls_score, bbox_pred = self.forward(feats)\n        >>> assert len(cls_score) == len(self.scales)\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 num_dcn=0,\n                 anchor_type='anchor_free',\n                 initial_loss_cls=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     activated=True,\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 **kwargs):\n        assert anchor_type in ['anchor_free', 'anchor_based']\n        self.num_dcn = num_dcn\n        self.anchor_type = anchor_type\n        self.epoch = 0  # which would be update in SetEpochInfoHook!\n        super(TOODHead, self).__init__(num_classes, in_channels, **kwargs)\n\n        if self.train_cfg:\n            self.initial_epoch = self.train_cfg.initial_epoch\n            self.initial_assigner = build_assigner(\n                self.train_cfg.initial_assigner)\n            self.initial_loss_cls = build_loss(initial_loss_cls)\n            self.assigner = self.initial_assigner\n            self.alignment_assigner = build_assigner(self.train_cfg.assigner)\n            self.alpha = self.train_cfg.alpha\n            self.beta = self.train_cfg.beta\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.inter_convs = nn.ModuleList()\n        for i in range(self.stacked_convs):\n            if i < self.num_dcn:\n                conv_cfg = dict(type='DCNv2', deform_groups=4)\n            else:\n                conv_cfg = self.conv_cfg\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.inter_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg))\n\n        self.cls_decomp = TaskDecomposition(self.feat_channels,\n                                            self.stacked_convs,\n                                            self.stacked_convs * 8,\n                                            self.conv_cfg, self.norm_cfg)\n        self.reg_decomp = TaskDecomposition(self.feat_channels,\n                                            self.stacked_convs,\n                                            self.stacked_convs * 8,\n                                            self.conv_cfg, self.norm_cfg)\n\n        self.tood_cls = nn.Conv2d(\n            self.feat_channels,\n            self.num_base_priors * self.cls_out_channels,\n            3,\n            padding=1)\n        
self.tood_reg = nn.Conv2d(\n            self.feat_channels, self.num_base_priors * 4, 3, padding=1)\n\n        self.cls_prob_module = nn.Sequential(\n            nn.Conv2d(self.feat_channels * self.stacked_convs,\n                      self.feat_channels // 4, 1), nn.ReLU(inplace=True),\n            nn.Conv2d(self.feat_channels // 4, 1, 3, padding=1))\n        self.reg_offset_module = nn.Sequential(\n            nn.Conv2d(self.feat_channels * self.stacked_convs,\n                      self.feat_channels // 4, 1), nn.ReLU(inplace=True),\n            nn.Conv2d(self.feat_channels // 4, 4 * 2, 3, padding=1))\n\n        self.scales = nn.ModuleList(\n            [Scale(1.0) for _ in self.prior_generator.strides])\n\n    def init_weights(self):\n        \"\"\"Initialize weights of the head.\"\"\"\n        bias_cls = bias_init_with_prob(0.01)\n        for m in self.inter_convs:\n            normal_init(m.conv, std=0.01)\n        for m in self.cls_prob_module:\n            if isinstance(m, nn.Conv2d):\n                normal_init(m, std=0.01)\n        for m in self.reg_offset_module:\n            if isinstance(m, nn.Conv2d):\n                normal_init(m, std=0.001)\n        normal_init(self.cls_prob_module[-1], std=0.01, bias=bias_cls)\n\n        self.cls_decomp.init_weights()\n        self.reg_decomp.init_weights()\n\n        normal_init(self.tood_cls, std=0.01, bias=bias_cls)\n        normal_init(self.tood_reg, std=0.01)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple: Usually a tuple of classification scores and bbox prediction\n                cls_scores (list[Tensor]): Classification scores for all scale\n                    levels, each is a 4D-tensor, the channels number is\n                    num_anchors * num_classes.\n                bbox_preds (list[Tensor]): Decoded box for all scale levels,\n                    each is a 4D-tensor, the channels number is\n                    num_anchors * 4. 
In [tl_x, tl_y, br_x, br_y] format.\n        \"\"\"\n        cls_scores = []\n        bbox_preds = []\n        for idx, (x, scale, stride) in enumerate(\n                zip(feats, self.scales, self.prior_generator.strides)):\n            b, c, h, w = x.shape\n            anchor = self.prior_generator.single_level_grid_priors(\n                (h, w), idx, device=x.device)\n            anchor = torch.cat([anchor for _ in range(b)])\n            # extract task interactive features\n            inter_feats = []\n            for inter_conv in self.inter_convs:\n                x = inter_conv(x)\n                inter_feats.append(x)\n            feat = torch.cat(inter_feats, 1)\n\n            # task decomposition\n            avg_feat = F.adaptive_avg_pool2d(feat, (1, 1))\n            cls_feat = self.cls_decomp(feat, avg_feat)\n            reg_feat = self.reg_decomp(feat, avg_feat)\n\n            # cls prediction and alignment\n            cls_logits = self.tood_cls(cls_feat)\n            cls_prob = self.cls_prob_module(feat)\n            cls_score = sigmoid_geometric_mean(cls_logits, cls_prob)\n\n            # reg prediction and alignment\n            if self.anchor_type == 'anchor_free':\n                reg_dist = scale(self.tood_reg(reg_feat).exp()).float()\n                reg_dist = reg_dist.permute(0, 2, 3, 1).reshape(-1, 4)\n                reg_bbox = distance2bbox(\n                    self.anchor_center(anchor) / stride[0],\n                    reg_dist).reshape(b, h, w, 4).permute(0, 3, 1,\n                                                          2)  # (b, c, h, w)\n            elif self.anchor_type == 'anchor_based':\n                reg_dist = scale(self.tood_reg(reg_feat)).float()\n                reg_dist = reg_dist.permute(0, 2, 3, 1).reshape(-1, 4)\n                reg_bbox = self.bbox_coder.decode(anchor, reg_dist).reshape(\n                    b, h, w, 4).permute(0, 3, 1, 2) / stride[0]\n            else:\n                raise NotImplementedError(\n                    f'Unknown anchor type: {self.anchor_type}.'\n                    f'Please use `anchor_free` or `anchor_based`.')\n            reg_offset = self.reg_offset_module(feat)\n            bbox_pred = self.deform_sampling(reg_bbox.contiguous(),\n                                             reg_offset.contiguous())\n\n            # After deform_sampling, some boxes will become invalid (The\n            # left-top point is at the right or bottom of the right-bottom\n            # point), which will make the GIoULoss negative.\n            invalid_bbox_idx = (bbox_pred[:, [0]] > bbox_pred[:, [2]]) | \\\n                               (bbox_pred[:, [1]] > bbox_pred[:, [3]])\n            invalid_bbox_idx = invalid_bbox_idx.expand_as(bbox_pred)\n            bbox_pred = torch.where(invalid_bbox_idx, reg_bbox, bbox_pred)\n\n            cls_scores.append(cls_score)\n            bbox_preds.append(bbox_pred)\n        return tuple(cls_scores), tuple(bbox_preds)\n\n    def deform_sampling(self, feat, offset):\n        \"\"\"Sampling the feature x according to offset.\n\n        Args:\n            feat (Tensor): Feature\n            offset (Tensor): Spatial offset for feature sampling\n        \"\"\"\n        # it is an equivalent implementation of bilinear interpolation\n        b, c, h, w = feat.shape\n        weight = feat.new_ones(c, 1, 1, 1)\n        y = deform_conv2d(feat, offset, weight, 1, 0, 1, c, c)\n        return y\n\n    def anchor_center(self, anchors):\n        \"\"\"Get anchor centers from anchors.\n\n        
Args:\n            anchors (Tensor): Anchor list with shape (N, 4), \"xyxy\" format.\n\n        Returns:\n            Tensor: Anchor centers with shape (N, 2), \"xy\" format.\n        \"\"\"\n        anchors_cx = (anchors[:, 2] + anchors[:, 0]) / 2\n        anchors_cy = (anchors[:, 3] + anchors[:, 1]) / 2\n        return torch.stack([anchors_cx, anchors_cy], dim=-1)\n\n    def loss_single(self, anchors, cls_score, bbox_pred, labels, label_weights,\n                    bbox_targets, alignment_metrics, stride):\n        \"\"\"Compute loss of a single scale level.\n\n        Args:\n            anchors (Tensor): Box reference for each scale level with shape\n                (N, num_total_anchors, 4).\n            cls_score (Tensor): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W).\n            bbox_pred (Tensor): Decoded bboxes for each scale\n                level with shape (N, num_anchors * 4, H, W).\n            labels (Tensor): Labels of each anchors with shape\n                (N, num_total_anchors).\n            label_weights (Tensor): Label weights of each anchor with shape\n                (N, num_total_anchors).\n            bbox_targets (Tensor): BBox regression targets of each anchor with\n                shape (N, num_total_anchors, 4).\n            alignment_metrics (Tensor): Alignment metrics with shape\n                (N, num_total_anchors).\n            stride (tuple[int]): Downsample stride of the feature map.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert stride[0] == stride[1], 'h stride is not equal to w stride!'\n        anchors = anchors.reshape(-1, 4)\n        cls_score = cls_score.permute(0, 2, 3, 1).reshape(\n            -1, self.cls_out_channels).contiguous()\n        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)\n        bbox_targets = bbox_targets.reshape(-1, 4)\n        labels = labels.reshape(-1)\n        alignment_metrics = alignment_metrics.reshape(-1)\n        label_weights = label_weights.reshape(-1)\n        targets = labels if self.epoch < self.initial_epoch else (\n            labels, alignment_metrics)\n        cls_loss_func = self.initial_loss_cls \\\n            if self.epoch < self.initial_epoch else self.loss_cls\n\n        loss_cls = cls_loss_func(\n            cls_score, targets, label_weights, avg_factor=1.0)\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = ((labels >= 0)\n                    & (labels < bg_class_ind)).nonzero().squeeze(1)\n\n        if len(pos_inds) > 0:\n            pos_bbox_targets = bbox_targets[pos_inds]\n            pos_bbox_pred = bbox_pred[pos_inds]\n            pos_anchors = anchors[pos_inds]\n\n            pos_decode_bbox_pred = pos_bbox_pred\n            pos_decode_bbox_targets = pos_bbox_targets / stride[0]\n\n            # regression loss\n            pos_bbox_weight = self.centerness_target(\n                pos_anchors, pos_bbox_targets\n            ) if self.epoch < self.initial_epoch else alignment_metrics[\n                pos_inds]\n\n            loss_bbox = self.loss_bbox(\n                pos_decode_bbox_pred,\n                pos_decode_bbox_targets,\n                weight=pos_bbox_weight,\n                avg_factor=1.0)\n        else:\n            loss_bbox = bbox_pred.sum() * 0\n            pos_bbox_weight = bbox_targets.new_tensor(0.)\n\n        return loss_cls, loss_bbox, alignment_metrics.sum(\n   
     ), pos_bbox_weight.sum()\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Decoded box for each scale\n                level with shape (N, num_anchors * 4, H, W) in\n                [tl_x, tl_y, br_x, br_y] format.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (list[Tensor] | None): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        num_imgs = len(img_metas)\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n\n        flatten_cls_scores = torch.cat([\n            cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1,\n                                                  self.cls_out_channels)\n            for cls_score in cls_scores\n        ], 1)\n        flatten_bbox_preds = torch.cat([\n            bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) * stride[0]\n            for bbox_pred, stride in zip(bbox_preds,\n                                         self.prior_generator.strides)\n        ], 1)\n\n        cls_reg_targets = self.get_targets(\n            flatten_cls_scores,\n            flatten_bbox_preds,\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        (anchor_list, labels_list, label_weights_list, bbox_targets_list,\n         alignment_metrics_list) = cls_reg_targets\n\n        losses_cls, losses_bbox,\\\n            cls_avg_factors, bbox_avg_factors = multi_apply(\n                self.loss_single,\n                anchor_list,\n                cls_scores,\n                bbox_preds,\n                labels_list,\n                label_weights_list,\n                bbox_targets_list,\n                alignment_metrics_list,\n                self.prior_generator.strides)\n\n        cls_avg_factor = reduce_mean(sum(cls_avg_factors)).clamp_(min=1).item()\n        losses_cls = list(map(lambda x: x / cls_avg_factor, losses_cls))\n\n        bbox_avg_factor = reduce_mean(\n            sum(bbox_avg_factors)).clamp_(min=1).item()\n        losses_bbox = list(map(lambda x: x / bbox_avg_factor, losses_bbox))\n        return dict(loss_cls=losses_cls, loss_bbox=losses_bbox)\n\n    def _get_bboxes_single(self,\n                           cls_score_list,\n                         
  bbox_pred_list,\n                           score_factor_list,\n                           mlvl_priors,\n                           img_meta,\n                           cfg,\n                           rescale=False,\n                           with_nms=True,\n                           **kwargs):\n        \"\"\"Transform outputs of a single image into bbox predictions.\n\n        Args:\n            cls_score_list (list[Tensor]): Box scores from all scale\n                levels of a single image, each item has shape\n                (num_priors * num_classes, H, W).\n            bbox_pred_list (list[Tensor]): Box energies / deltas from\n                all scale levels of a single image, each item has shape\n                (num_priors * 4, H, W).\n            score_factor_list (list[Tensor]): Score factor from all scale\n                levels of a single image, each item has shape\n                (num_priors * 1, H, W).\n            mlvl_priors (list[Tensor]): Each element in the list is\n                the priors of a single level in feature pyramid. In all\n                anchor-based methods, it has shape (num_priors, 4). In\n                all anchor-free methods, it has shape (num_priors, 2)\n                when `with_stride=True`, otherwise it still has shape\n                (num_priors, 4).\n            img_meta (dict): Image meta info.\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            tuple[Tensor]: Results of detected bboxes and labels. If with_nms\n                is False and mlvl_score_factor is None, return mlvl_bboxes and\n                mlvl_scores, else return mlvl_bboxes, mlvl_scores and\n                mlvl_score_factor. Usually with_nms is False is used for aug\n                test. If with_nms is True, then return the following format\n\n                - det_bboxes (Tensor): Predicted bboxes with shape \\\n                    [num_bboxes, 5], where the first 4 columns are bounding \\\n                    box positions (tl_x, tl_y, br_x, br_y) and the 5-th \\\n                    column are scores between 0 and 1.\n                - det_labels (Tensor): Predicted labels of the corresponding \\\n                    box with shape [num_bboxes].\n        \"\"\"\n\n        cfg = self.test_cfg if cfg is None else cfg\n        nms_pre = cfg.get('nms_pre', -1)\n\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_labels = []\n        for cls_score, bbox_pred, priors, stride in zip(\n                cls_score_list, bbox_pred_list, mlvl_priors,\n                self.prior_generator.strides):\n\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4) * stride[0]\n            scores = cls_score.permute(1, 2,\n                                       0).reshape(-1, self.cls_out_channels)\n\n            # After https://github.com/open-mmlab/mmdetection/pull/6268/,\n            # this operation keeps fewer bboxes under the same `nms_pre`.\n            # There is no difference in performance for most models. 
If you\n            # find a slight drop in performance, you can set a larger\n            # `nms_pre` than before.\n            results = filter_scores_and_topk(\n                scores, cfg.score_thr, nms_pre,\n                dict(bbox_pred=bbox_pred, priors=priors))\n            scores, labels, keep_idxs, filtered_results = results\n\n            bboxes = filtered_results['bbox_pred']\n\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_labels.append(labels)\n\n        return self._bbox_post_process(mlvl_scores, mlvl_labels, mlvl_bboxes,\n                                       img_meta['scale_factor'], cfg, rescale,\n                                       with_nms, None, **kwargs)\n\n    def get_targets(self,\n                    cls_scores,\n                    bbox_preds,\n                    anchor_list,\n                    valid_flag_list,\n                    gt_bboxes_list,\n                    img_metas,\n                    gt_bboxes_ignore_list=None,\n                    gt_labels_list=None,\n                    label_channels=1,\n                    unmap_outputs=True):\n        \"\"\"Compute regression and classification targets for anchors in\n        multiple images.\n\n        Args:\n            cls_scores (Tensor): Classification predictions of images,\n                a 3D-Tensor with shape [num_imgs, num_priors, num_classes].\n            bbox_preds (Tensor): Decoded bboxes predictions of one image,\n                a 3D-Tensor with shape [num_imgs, num_priors, 4] in [tl_x,\n                tl_y, br_x, br_y] format.\n            anchor_list (list[list[Tensor]]): Multi level anchors of each\n                image. The outer list indicates images, and the inner list\n                corresponds to feature levels of the image. Each element of\n                the inner list is a tensor of shape (num_anchors, 4).\n            valid_flag_list (list[list[Tensor]]): Multi level valid flags of\n                each image. The outer list indicates images, and the inner list\n                corresponds to feature levels of the image. 
Each element of\n                the inner list is a tensor of shape (num_anchors, )\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be\n                ignored.\n            gt_labels_list (list[Tensor]): Ground truth labels of each box.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple: a tuple containing learning targets.\n\n                - anchors_list (list[list[Tensor]]): Anchors of each level.\n                - labels_list (list[Tensor]): Labels of each level.\n                - label_weights_list (list[Tensor]): Label weights of each\n                  level.\n                - bbox_targets_list (list[Tensor]): BBox targets of each level.\n                - norm_alignment_metrics_list (list[Tensor]): Normalized\n                  alignment metrics of each level.\n        \"\"\"\n        num_imgs = len(img_metas)\n        assert len(anchor_list) == len(valid_flag_list) == num_imgs\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n        num_level_anchors_list = [num_level_anchors] * num_imgs\n\n        # concat all level anchors and flags to a single tensor\n        for i in range(num_imgs):\n            assert len(anchor_list[i]) == len(valid_flag_list[i])\n            anchor_list[i] = torch.cat(anchor_list[i])\n            valid_flag_list[i] = torch.cat(valid_flag_list[i])\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        # anchor_list: list(b * [-1, 4])\n\n        if self.epoch < self.initial_epoch:\n            (all_anchors, all_labels, all_label_weights, all_bbox_targets,\n             all_bbox_weights, pos_inds_list, neg_inds_list) = multi_apply(\n                 super()._get_target_single,\n                 anchor_list,\n                 valid_flag_list,\n                 num_level_anchors_list,\n                 gt_bboxes_list,\n                 gt_bboxes_ignore_list,\n                 gt_labels_list,\n                 img_metas,\n                 label_channels=label_channels,\n                 unmap_outputs=unmap_outputs)\n            all_assign_metrics = [\n                weight[..., 0] for weight in all_bbox_weights\n            ]\n        else:\n            (all_anchors, all_labels, all_label_weights, all_bbox_targets,\n             all_assign_metrics) = multi_apply(\n                 self._get_target_single,\n                 cls_scores,\n                 bbox_preds,\n                 anchor_list,\n                 valid_flag_list,\n                 gt_bboxes_list,\n                 gt_bboxes_ignore_list,\n                 gt_labels_list,\n                 img_metas,\n                 label_channels=label_channels,\n                 unmap_outputs=unmap_outputs)\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n\n        # split targets to a list w.r.t. 
multiple levels\n        anchors_list = images_to_levels(all_anchors, num_level_anchors)\n        labels_list = images_to_levels(all_labels, num_level_anchors)\n        label_weights_list = images_to_levels(all_label_weights,\n                                              num_level_anchors)\n        bbox_targets_list = images_to_levels(all_bbox_targets,\n                                             num_level_anchors)\n        norm_alignment_metrics_list = images_to_levels(all_assign_metrics,\n                                                       num_level_anchors)\n\n        return (anchors_list, labels_list, label_weights_list,\n                bbox_targets_list, norm_alignment_metrics_list)\n\n    def _get_target_single(self,\n                           cls_scores,\n                           bbox_preds,\n                           flat_anchors,\n                           valid_flags,\n                           gt_bboxes,\n                           gt_bboxes_ignore,\n                           gt_labels,\n                           img_meta,\n                           label_channels=1,\n                           unmap_outputs=True):\n        \"\"\"Compute regression, classification targets for anchors in a single\n        image.\n\n        Args:\n            cls_scores (list(Tensor)): Box scores for each image.\n            bbox_preds (list(Tensor)): Box energies / deltas for each image.\n            flat_anchors (Tensor): Multi-level anchors of the image, which are\n                concatenated into a single tensor of shape (num_anchors ,4)\n            valid_flags (Tensor): Multi level valid flags of the image,\n                which are concatenated into a single tensor of\n                    shape (num_anchors,).\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            img_meta (dict): Meta info of the image.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple: N is the number of total anchors in the image.\n                anchors (Tensor): All anchors in the image with shape (N, 4).\n                labels (Tensor): Labels of all anchors in the image with shape\n                    (N,).\n                label_weights (Tensor): Label weights of all anchor in the\n                    image with shape (N,).\n                bbox_targets (Tensor): BBox targets of all anchors in the\n                    image with shape (N, 4).\n                norm_alignment_metrics (Tensor): Normalized alignment metrics\n                    of all priors in the image with shape (N,).\n        \"\"\"\n        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,\n                                           img_meta['img_shape'][:2],\n                                           self.train_cfg.allowed_border)\n        if not inside_flags.any():\n            return (None, ) * 7\n        # assign gt and sample anchors\n        anchors = flat_anchors[inside_flags, :]\n        assign_result = self.alignment_assigner.assign(\n            cls_scores[inside_flags, :], bbox_preds[inside_flags, :], anchors,\n            gt_bboxes, gt_bboxes_ignore, gt_labels, self.alpha, 
self.beta)\n        assign_ious = assign_result.max_overlaps\n        assign_metrics = assign_result.assign_metrics\n\n        sampling_result = self.sampler.sample(assign_result, anchors,\n                                              gt_bboxes)\n\n        num_valid_anchors = anchors.shape[0]\n        bbox_targets = torch.zeros_like(anchors)\n        labels = anchors.new_full((num_valid_anchors, ),\n                                  self.num_classes,\n                                  dtype=torch.long)\n        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n        norm_alignment_metrics = anchors.new_zeros(\n            num_valid_anchors, dtype=torch.float)\n\n        pos_inds = sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            # point-based\n            pos_bbox_targets = sampling_result.pos_gt_bboxes\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class since v2.5.0\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        class_assigned_gt_inds = torch.unique(\n            sampling_result.pos_assigned_gt_inds)\n        for gt_inds in class_assigned_gt_inds:\n            gt_class_inds = pos_inds[sampling_result.pos_assigned_gt_inds ==\n                                     gt_inds]\n            pos_alignment_metrics = assign_metrics[gt_class_inds]\n            pos_ious = assign_ious[gt_class_inds]\n            pos_norm_alignment_metrics = pos_alignment_metrics / (\n                pos_alignment_metrics.max() + 10e-8) * pos_ious.max()\n            norm_alignment_metrics[gt_class_inds] = pos_norm_alignment_metrics\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_anchors.size(0)\n            anchors = unmap(anchors, num_total_anchors, inside_flags)\n            labels = unmap(\n                labels, num_total_anchors, inside_flags, fill=self.num_classes)\n            label_weights = unmap(label_weights, num_total_anchors,\n                                  inside_flags)\n            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)\n            norm_alignment_metrics = unmap(norm_alignment_metrics,\n                                           num_total_anchors, inside_flags)\n        return (anchors, labels, label_weights, bbox_targets,\n                norm_alignment_metrics)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/vfnet_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, Scale\nfrom mmcv.ops import DeformConv2d\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (MlvlPointGenerator, bbox_overlaps, build_assigner,\n                        build_prior_generator, build_sampler, multi_apply,\n                        reduce_mean)\nfrom ..builder import HEADS, build_loss\nfrom .atss_head import ATSSHead\nfrom .fcos_head import FCOSHead\n\nINF = 1e8\n\n\n@HEADS.register_module()\nclass VFNetHead(ATSSHead, FCOSHead):\n    \"\"\"Head of `VarifocalNet (VFNet): An IoU-aware Dense Object\n    Detector.<https://arxiv.org/abs/2008.13367>`_.\n\n    The VFNet predicts IoU-aware classification scores which mix the\n    object presence confidence and object localization accuracy as the\n    detection score. It is built on the FCOS architecture and uses ATSS\n    for defining positive/negative training examples. The VFNet is trained\n    with Varifocal Loss and empolys star-shaped deformable convolution to\n    extract features for a bbox.\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        regress_ranges (tuple[tuple[int, int]]): Regress range of multiple\n            level points.\n        center_sampling (bool): If true, use center sampling. Default: False.\n        center_sample_radius (float): Radius of center sampling. Default: 1.5.\n        sync_num_pos (bool): If true, synchronize the number of positive\n            examples across GPUs. Default: True\n        gradient_mul (float): The multiplier to gradients from bbox refinement\n            and recognition. Default: 0.1.\n        bbox_norm_type (str): The bbox normalization type, 'reg_denom' or\n            'stride'. Default: reg_denom\n        loss_cls_fl (dict): Config of focal loss.\n        use_vfl (bool): If true, use varifocal loss for training.\n            Default: True.\n        loss_cls (dict): Config of varifocal loss.\n        loss_bbox (dict): Config of localization loss, GIoU Loss.\n        loss_bbox (dict): Config of localization refinement loss, GIoU Loss.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: norm_cfg=dict(type='GN', num_groups=32,\n            requires_grad=True).\n        use_atss (bool): If true, use ATSS to define positive/negative\n            examples. 
Default: True.\n        anchor_generator (dict): Config of anchor generator for ATSS.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n\n    Example:\n        >>> self = VFNetHead(11, 7)\n        >>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]]\n        >>> cls_score, bbox_pred, bbox_pred_refine= self.forward(feats)\n        >>> assert len(cls_score) == len(self.scales)\n    \"\"\"  # noqa: E501\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512),\n                                 (512, INF)),\n                 center_sampling=False,\n                 center_sample_radius=1.5,\n                 sync_num_pos=True,\n                 gradient_mul=0.1,\n                 bbox_norm_type='reg_denom',\n                 loss_cls_fl=dict(\n                     type='FocalLoss',\n                     use_sigmoid=True,\n                     gamma=2.0,\n                     alpha=0.25,\n                     loss_weight=1.0),\n                 use_vfl=True,\n                 loss_cls=dict(\n                     type='VarifocalLoss',\n                     use_sigmoid=True,\n                     alpha=0.75,\n                     gamma=2.0,\n                     iou_weighted=True,\n                     loss_weight=1.0),\n                 loss_bbox=dict(type='GIoULoss', loss_weight=1.5),\n                 loss_bbox_refine=dict(type='GIoULoss', loss_weight=2.0),\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 use_atss=True,\n                 reg_decoded_bbox=True,\n                 anchor_generator=dict(\n                     type='AnchorGenerator',\n                     ratios=[1.0],\n                     octave_base_scale=8,\n                     scales_per_octave=1,\n                     center_offset=0.0,\n                     strides=[8, 16, 32, 64, 128]),\n                 init_cfg=dict(\n                     type='Normal',\n                     layer='Conv2d',\n                     std=0.01,\n                     override=dict(\n                         type='Normal',\n                         name='vfnet_cls',\n                         std=0.01,\n                         bias_prob=0.01)),\n                 **kwargs):\n        # dcn base offsets, adapted from reppoints_head.py\n        self.num_dconv_points = 9\n        self.dcn_kernel = int(np.sqrt(self.num_dconv_points))\n        self.dcn_pad = int((self.dcn_kernel - 1) / 2)\n        dcn_base = np.arange(-self.dcn_pad,\n                             self.dcn_pad + 1).astype(np.float64)\n        dcn_base_y = np.repeat(dcn_base, self.dcn_kernel)\n        dcn_base_x = np.tile(dcn_base, self.dcn_kernel)\n        dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape(\n            (-1))\n        self.dcn_base_offset = torch.tensor(dcn_base_offset).view(1, -1, 1, 1)\n\n        super(FCOSHead, self).__init__(\n            num_classes,\n            in_channels,\n            norm_cfg=norm_cfg,\n            init_cfg=init_cfg,\n            **kwargs)\n        self.regress_ranges = regress_ranges\n        self.reg_denoms = [\n            regress_range[-1] for regress_range in regress_ranges\n        ]\n        self.reg_denoms[-1] = self.reg_denoms[-2] * 2\n        self.center_sampling = center_sampling\n        self.center_sample_radius = center_sample_radius\n        self.sync_num_pos = sync_num_pos\n        self.bbox_norm_type = 
bbox_norm_type\n        self.gradient_mul = gradient_mul\n        self.use_vfl = use_vfl\n        if self.use_vfl:\n            self.loss_cls = build_loss(loss_cls)\n        else:\n            self.loss_cls = build_loss(loss_cls_fl)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_bbox_refine = build_loss(loss_bbox_refine)\n\n        # for getting ATSS targets\n        self.use_atss = use_atss\n        self.reg_decoded_bbox = reg_decoded_bbox\n        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)\n\n        self.anchor_center_offset = anchor_generator['center_offset']\n\n        self.num_base_priors = self.prior_generator.num_base_priors[0]\n\n        self.sampling = False\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n        # only be used in `get_atss_targets` when `use_atss` is True\n        self.atss_prior_generator = build_prior_generator(anchor_generator)\n\n        self.fcos_prior_generator = MlvlPointGenerator(\n            anchor_generator['strides'],\n            self.anchor_center_offset if self.use_atss else 0.5)\n\n        # In order to reuse the `get_bboxes` in `BaseDenseHead.\n        # Only be used in testing phase.\n        self.prior_generator = self.fcos_prior_generator\n\n    @property\n    def num_anchors(self):\n        \"\"\"\n        Returns:\n            int: Number of anchors on each point of feature map.\n        \"\"\"\n        warnings.warn('DeprecationWarning: `num_anchors` is deprecated, '\n                      'please use \"num_base_priors\" instead')\n        return self.num_base_priors\n\n    @property\n    def anchor_generator(self):\n        warnings.warn('DeprecationWarning: anchor_generator is deprecated, '\n                      'please use \"atss_prior_generator\" instead')\n        return self.prior_generator\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        super(FCOSHead, self)._init_cls_convs()\n        super(FCOSHead, self)._init_reg_convs()\n        self.relu = nn.ReLU(inplace=True)\n        self.vfnet_reg_conv = ConvModule(\n            self.feat_channels,\n            self.feat_channels,\n            3,\n            stride=1,\n            padding=1,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg,\n            bias=self.conv_bias)\n        self.vfnet_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)\n        self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides])\n\n        self.vfnet_reg_refine_dconv = DeformConv2d(\n            self.feat_channels,\n            self.feat_channels,\n            self.dcn_kernel,\n            1,\n            padding=self.dcn_pad)\n        self.vfnet_reg_refine = nn.Conv2d(self.feat_channels, 4, 3, padding=1)\n        self.scales_refine = nn.ModuleList([Scale(1.0) for _ in self.strides])\n\n        self.vfnet_cls_dconv = DeformConv2d(\n            self.feat_channels,\n            self.feat_channels,\n            self.dcn_kernel,\n            1,\n            padding=self.dcn_pad)\n        self.vfnet_cls = nn.Conv2d(\n            self.feat_channels, self.cls_out_channels, 3, padding=1)\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            
tuple:\n                cls_scores (list[Tensor]): Box iou-aware scores for each scale\n                    level, each is a 4D-tensor, the channel number is\n                    num_points * num_classes.\n                bbox_preds (list[Tensor]): Box offsets for each\n                    scale level, each is a 4D-tensor, the channel number is\n                    num_points * 4.\n                bbox_preds_refine (list[Tensor]): Refined Box offsets for\n                    each scale level, each is a 4D-tensor, the channel\n                    number is num_points * 4.\n        \"\"\"\n        return multi_apply(self.forward_single, feats, self.scales,\n                           self.scales_refine, self.strides, self.reg_denoms)\n\n    def forward_single(self, x, scale, scale_refine, stride, reg_denom):\n        \"\"\"Forward features of a single scale level.\n\n        Args:\n            x (Tensor): FPN feature maps of the specified stride.\n            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize\n                the bbox prediction.\n            scale_refine (:obj: `mmcv.cnn.Scale`): Learnable scale module to\n                resize the refined bbox prediction.\n            stride (int): The corresponding stride for feature maps,\n                used to normalize the bbox prediction when\n                bbox_norm_type = 'stride'.\n            reg_denom (int): The corresponding regression range for feature\n                maps, only used to normalize the bbox prediction when\n                bbox_norm_type = 'reg_denom'.\n\n        Returns:\n            tuple: iou-aware cls scores for each box, bbox predictions and\n                refined bbox predictions of input feature maps.\n        \"\"\"\n        cls_feat = x\n        reg_feat = x\n\n        for cls_layer in self.cls_convs:\n            cls_feat = cls_layer(cls_feat)\n\n        for reg_layer in self.reg_convs:\n            reg_feat = reg_layer(reg_feat)\n\n        # predict the bbox_pred of different level\n        reg_feat_init = self.vfnet_reg_conv(reg_feat)\n        if self.bbox_norm_type == 'reg_denom':\n            bbox_pred = scale(\n                self.vfnet_reg(reg_feat_init)).float().exp() * reg_denom\n        elif self.bbox_norm_type == 'stride':\n            bbox_pred = scale(\n                self.vfnet_reg(reg_feat_init)).float().exp() * stride\n        else:\n            raise NotImplementedError\n\n        # compute star deformable convolution offsets\n        # converting dcn_offset to reg_feat.dtype thus VFNet can be\n        # trained with FP16\n        dcn_offset = self.star_dcn_offset(bbox_pred, self.gradient_mul,\n                                          stride).to(reg_feat.dtype)\n\n        # refine the bbox_pred\n        reg_feat = self.relu(self.vfnet_reg_refine_dconv(reg_feat, dcn_offset))\n        bbox_pred_refine = scale_refine(\n            self.vfnet_reg_refine(reg_feat)).float().exp()\n        bbox_pred_refine = bbox_pred_refine * bbox_pred.detach()\n\n        # predict the iou-aware cls score\n        cls_feat = self.relu(self.vfnet_cls_dconv(cls_feat, dcn_offset))\n        cls_score = self.vfnet_cls(cls_feat)\n\n        if self.training:\n            return cls_score, bbox_pred, bbox_pred_refine\n        else:\n            return cls_score, bbox_pred_refine\n\n    def star_dcn_offset(self, bbox_pred, gradient_mul, stride):\n        \"\"\"Compute the star deformable conv offsets.\n\n        Args:\n            bbox_pred (Tensor): Predicted bbox distance offsets (l, r, t, 
b).\n            gradient_mul (float): Gradient multiplier.\n            stride (int): The corresponding stride for feature maps,\n                used to project the bbox onto the feature map.\n\n        Returns:\n            dcn_offsets (Tensor): The offsets for deformable convolution.\n        \"\"\"\n        dcn_base_offset = self.dcn_base_offset.type_as(bbox_pred)\n        bbox_pred_grad_mul = (1 - gradient_mul) * bbox_pred.detach() + \\\n            gradient_mul * bbox_pred\n        # map to the feature map scale\n        bbox_pred_grad_mul = bbox_pred_grad_mul / stride\n        N, C, H, W = bbox_pred.size()\n\n        x1 = bbox_pred_grad_mul[:, 0, :, :]\n        y1 = bbox_pred_grad_mul[:, 1, :, :]\n        x2 = bbox_pred_grad_mul[:, 2, :, :]\n        y2 = bbox_pred_grad_mul[:, 3, :, :]\n        bbox_pred_grad_mul_offset = bbox_pred.new_zeros(\n            N, 2 * self.num_dconv_points, H, W)\n        bbox_pred_grad_mul_offset[:, 0, :, :] = -1.0 * y1  # -y1\n        bbox_pred_grad_mul_offset[:, 1, :, :] = -1.0 * x1  # -x1\n        bbox_pred_grad_mul_offset[:, 2, :, :] = -1.0 * y1  # -y1\n        bbox_pred_grad_mul_offset[:, 4, :, :] = -1.0 * y1  # -y1\n        bbox_pred_grad_mul_offset[:, 5, :, :] = x2  # x2\n        bbox_pred_grad_mul_offset[:, 7, :, :] = -1.0 * x1  # -x1\n        bbox_pred_grad_mul_offset[:, 11, :, :] = x2  # x2\n        bbox_pred_grad_mul_offset[:, 12, :, :] = y2  # y2\n        bbox_pred_grad_mul_offset[:, 13, :, :] = -1.0 * x1  # -x1\n        bbox_pred_grad_mul_offset[:, 14, :, :] = y2  # y2\n        bbox_pred_grad_mul_offset[:, 16, :, :] = y2  # y2\n        bbox_pred_grad_mul_offset[:, 17, :, :] = x2  # x2\n        dcn_offset = bbox_pred_grad_mul_offset - dcn_base_offset\n\n        return dcn_offset\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'bbox_preds_refine'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             bbox_preds_refine,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box iou-aware scores for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_points * num_classes.\n            bbox_preds (list[Tensor]): Box offsets for each\n                scale level, each is a 4D-tensor, the channel number is\n                num_points * 4.\n            bbox_preds_refine (list[Tensor]): Refined Box offsets for\n                each scale level, each is a 4D-tensor, the channel\n                number is num_points * 4.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n                Default: None.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(bbox_preds_refine)\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        all_level_points = self.fcos_prior_generator.grid_priors(\n            featmap_sizes, bbox_preds[0].dtype, 
bbox_preds[0].device)\n        labels, label_weights, bbox_targets, bbox_weights = self.get_targets(\n            cls_scores, all_level_points, gt_bboxes, gt_labels, img_metas,\n            gt_bboxes_ignore)\n\n        num_imgs = cls_scores[0].size(0)\n        # flatten cls_scores, bbox_preds and bbox_preds_refine\n        flatten_cls_scores = [\n            cls_score.permute(0, 2, 3,\n                              1).reshape(-1,\n                                         self.cls_out_channels).contiguous()\n            for cls_score in cls_scores\n        ]\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4).contiguous()\n            for bbox_pred in bbox_preds\n        ]\n        flatten_bbox_preds_refine = [\n            bbox_pred_refine.permute(0, 2, 3, 1).reshape(-1, 4).contiguous()\n            for bbox_pred_refine in bbox_preds_refine\n        ]\n        flatten_cls_scores = torch.cat(flatten_cls_scores)\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds)\n        flatten_bbox_preds_refine = torch.cat(flatten_bbox_preds_refine)\n        flatten_labels = torch.cat(labels)\n        flatten_bbox_targets = torch.cat(bbox_targets)\n        # repeat points to align with bbox_preds\n        flatten_points = torch.cat(\n            [points.repeat(num_imgs, 1) for points in all_level_points])\n\n        # FG cat_id: [0, num_classes - 1], BG cat_id: num_classes\n        bg_class_ind = self.num_classes\n        pos_inds = torch.where(\n            ((flatten_labels >= 0) & (flatten_labels < bg_class_ind)) > 0)[0]\n        num_pos = len(pos_inds)\n\n        pos_bbox_preds = flatten_bbox_preds[pos_inds]\n        pos_bbox_preds_refine = flatten_bbox_preds_refine[pos_inds]\n        pos_labels = flatten_labels[pos_inds]\n\n        # sync num_pos across all gpus\n        if self.sync_num_pos:\n            num_pos_avg_per_gpu = reduce_mean(\n                pos_inds.new_tensor(num_pos).float()).item()\n            num_pos_avg_per_gpu = max(num_pos_avg_per_gpu, 1.0)\n        else:\n            num_pos_avg_per_gpu = num_pos\n\n        pos_bbox_targets = flatten_bbox_targets[pos_inds]\n        pos_points = flatten_points[pos_inds]\n\n        pos_decoded_bbox_preds = self.bbox_coder.decode(\n            pos_points, pos_bbox_preds)\n        pos_decoded_target_preds = self.bbox_coder.decode(\n            pos_points, pos_bbox_targets)\n        iou_targets_ini = bbox_overlaps(\n            pos_decoded_bbox_preds,\n            pos_decoded_target_preds.detach(),\n            is_aligned=True).clamp(min=1e-6)\n        bbox_weights_ini = iou_targets_ini.clone().detach()\n        bbox_avg_factor_ini = reduce_mean(\n            bbox_weights_ini.sum()).clamp_(min=1).item()\n\n        pos_decoded_bbox_preds_refine = \\\n            self.bbox_coder.decode(pos_points, pos_bbox_preds_refine)\n        iou_targets_rf = bbox_overlaps(\n            pos_decoded_bbox_preds_refine,\n            pos_decoded_target_preds.detach(),\n            is_aligned=True).clamp(min=1e-6)\n        bbox_weights_rf = iou_targets_rf.clone().detach()\n        bbox_avg_factor_rf = reduce_mean(\n            bbox_weights_rf.sum()).clamp_(min=1).item()\n\n        if num_pos > 0:\n            loss_bbox = self.loss_bbox(\n                pos_decoded_bbox_preds,\n                pos_decoded_target_preds.detach(),\n                weight=bbox_weights_ini,\n                avg_factor=bbox_avg_factor_ini)\n\n            loss_bbox_refine = self.loss_bbox_refine(\n                pos_decoded_bbox_preds_refine,\n   
             pos_decoded_target_preds.detach(),\n                weight=bbox_weights_rf,\n                avg_factor=bbox_avg_factor_rf)\n\n            # build IoU-aware cls_score targets\n            if self.use_vfl:\n                pos_ious = iou_targets_rf.clone().detach()\n                cls_iou_targets = torch.zeros_like(flatten_cls_scores)\n                cls_iou_targets[pos_inds, pos_labels] = pos_ious\n        else:\n            loss_bbox = pos_bbox_preds.sum() * 0\n            loss_bbox_refine = pos_bbox_preds_refine.sum() * 0\n            if self.use_vfl:\n                cls_iou_targets = torch.zeros_like(flatten_cls_scores)\n\n        if self.use_vfl:\n            loss_cls = self.loss_cls(\n                flatten_cls_scores,\n                cls_iou_targets,\n                avg_factor=num_pos_avg_per_gpu)\n        else:\n            loss_cls = self.loss_cls(\n                flatten_cls_scores,\n                flatten_labels,\n                weight=label_weights,\n                avg_factor=num_pos_avg_per_gpu)\n\n        return dict(\n            loss_cls=loss_cls,\n            loss_bbox=loss_bbox,\n            loss_bbox_rf=loss_bbox_refine)\n\n    def get_targets(self, cls_scores, mlvl_points, gt_bboxes, gt_labels,\n                    img_metas, gt_bboxes_ignore):\n        \"\"\"A wrapper for computing ATSS and FCOS targets for points in multiple\n        images.\n\n        Args:\n            cls_scores (list[Tensor]): Box iou-aware scores for each scale\n                level with shape (N, num_points * num_classes, H, W).\n            mlvl_points (list[Tensor]): Points of each fpn level, each has\n                shape (num_points, 2).\n            gt_bboxes (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n            gt_labels (list[Tensor]): Ground truth labels of each box,\n                each has shape (num_gt,).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n\n        Returns:\n            tuple:\n                labels_list (list[Tensor]): Labels of each level.\n                label_weights (Tensor/None): Label weights of all levels.\n                bbox_targets_list (list[Tensor]): Regression targets of each\n                    level, (l, t, r, b).\n                bbox_weights (Tensor/None): Bbox weights of all levels.\n        \"\"\"\n        if self.use_atss:\n            return self.get_atss_targets(cls_scores, mlvl_points, gt_bboxes,\n                                         gt_labels, img_metas,\n                                         gt_bboxes_ignore)\n        else:\n            self.norm_on_bbox = False\n            return self.get_fcos_targets(mlvl_points, gt_bboxes, gt_labels)\n\n    def _get_target_single(self, *args, **kwargs):\n        \"\"\"Avoid ambiguity in multiple inheritance.\"\"\"\n        if self.use_atss:\n            return ATSSHead._get_target_single(self, *args, **kwargs)\n        else:\n            return FCOSHead._get_target_single(self, *args, **kwargs)\n\n    def get_fcos_targets(self, points, gt_bboxes_list, gt_labels_list):\n        \"\"\"Compute FCOS regression and classification targets for points in\n        multiple images.\n\n        Args:\n            points (list[Tensor]): Points of each fpn level, each has shape\n                (num_points, 2).\n            
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n            gt_labels_list (list[Tensor]): Ground truth labels of each box,\n                each has shape (num_gt,).\n\n        Returns:\n            tuple:\n                labels (list[Tensor]): Labels of each level.\n                label_weights: None, to be compatible with ATSS targets.\n                bbox_targets (list[Tensor]): BBox targets of each level.\n                bbox_weights: None, to be compatible with ATSS targets.\n        \"\"\"\n        labels, bbox_targets = FCOSHead.get_targets(self, points,\n                                                    gt_bboxes_list,\n                                                    gt_labels_list)\n        label_weights = None\n        bbox_weights = None\n        return labels, label_weights, bbox_targets, bbox_weights\n\n    def get_anchors(self, featmap_sizes, img_metas, device='cuda'):\n        \"\"\"Get anchors according to feature map sizes.\n\n        Args:\n            featmap_sizes (list[tuple]): Multi-level feature map sizes.\n            img_metas (list[dict]): Image meta info.\n            device (torch.device | str): Device for returned tensors\n\n        Returns:\n            tuple:\n                anchor_list (list[Tensor]): Anchors of each image.\n                valid_flag_list (list[Tensor]): Valid flags of each image.\n        \"\"\"\n        num_imgs = len(img_metas)\n\n        # since feature map sizes of all images are the same, we only compute\n        # anchors for one time\n        multi_level_anchors = self.atss_prior_generator.grid_priors(\n            featmap_sizes, device=device)\n        anchor_list = [multi_level_anchors for _ in range(num_imgs)]\n\n        # for each image, we compute valid flags of multi level anchors\n        valid_flag_list = []\n        for img_id, img_meta in enumerate(img_metas):\n            multi_level_flags = self.atss_prior_generator.valid_flags(\n                featmap_sizes, img_meta['pad_shape'], device=device)\n            valid_flag_list.append(multi_level_flags)\n\n        return anchor_list, valid_flag_list\n\n    def get_atss_targets(self,\n                         cls_scores,\n                         mlvl_points,\n                         gt_bboxes,\n                         gt_labels,\n                         img_metas,\n                         gt_bboxes_ignore=None):\n        \"\"\"A wrapper for computing ATSS targets for points in multiple images.\n\n        Args:\n            cls_scores (list[Tensor]): Box iou-aware scores for each scale\n                level with shape (N, num_points * num_classes, H, W).\n            mlvl_points (list[Tensor]): Points of each fpn level, each has\n                shape (num_points, 2).\n            gt_bboxes (list[Tensor]): Ground truth bboxes of each image,\n                each has shape (num_gt, 4).\n            gt_labels (list[Tensor]): Ground truth labels of each box,\n                each has shape (num_gt,).\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4). 
Default: None.\n\n        Returns:\n            tuple:\n                labels_list (list[Tensor]): Labels of each level.\n                label_weights (Tensor): Label weights of all levels.\n                bbox_targets_list (list[Tensor]): Regression targets of each\n                    level, (l, t, r, b).\n                bbox_weights (Tensor): Bbox weights of all levels.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(\n            featmap_sizes\n        ) == self.atss_prior_generator.num_levels == \\\n            self.fcos_prior_generator.num_levels\n\n        device = cls_scores[0].device\n\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n\n        cls_reg_targets = ATSSHead.get_targets(\n            self,\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels,\n            unmap_outputs=True)\n        if cls_reg_targets is None:\n            return None\n\n        (anchor_list, labels_list, label_weights_list, bbox_targets_list,\n         bbox_weights_list, num_total_pos, num_total_neg) = cls_reg_targets\n\n        bbox_targets_list = [\n            bbox_targets.reshape(-1, 4) for bbox_targets in bbox_targets_list\n        ]\n\n        num_imgs = len(img_metas)\n        # transform bbox_targets (x1, y1, x2, y2) into (l, t, r, b) format\n        bbox_targets_list = self.transform_bbox_targets(\n            bbox_targets_list, mlvl_points, num_imgs)\n\n        labels_list = [labels.reshape(-1) for labels in labels_list]\n        label_weights_list = [\n            label_weights.reshape(-1) for label_weights in label_weights_list\n        ]\n        bbox_weights_list = [\n            bbox_weights.reshape(-1) for bbox_weights in bbox_weights_list\n        ]\n        label_weights = torch.cat(label_weights_list)\n        bbox_weights = torch.cat(bbox_weights_list)\n        return labels_list, label_weights, bbox_targets_list, bbox_weights\n\n    def transform_bbox_targets(self, decoded_bboxes, mlvl_points, num_imgs):\n        \"\"\"Transform bbox_targets (x1, y1, x2, y2) into (l, t, r, b) format.\n\n        Args:\n            decoded_bboxes (list[Tensor]): Regression targets of each level,\n                in the form of (x1, y1, x2, y2).\n            mlvl_points (list[Tensor]): Points of each fpn level, each has\n                shape (num_points, 2).\n            num_imgs (int): the number of images in a batch.\n\n        Returns:\n            bbox_targets (list[Tensor]): Regression targets of each level in\n                the form of (l, t, r, b).\n        \"\"\"\n        # TODO: Re-implemented in Class PointCoder\n        assert len(decoded_bboxes) == len(mlvl_points)\n        num_levels = len(decoded_bboxes)\n        mlvl_points = [points.repeat(num_imgs, 1) for points in mlvl_points]\n        bbox_targets = []\n        for i in range(num_levels):\n            bbox_target = self.bbox_coder.encode(mlvl_points[i],\n                                                 decoded_bboxes[i])\n            bbox_targets.append(bbox_target)\n\n        return bbox_targets\n\n    def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict,\n                              missing_keys, unexpected_keys, 
error_msgs):\n        \"\"\"Override the method in the parent class to avoid changing the\n        parameters' names.\"\"\"\n        pass\n\n    def _get_points_single(self,\n                           featmap_size,\n                           stride,\n                           dtype,\n                           device,\n                           flatten=False):\n        \"\"\"Get points according to feature map size.\n\n        This function will be deprecated soon.\n        \"\"\"\n\n        warnings.warn(\n            '`_get_points_single` in `VFNetHead` will be '\n            'deprecated soon, we support a multi level point generator now, '\n            'you can get points of a single level feature map '\n            'with `self.fcos_prior_generator.single_level_grid_priors` ')\n\n        h, w = featmap_size\n        x_range = torch.arange(\n            0, w * stride, stride, dtype=dtype, device=device)\n        y_range = torch.arange(\n            0, h * stride, stride, dtype=dtype, device=device)\n        y, x = torch.meshgrid(y_range, x_range)\n        # to be compatible with anchor points in ATSS\n        if self.use_atss:\n            points = torch.stack(\n                (x.reshape(-1), y.reshape(-1)), dim=-1) + \\\n                     stride * self.anchor_center_offset\n        else:\n            points = torch.stack(\n                (x.reshape(-1), y.reshape(-1)), dim=-1) + stride // 2\n        return points\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/yolact_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, ModuleList, force_fp32\n\nfrom mmdet.core import build_sampler, fast_nms, images_to_levels, multi_apply\nfrom mmdet.core.utils import select_single_mlvl\nfrom ..builder import HEADS, build_loss\nfrom .anchor_head import AnchorHead\n\n\n@HEADS.register_module()\nclass YOLACTHead(AnchorHead):\n    \"\"\"YOLACT box head used in https://arxiv.org/abs/1904.02689.\n\n    Note that YOLACT head is a light version of RetinaNet head.\n    Four differences are described as follows:\n\n    1. YOLACT box head has three-times fewer anchors.\n    2. YOLACT box head shares the convs for box and cls branches.\n    3. YOLACT box head uses OHEM instead of Focal loss.\n    4. YOLACT box head predicts a set of mask coefficients for each box.\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        anchor_generator (dict): Config dict for anchor generator\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        num_head_convs (int): Number of the conv layers shared by\n            box and cls branches.\n        num_protos (int): Number of the mask coefficients.\n        use_ohem (bool): If true, ``loss_single_OHEM`` will be used for\n            cls loss calculation. If false, ``loss_single`` will be used.\n        conv_cfg (dict): Dictionary to construct and config conv layer.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 anchor_generator=dict(\n                     type='AnchorGenerator',\n                     octave_base_scale=3,\n                     scales_per_octave=1,\n                     ratios=[0.5, 1.0, 2.0],\n                     strides=[8, 16, 32, 64, 128]),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=False,\n                     reduction='none',\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0, loss_weight=1.5),\n                 num_head_convs=1,\n                 num_protos=32,\n                 use_ohem=True,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 init_cfg=dict(\n                     type='Xavier',\n                     distribution='uniform',\n                     bias=0,\n                     layer='Conv2d'),\n                 **kwargs):\n        self.num_head_convs = num_head_convs\n        self.num_protos = num_protos\n        self.use_ohem = use_ohem\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        super(YOLACTHead, self).__init__(\n            num_classes,\n            in_channels,\n            loss_cls=loss_cls,\n            loss_bbox=loss_bbox,\n            anchor_generator=anchor_generator,\n            init_cfg=init_cfg,\n            **kwargs)\n        if self.use_ohem:\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n            self.sampling = False\n\n    
def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.relu = nn.ReLU(inplace=True)\n        self.head_convs = ModuleList()\n        for i in range(self.num_head_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            self.head_convs.append(\n                ConvModule(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        self.conv_cls = nn.Conv2d(\n            self.feat_channels,\n            self.num_base_priors * self.cls_out_channels,\n            3,\n            padding=1)\n        self.conv_reg = nn.Conv2d(\n            self.feat_channels, self.num_base_priors * 4, 3, padding=1)\n        self.conv_coeff = nn.Conv2d(\n            self.feat_channels,\n            self.num_base_priors * self.num_protos,\n            3,\n            padding=1)\n\n    def forward_single(self, x):\n        \"\"\"Forward feature of a single scale level.\n\n        Args:\n            x (Tensor): Features of a single scale level.\n\n        Returns:\n            tuple:\n                cls_score (Tensor): Cls scores for a single scale level \\\n                    the channels number is num_anchors * num_classes.\n                bbox_pred (Tensor): Box energies / deltas for a single scale \\\n                    level, the channels number is num_anchors * 4.\n                coeff_pred (Tensor): Mask coefficients for a single scale \\\n                    level, the channels number is num_anchors * num_protos.\n        \"\"\"\n        for head_conv in self.head_convs:\n            x = head_conv(x)\n        cls_score = self.conv_cls(x)\n        bbox_pred = self.conv_reg(x)\n        coeff_pred = self.conv_coeff(x).tanh()\n        return cls_score, bbox_pred, coeff_pred\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"A combination of the func:``AnchorHead.loss`` and\n        func:``SSDHead.loss``.\n\n        When ``self.use_ohem == True``, it functions like ``SSDHead.loss``,\n        otherwise, it follows ``AnchorHead.loss``. Besides, it additionally\n        returns ``sampling_results``.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding\n                boxes can be ignored when computing the loss. 
Default: None\n\n        Returns:\n            tuple:\n                dict[str, Tensor]: A dictionary of loss components.\n                List[:obj:``SamplingResult``]: Sampler results for each image.\n        \"\"\"\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        assert len(featmap_sizes) == self.prior_generator.num_levels\n\n        device = cls_scores[0].device\n\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels,\n            unmap_outputs=not self.use_ohem,\n            return_sampling_results=True)\n        if cls_reg_targets is None:\n            return None\n        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,\n         num_total_pos, num_total_neg, sampling_results) = cls_reg_targets\n\n        if self.use_ohem:\n            num_images = len(img_metas)\n            all_cls_scores = torch.cat([\n                s.permute(0, 2, 3, 1).reshape(\n                    num_images, -1, self.cls_out_channels) for s in cls_scores\n            ], 1)\n            all_labels = torch.cat(labels_list, -1).view(num_images, -1)\n            all_label_weights = torch.cat(label_weights_list,\n                                          -1).view(num_images, -1)\n            all_bbox_preds = torch.cat([\n                b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)\n                for b in bbox_preds\n            ], -2)\n            all_bbox_targets = torch.cat(bbox_targets_list,\n                                         -2).view(num_images, -1, 4)\n            all_bbox_weights = torch.cat(bbox_weights_list,\n                                         -2).view(num_images, -1, 4)\n\n            # concat all level anchors to a single tensor\n            all_anchors = []\n            for i in range(num_images):\n                all_anchors.append(torch.cat(anchor_list[i]))\n\n            # check NaN and Inf\n            assert torch.isfinite(all_cls_scores).all().item(), \\\n                'classification scores become infinite or NaN!'\n            assert torch.isfinite(all_bbox_preds).all().item(), \\\n                'bbox predications become infinite or NaN!'\n\n            losses_cls, losses_bbox = multi_apply(\n                self.loss_single_OHEM,\n                all_cls_scores,\n                all_bbox_preds,\n                all_anchors,\n                all_labels,\n                all_label_weights,\n                all_bbox_targets,\n                all_bbox_weights,\n                num_total_samples=num_total_pos)\n        else:\n            num_total_samples = (\n                num_total_pos +\n                num_total_neg if self.sampling else num_total_pos)\n\n            # anchor number of multi levels\n            num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n            # concat all level anchors and flags to a single tensor\n            concat_anchor_list = []\n            for i in range(len(anchor_list)):\n                concat_anchor_list.append(torch.cat(anchor_list[i]))\n            all_anchor_list = images_to_levels(concat_anchor_list,\n                                     
          num_level_anchors)\n            losses_cls, losses_bbox = multi_apply(\n                self.loss_single,\n                cls_scores,\n                bbox_preds,\n                all_anchor_list,\n                labels_list,\n                label_weights_list,\n                bbox_targets_list,\n                bbox_weights_list,\n                num_total_samples=num_total_samples)\n\n        return dict(\n            loss_cls=losses_cls, loss_bbox=losses_bbox), sampling_results\n\n    def loss_single_OHEM(self, cls_score, bbox_pred, anchors, labels,\n                         label_weights, bbox_targets, bbox_weights,\n                         num_total_samples):\n        \"\"\"\"See func:``SSDHead.loss``.\"\"\"\n        loss_cls_all = self.loss_cls(cls_score, labels, label_weights)\n\n        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n        pos_inds = ((labels >= 0) & (labels < self.num_classes)).nonzero(\n            as_tuple=False).reshape(-1)\n        neg_inds = (labels == self.num_classes).nonzero(\n            as_tuple=False).view(-1)\n\n        num_pos_samples = pos_inds.size(0)\n        if num_pos_samples == 0:\n            num_neg_samples = neg_inds.size(0)\n        else:\n            num_neg_samples = self.train_cfg.neg_pos_ratio * num_pos_samples\n            if num_neg_samples > neg_inds.size(0):\n                num_neg_samples = neg_inds.size(0)\n        topk_loss_cls_neg, _ = loss_cls_all[neg_inds].topk(num_neg_samples)\n        loss_cls_pos = loss_cls_all[pos_inds].sum()\n        loss_cls_neg = topk_loss_cls_neg.sum()\n        loss_cls = (loss_cls_pos + loss_cls_neg) / num_total_samples\n        if self.reg_decoded_bbox:\n            # When the regression loss (e.g. `IouLoss`, `GIouLoss`)\n            # is applied directly on the decoded bounding boxes, it\n            # decodes the already encoded coordinates to absolute format.\n            bbox_pred = self.bbox_coder.decode(anchors, bbox_pred)\n        loss_bbox = self.loss_bbox(\n            bbox_pred,\n            bbox_targets,\n            bbox_weights,\n            avg_factor=num_total_samples)\n        return loss_cls[None], loss_bbox\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'coeff_preds'))\n    def get_bboxes(self,\n                   cls_scores,\n                   bbox_preds,\n                   coeff_preds,\n                   img_metas,\n                   cfg=None,\n                   rescale=False):\n        \"\"\"\"Similar to func:``AnchorHead.get_bboxes``, but additionally\n        processes coeff_preds.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                with shape (N, num_anchors * num_classes, H, W)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (N, num_anchors * 4, H, W)\n            coeff_preds (list[Tensor]): Mask coefficients for each scale\n                level with shape (N, num_anchors * num_protos, H, W)\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            cfg (mmcv.Config | None): Test / postprocessing configuration,\n                if None, test_cfg would be used\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n\n        Returns:\n            list[tuple[Tensor, Tensor, Tensor]]: Each item in result_list is\n                a 3-tuple. 
The first item is an (n, 5) tensor, where the\n                first 4 columns are bounding box positions\n                (tl_x, tl_y, br_x, br_y) and the 5-th column is a score\n                between 0 and 1. The second item is an (n,) tensor where each\n                item is the predicted class label of the corresponding box.\n                The third item is an (n, num_protos) tensor where each item\n                is the predicted mask coefficients of instance inside the\n                corresponding box.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds)\n        num_levels = len(cls_scores)\n\n        device = cls_scores[0].device\n        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]\n        mlvl_anchors = self.prior_generator.grid_priors(\n            featmap_sizes, device=device)\n\n        det_bboxes = []\n        det_labels = []\n        det_coeffs = []\n        for img_id in range(len(img_metas)):\n            cls_score_list = select_single_mlvl(cls_scores, img_id)\n            bbox_pred_list = select_single_mlvl(bbox_preds, img_id)\n            coeff_pred_list = select_single_mlvl(coeff_preds, img_id)\n            img_shape = img_metas[img_id]['img_shape']\n            scale_factor = img_metas[img_id]['scale_factor']\n            bbox_res = self._get_bboxes_single(cls_score_list, bbox_pred_list,\n                                               coeff_pred_list, mlvl_anchors,\n                                               img_shape, scale_factor, cfg,\n                                               rescale)\n            det_bboxes.append(bbox_res[0])\n            det_labels.append(bbox_res[1])\n            det_coeffs.append(bbox_res[2])\n        return det_bboxes, det_labels, det_coeffs\n\n    def _get_bboxes_single(self,\n                           cls_score_list,\n                           bbox_pred_list,\n                           coeff_preds_list,\n                           mlvl_anchors,\n                           img_shape,\n                           scale_factor,\n                           cfg,\n                           rescale=False):\n        \"\"\"\"Similar to func:``AnchorHead._get_bboxes_single``, but additionally\n        processes coeff_preds_list and uses fast NMS instead of traditional\n        NMS.\n\n        Args:\n            cls_score_list (list[Tensor]): Box scores for a single scale level\n                Has shape (num_anchors * num_classes, H, W).\n            bbox_pred_list (list[Tensor]): Box energies / deltas for a single\n                scale level with shape (num_anchors * 4, H, W).\n            coeff_preds_list (list[Tensor]): Mask coefficients for a single\n                scale level with shape (num_anchors * num_protos, H, W).\n            mlvl_anchors (list[Tensor]): Box reference for a single scale level\n                with shape (num_total_anchors, 4).\n            img_shape (tuple[int]): Shape of the input image,\n                (height, width, 3).\n            scale_factor (ndarray): Scale factor of the image arange as\n                (w_scale, h_scale, w_scale, h_scale).\n            cfg (mmcv.Config): Test / postprocessing configuration,\n                if None, test_cfg would be used.\n            rescale (bool): If True, return boxes in original image space.\n\n        Returns:\n            tuple[Tensor, Tensor, Tensor]: The first item is an (n, 5) tensor,\n                where the first 4 columns are bounding box positions\n                (tl_x, tl_y, br_x, br_y) and the 
5-th column is a score between\n                0 and 1. The second item is an (n,) tensor where each item is\n                the predicted class label of the corresponding box. The third\n                item is an (n, num_protos) tensor where each item is the\n                predicted mask coefficients of instance inside the\n                corresponding box.\n        \"\"\"\n        cfg = self.test_cfg if cfg is None else cfg\n        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_anchors)\n        nms_pre = cfg.get('nms_pre', -1)\n        mlvl_bboxes = []\n        mlvl_scores = []\n        mlvl_coeffs = []\n        for cls_score, bbox_pred, coeff_pred, anchors in \\\n                zip(cls_score_list, bbox_pred_list,\n                    coeff_preds_list, mlvl_anchors):\n            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]\n            cls_score = cls_score.permute(1, 2,\n                                          0).reshape(-1, self.cls_out_channels)\n            if self.use_sigmoid_cls:\n                scores = cls_score.sigmoid()\n            else:\n                scores = cls_score.softmax(-1)\n            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)\n            coeff_pred = coeff_pred.permute(1, 2,\n                                            0).reshape(-1, self.num_protos)\n\n            if 0 < nms_pre < scores.shape[0]:\n                # Get maximum scores for foreground classes.\n                if self.use_sigmoid_cls:\n                    max_scores, _ = scores.max(dim=1)\n                else:\n                    # remind that we set FG labels to [0, num_class-1]\n                    # since mmdet v2.0\n                    # BG cat_id: num_class\n                    max_scores, _ = scores[:, :-1].max(dim=1)\n                _, topk_inds = max_scores.topk(nms_pre)\n                anchors = anchors[topk_inds, :]\n                bbox_pred = bbox_pred[topk_inds, :]\n                scores = scores[topk_inds, :]\n                coeff_pred = coeff_pred[topk_inds, :]\n            bboxes = self.bbox_coder.decode(\n                anchors, bbox_pred, max_shape=img_shape)\n            mlvl_bboxes.append(bboxes)\n            mlvl_scores.append(scores)\n            mlvl_coeffs.append(coeff_pred)\n        mlvl_bboxes = torch.cat(mlvl_bboxes)\n        if rescale:\n            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)\n        mlvl_scores = torch.cat(mlvl_scores)\n        mlvl_coeffs = torch.cat(mlvl_coeffs)\n        if self.use_sigmoid_cls:\n            # Add a dummy background class to the backend when using sigmoid\n            # remind that we set FG labels to [0, num_class-1] since mmdet v2.0\n            # BG cat_id: num_class\n            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)\n            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)\n        det_bboxes, det_labels, det_coeffs = fast_nms(mlvl_bboxes, mlvl_scores,\n                                                      mlvl_coeffs,\n                                                      cfg.score_thr,\n                                                      cfg.iou_thr, cfg.top_k,\n                                                      cfg.max_per_img)\n        return det_bboxes, det_labels, det_coeffs\n\n\n@HEADS.register_module()\nclass YOLACTSegmHead(BaseModule):\n    \"\"\"YOLACT segmentation head used in https://arxiv.org/abs/1904.02689.\n\n    Apply a semantic segmentation loss on feature space using layers that are\n    only evaluated during 
training to increase performance with no speed\n    penalty.\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        num_classes (int): Number of categories excluding the background\n            category.\n        loss_segm (dict): Config of semantic segmentation loss.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels=256,\n                 loss_segm=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 init_cfg=dict(\n                     type='Xavier',\n                     distribution='uniform',\n                     override=dict(name='segm_conv'))):\n        super(YOLACTSegmHead, self).__init__(init_cfg)\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.loss_segm = build_loss(loss_segm)\n        self._init_layers()\n        self.fp16_enabled = False\n\n    def _init_layers(self):\n        \"\"\"Initialize layers of the head.\"\"\"\n        self.segm_conv = nn.Conv2d(\n            self.in_channels, self.num_classes, kernel_size=1)\n\n    def forward(self, x):\n        \"\"\"Forward feature from the upstream network.\n\n        Args:\n            x (Tensor): Feature from the upstream network, which is\n                a 4D-tensor.\n\n        Returns:\n            Tensor: Predicted semantic segmentation map with shape\n                (N, num_classes, H, W).\n        \"\"\"\n        return self.segm_conv(x)\n\n    @force_fp32(apply_to=('segm_pred', ))\n    def loss(self, segm_pred, gt_masks, gt_labels):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            segm_pred (list[Tensor]): Predicted semantic segmentation map\n                with shape (N, num_classes, H, W).\n            gt_masks (list[Tensor]): Ground truth masks for each image with\n                the same shape of the input image.\n            gt_labels (list[Tensor]): Class indices corresponding to each box.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        loss_segm = []\n        num_imgs, num_classes, mask_h, mask_w = segm_pred.size()\n        for idx in range(num_imgs):\n            cur_segm_pred = segm_pred[idx]\n            cur_gt_masks = gt_masks[idx].float()\n            cur_gt_labels = gt_labels[idx]\n            segm_targets = self.get_targets(cur_segm_pred, cur_gt_masks,\n                                            cur_gt_labels)\n            if segm_targets is None:\n                loss = self.loss_segm(cur_segm_pred,\n                                      torch.zeros_like(cur_segm_pred),\n                                      torch.zeros_like(cur_segm_pred))\n            else:\n                loss = self.loss_segm(\n                    cur_segm_pred,\n                    segm_targets,\n                    avg_factor=num_imgs * mask_h * mask_w)\n            loss_segm.append(loss)\n        return dict(loss_segm=loss_segm)\n\n    def get_targets(self, segm_pred, gt_masks, gt_labels):\n        \"\"\"Compute semantic segmentation targets for each image.\n\n        Args:\n            segm_pred (Tensor): Predicted semantic segmentation map\n                with shape (num_classes, H, W).\n            gt_masks (Tensor): Ground truth masks for each image with\n                the same shape of the input image.\n            gt_labels 
(Tensor): Class indices corresponding to each box.\n\n        Returns:\n            Tensor: Semantic segmentation targets with shape\n                (num_classes, H, W).\n        \"\"\"\n        if gt_masks.size(0) == 0:\n            return None\n        num_classes, mask_h, mask_w = segm_pred.size()\n        with torch.no_grad():\n            downsampled_masks = F.interpolate(\n                gt_masks.unsqueeze(0), (mask_h, mask_w),\n                mode='bilinear',\n                align_corners=False).squeeze(0)\n            downsampled_masks = downsampled_masks.gt(0.5).float()\n            segm_targets = torch.zeros_like(segm_pred, requires_grad=False)\n            for obj_idx in range(downsampled_masks.size(0)):\n                segm_targets[gt_labels[obj_idx] - 1] = torch.max(\n                    segm_targets[gt_labels[obj_idx] - 1],\n                    downsampled_masks[obj_idx])\n            return segm_targets\n\n    def simple_test(self, feats, img_metas, rescale=False):\n        \"\"\"Test function without test-time augmentation.\"\"\"\n        raise NotImplementedError(\n            'simple_test of YOLACTSegmHead is not implemented '\n            'because this head is only evaluated during training')\n\n\n@HEADS.register_module()\nclass YOLACTProtonet(BaseModule):\n    \"\"\"YOLACT mask head used in https://arxiv.org/abs/1904.02689.\n\n    This head outputs the mask prototypes for YOLACT.\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        proto_channels (tuple[int]): Output channels of protonet convs.\n        proto_kernel_sizes (tuple[int]): Kernel sizes of protonet convs.\n        include_last_relu (Bool): If keep the last relu of protonet.\n        num_protos (int): Number of prototypes.\n        num_classes (int): Number of categories excluding the background\n            category.\n        loss_mask_weight (float): Reweight the mask loss by this factor.\n        max_masks_to_train (int): Maximum number of masks to train for\n            each image.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels=256,\n                 proto_channels=(256, 256, 256, None, 256, 32),\n                 proto_kernel_sizes=(3, 3, 3, -2, 3, 1),\n                 include_last_relu=True,\n                 num_protos=32,\n                 loss_mask_weight=1.0,\n                 max_masks_to_train=100,\n                 init_cfg=dict(\n                     type='Xavier',\n                     distribution='uniform',\n                     override=dict(name='protonet'))):\n        super(YOLACTProtonet, self).__init__(init_cfg)\n        self.in_channels = in_channels\n        self.proto_channels = proto_channels\n        self.proto_kernel_sizes = proto_kernel_sizes\n        self.include_last_relu = include_last_relu\n        self.protonet = self._init_layers()\n\n        self.loss_mask_weight = loss_mask_weight\n        self.num_protos = num_protos\n        self.num_classes = num_classes\n        self.max_masks_to_train = max_masks_to_train\n        self.fp16_enabled = False\n\n    def _init_layers(self):\n        \"\"\"A helper function to take a config setting and turn it into a\n        network.\"\"\"\n        # Possible patterns:\n        # ( 256, 3) -> conv\n        # ( 256,-2) -> deconv\n        # (None,-2) -> bilinear interpolate\n        in_channels = self.in_channels\n        protonets = ModuleList()\n        for 
num_channels, kernel_size in zip(self.proto_channels,\n                                             self.proto_kernel_sizes):\n            if kernel_size > 0:\n                layer = nn.Conv2d(\n                    in_channels,\n                    num_channels,\n                    kernel_size,\n                    padding=kernel_size // 2)\n            else:\n                if num_channels is None:\n                    layer = InterpolateModule(\n                        scale_factor=-kernel_size,\n                        mode='bilinear',\n                        align_corners=False)\n                else:\n                    layer = nn.ConvTranspose2d(\n                        in_channels,\n                        num_channels,\n                        -kernel_size,\n                        padding=kernel_size // 2)\n            protonets.append(layer)\n            protonets.append(nn.ReLU(inplace=True))\n            in_channels = num_channels if num_channels is not None \\\n                else in_channels\n        if not self.include_last_relu:\n            protonets = protonets[:-1]\n        return nn.Sequential(*protonets)\n\n    def forward_dummy(self, x):\n        prototypes = self.protonet(x)\n        return prototypes\n\n    def forward(self, x, coeff_pred, bboxes, img_meta, sampling_results=None):\n        \"\"\"Forward feature from the upstream network to get prototypes and\n        linearly combine the prototypes, using masks coefficients, into\n        instance masks. Finally, crop the instance masks with given bboxes.\n\n        Args:\n            x (Tensor): Feature from the upstream network, which is\n                a 4D-tensor.\n            coeff_pred (list[Tensor]): Mask coefficients for each scale\n                level with shape (N, num_anchors * num_protos, H, W).\n            bboxes (list[Tensor]): Box used for cropping with shape\n                (N, num_anchors * 4, H, W). During training, they are\n                ground truth boxes. 
During testing, they are predicted\n                boxes.\n            img_meta (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            sampling_results (List[:obj:``SamplingResult``]): Sampler results\n                for each image.\n\n        Returns:\n            list[Tensor]: Predicted instance segmentation masks.\n        \"\"\"\n        prototypes = self.protonet(x)\n        prototypes = prototypes.permute(0, 2, 3, 1).contiguous()\n\n        num_imgs = x.size(0)\n\n        # The reason for not using self.training is that\n        # val workflow will have a dimension mismatch error.\n        # Note that this writing method is very tricky.\n        # Fix https://github.com/open-mmlab/mmdetection/issues/5978\n        is_train_or_val_workflow = (coeff_pred[0].dim() == 4)\n\n        # Train or val workflow\n        if is_train_or_val_workflow:\n            coeff_pred_list = []\n            for coeff_pred_per_level in coeff_pred:\n                coeff_pred_per_level = \\\n                    coeff_pred_per_level.permute(\n                        0, 2, 3, 1).reshape(num_imgs, -1, self.num_protos)\n                coeff_pred_list.append(coeff_pred_per_level)\n            coeff_pred = torch.cat(coeff_pred_list, dim=1)\n\n        mask_pred_list = []\n        for idx in range(num_imgs):\n            cur_prototypes = prototypes[idx]\n            cur_coeff_pred = coeff_pred[idx]\n            cur_bboxes = bboxes[idx]\n            cur_img_meta = img_meta[idx]\n\n            # Testing state\n            if not is_train_or_val_workflow:\n                bboxes_for_cropping = cur_bboxes\n            else:\n                cur_sampling_results = sampling_results[idx]\n                pos_assigned_gt_inds = \\\n                    cur_sampling_results.pos_assigned_gt_inds\n                bboxes_for_cropping = cur_bboxes[pos_assigned_gt_inds].clone()\n                pos_inds = cur_sampling_results.pos_inds\n                cur_coeff_pred = cur_coeff_pred[pos_inds]\n\n            # Linearly combine the prototypes with the mask coefficients\n            mask_pred = cur_prototypes @ cur_coeff_pred.t()\n            mask_pred = torch.sigmoid(mask_pred)\n\n            h, w = cur_img_meta['img_shape'][:2]\n            bboxes_for_cropping[:, 0] /= w\n            bboxes_for_cropping[:, 1] /= h\n            bboxes_for_cropping[:, 2] /= w\n            bboxes_for_cropping[:, 3] /= h\n\n            mask_pred = self.crop(mask_pred, bboxes_for_cropping)\n            mask_pred = mask_pred.permute(2, 0, 1).contiguous()\n            mask_pred_list.append(mask_pred)\n        return mask_pred_list\n\n    @force_fp32(apply_to=('mask_pred', ))\n    def loss(self, mask_pred, gt_masks, gt_bboxes, img_meta, sampling_results):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            mask_pred (list[Tensor]): Predicted prototypes with shape\n                (num_classes, H, W).\n            gt_masks (list[Tensor]): Ground truth masks for each image with\n                the same shape of the input image.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            img_meta (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            sampling_results (List[:obj:``SamplingResult``]): Sampler results\n                for each image.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of 
loss components.\n        \"\"\"\n        loss_mask = []\n        num_imgs = len(mask_pred)\n        total_pos = 0\n        for idx in range(num_imgs):\n            cur_mask_pred = mask_pred[idx]\n            cur_gt_masks = gt_masks[idx].float()\n            cur_gt_bboxes = gt_bboxes[idx]\n            cur_img_meta = img_meta[idx]\n            cur_sampling_results = sampling_results[idx]\n\n            pos_assigned_gt_inds = cur_sampling_results.pos_assigned_gt_inds\n            num_pos = pos_assigned_gt_inds.size(0)\n            # Since we're producing (near) full image masks,\n            # it'd take too much vram to backprop on every single mask.\n            # Thus we select only a subset.\n            if num_pos > self.max_masks_to_train:\n                perm = torch.randperm(num_pos)\n                select = perm[:self.max_masks_to_train]\n                cur_mask_pred = cur_mask_pred[select]\n                pos_assigned_gt_inds = pos_assigned_gt_inds[select]\n                num_pos = self.max_masks_to_train\n            total_pos += num_pos\n\n            gt_bboxes_for_reweight = cur_gt_bboxes[pos_assigned_gt_inds]\n\n            mask_targets = self.get_targets(cur_mask_pred, cur_gt_masks,\n                                            pos_assigned_gt_inds)\n            if num_pos == 0:\n                loss = cur_mask_pred.sum() * 0.\n            elif mask_targets is None:\n                loss = F.binary_cross_entropy(cur_mask_pred,\n                                              torch.zeros_like(cur_mask_pred),\n                                              torch.zeros_like(cur_mask_pred))\n            else:\n                cur_mask_pred = torch.clamp(cur_mask_pred, 0, 1)\n                loss = F.binary_cross_entropy(\n                    cur_mask_pred, mask_targets,\n                    reduction='none') * self.loss_mask_weight\n\n                h, w = cur_img_meta['img_shape'][:2]\n                gt_bboxes_width = (gt_bboxes_for_reweight[:, 2] -\n                                   gt_bboxes_for_reweight[:, 0]) / w\n                gt_bboxes_height = (gt_bboxes_for_reweight[:, 3] -\n                                    gt_bboxes_for_reweight[:, 1]) / h\n                loss = loss.mean(dim=(1,\n                                      2)) / gt_bboxes_width / gt_bboxes_height\n                loss = torch.sum(loss)\n            loss_mask.append(loss)\n\n        if total_pos == 0:\n            total_pos += 1  # avoid nan\n        loss_mask = [x / total_pos for x in loss_mask]\n\n        return dict(loss_mask=loss_mask)\n\n    def get_targets(self, mask_pred, gt_masks, pos_assigned_gt_inds):\n        \"\"\"Compute instance segmentation targets for each image.\n\n        Args:\n            mask_pred (Tensor): Predicted prototypes with shape\n                (num_classes, H, W).\n            gt_masks (Tensor): Ground truth masks for each image with\n                the same shape of the input image.\n            pos_assigned_gt_inds (Tensor): GT indices of the corresponding\n                positive samples.\n        Returns:\n            Tensor: Instance segmentation targets with shape\n                (num_instances, H, W).\n        \"\"\"\n        if gt_masks.size(0) == 0:\n            return None\n        mask_h, mask_w = mask_pred.shape[-2:]\n        gt_masks = F.interpolate(\n            gt_masks.unsqueeze(0), (mask_h, mask_w),\n            mode='bilinear',\n            align_corners=False).squeeze(0)\n        gt_masks = gt_masks.gt(0.5).float()\n        mask_targets = 
gt_masks[pos_assigned_gt_inds]\n        return mask_targets\n\n    def get_seg_masks(self, mask_pred, label_pred, img_meta, rescale):\n        \"\"\"Resize, binarize, and format the instance mask predictions.\n\n        Args:\n            mask_pred (Tensor): shape (N, H, W).\n            label_pred (Tensor): shape (N, ).\n            img_meta (dict): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If rescale is False, then returned masks will\n                fit the scale of imgs[0].\n        Returns:\n            list[ndarray]: Mask predictions grouped by their predicted classes.\n        \"\"\"\n        ori_shape = img_meta['ori_shape']\n        scale_factor = img_meta['scale_factor']\n        if rescale:\n            img_h, img_w = ori_shape[:2]\n        else:\n            img_h = np.round(ori_shape[0] * scale_factor[1]).astype(np.int32)\n            img_w = np.round(ori_shape[1] * scale_factor[0]).astype(np.int32)\n\n        cls_segms = [[] for _ in range(self.num_classes)]\n        if mask_pred.size(0) == 0:\n            return cls_segms\n\n        mask_pred = F.interpolate(\n            mask_pred.unsqueeze(0), (img_h, img_w),\n            mode='bilinear',\n            align_corners=False).squeeze(0) > 0.5\n        mask_pred = mask_pred.cpu().numpy().astype(np.uint8)\n\n        for m, l in zip(mask_pred, label_pred):\n            cls_segms[l].append(m)\n        return cls_segms\n\n    def crop(self, masks, boxes, padding=1):\n        \"\"\"Crop predicted masks by zeroing out everything not in the predicted\n        bbox.\n\n        Args:\n            masks (Tensor): shape [H, W, N].\n            boxes (Tensor): bbox coords in relative point form with\n                shape [N, 4].\n\n        Return:\n            Tensor: The cropped masks.\n        \"\"\"\n        h, w, n = masks.size()\n        x1, x2 = self.sanitize_coordinates(\n            boxes[:, 0], boxes[:, 2], w, padding, cast=False)\n        y1, y2 = self.sanitize_coordinates(\n            boxes[:, 1], boxes[:, 3], h, padding, cast=False)\n\n        rows = torch.arange(\n            w, device=masks.device, dtype=x1.dtype).view(1, -1,\n                                                         1).expand(h, w, n)\n        cols = torch.arange(\n            h, device=masks.device, dtype=x1.dtype).view(-1, 1,\n                                                         1).expand(h, w, n)\n\n        masks_left = rows >= x1.view(1, 1, -1)\n        masks_right = rows < x2.view(1, 1, -1)\n        masks_up = cols >= y1.view(1, 1, -1)\n        masks_down = cols < y2.view(1, 1, -1)\n\n        crop_mask = masks_left * masks_right * masks_up * masks_down\n\n        return masks * crop_mask.float()\n\n    def sanitize_coordinates(self, x1, x2, img_size, padding=0, cast=True):\n        \"\"\"Sanitizes the input coordinates so that x1 < x2, x1 != x2, x1 >= 0,\n        and x2 <= image_size. 
Also converts from relative to absolute\n        coordinates and casts the results to long tensors.\n\n        Warning: this does things in-place behind the scenes so\n        copy if necessary.\n\n        Args:\n            _x1 (Tensor): shape (N, ).\n            _x2 (Tensor): shape (N, ).\n            img_size (int): Size of the input image.\n            padding (int): x1 >= padding, x2 <= image_size-padding.\n            cast (bool): If cast is false, the result won't be cast to longs.\n\n        Returns:\n            tuple:\n                x1 (Tensor): Sanitized _x1.\n                x2 (Tensor): Sanitized _x2.\n        \"\"\"\n        x1 = x1 * img_size\n        x2 = x2 * img_size\n        if cast:\n            x1 = x1.long()\n            x2 = x2.long()\n        x1 = torch.min(x1, x2)\n        x2 = torch.max(x1, x2)\n        x1 = torch.clamp(x1 - padding, min=0)\n        x2 = torch.clamp(x2 + padding, max=img_size)\n        return x1, x2\n\n    def simple_test(self,\n                    feats,\n                    det_bboxes,\n                    det_labels,\n                    det_coeffs,\n                    img_metas,\n                    rescale=False):\n        \"\"\"Test function without test-time augmentation.\n\n        Args:\n            feats (tuple[torch.Tensor]): Multi-level features from the\n               upstream network, each is a 4D-tensor.\n            det_bboxes (list[Tensor]): BBox results of each image. each\n               element is (n, 5) tensor, where 5 represent\n               (tl_x, tl_y, br_x, br_y, score) and the score between 0 and 1.\n            det_labels (list[Tensor]): BBox results of each image. each\n               element is (n, ) tensor, each element represents the class\n               label of the corresponding box.\n            det_coeffs (list[Tensor]): BBox coefficient of each image. each\n               element is (n, m) tensor, m is vector length.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[list]: encoded masks. The c-th item in the outer list\n                corresponds to the c-th class. 
Given the c-th outer list, the\n                i-th item in that inner list is the mask for the i-th box with\n                class label c.\n        \"\"\"\n        num_imgs = len(img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n        if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n            segm_results = [[[] for _ in range(self.num_classes)]\n                            for _ in range(num_imgs)]\n        else:\n            # if det_bboxes is rescaled to the original image size, we need to\n            # rescale it back to the testing scale to obtain RoIs.\n            if rescale and not isinstance(scale_factors[0], float):\n                scale_factors = [\n                    torch.from_numpy(scale_factor).to(det_bboxes[0].device)\n                    for scale_factor in scale_factors\n                ]\n            _bboxes = [\n                det_bboxes[i][:, :4] *\n                scale_factors[i] if rescale else det_bboxes[i][:, :4]\n                for i in range(len(det_bboxes))\n            ]\n            mask_preds = self.forward(feats[0], det_coeffs, _bboxes, img_metas)\n            # apply mask post-processing to each image individually\n            segm_results = []\n            for i in range(num_imgs):\n                if det_bboxes[i].shape[0] == 0:\n                    segm_results.append([[] for _ in range(self.num_classes)])\n                else:\n                    segm_result = self.get_seg_masks(mask_preds[i],\n                                                     det_labels[i],\n                                                     img_metas[i], rescale)\n                    segm_results.append(segm_result)\n        return segm_results\n\n\nclass InterpolateModule(BaseModule):\n    \"\"\"This is a module version of F.interpolate.\n\n    Any arguments you give it just get passed along for the ride.\n    \"\"\"\n\n    def __init__(self, *args, init_cfg=None, **kwargs):\n        super().__init__(init_cfg)\n\n        self.args = args\n        self.kwargs = kwargs\n\n    def forward(self, x):\n        \"\"\"Forward features from the upstream network.\"\"\"\n        return F.interpolate(x, *self.args, **self.kwargs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/yolo_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Copyright (c) 2019 Western Digital Corporation or its affiliates.\n\nimport warnings\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (ConvModule, bias_init_with_prob, constant_init, is_norm,\n                      normal_init)\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (build_assigner, build_bbox_coder,\n                        build_prior_generator, build_sampler, images_to_levels,\n                        multi_apply, multiclass_nms)\nfrom ..builder import HEADS, build_loss\nfrom .base_dense_head import BaseDenseHead\nfrom .dense_test_mixins import BBoxTestMixin\n\n\n@HEADS.register_module()\nclass YOLOV3Head(BaseDenseHead, BBoxTestMixin):\n    \"\"\"YOLOV3Head Paper link: https://arxiv.org/abs/1804.02767.\n\n    Args:\n        num_classes (int): The number of object classes (w/o background)\n        in_channels (List[int]): Number of input channels per scale.\n        out_channels (List[int]): The number of output channels per scale\n            before the final 1x1 layer. Default: (1024, 512, 256).\n        anchor_generator (dict): Config dict for anchor generator\n        bbox_coder (dict): Config of bounding box coder.\n        featmap_strides (List[int]): The stride of each scale.\n            Should be in descending order. Default: (32, 16, 8).\n        one_hot_smoother (float): Set a non-zero value to enable label-smooth\n            Default: 0.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Default: dict(type='BN', requires_grad=True)\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='LeakyReLU', negative_slope=0.1).\n        loss_cls (dict): Config of classification loss.\n        loss_conf (dict): Config of confidence loss.\n        loss_xy (dict): Config of xy coordinate loss.\n        loss_wh (dict): Config of wh coordinate loss.\n        train_cfg (dict): Training config of YOLOV3 head. Default: None.\n        test_cfg (dict): Testing config of YOLOV3 head. 
Default: None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 out_channels=(1024, 512, 256),\n                 anchor_generator=dict(\n                     type='YOLOAnchorGenerator',\n                     base_sizes=[[(116, 90), (156, 198), (373, 326)],\n                                 [(30, 61), (62, 45), (59, 119)],\n                                 [(10, 13), (16, 30), (33, 23)]],\n                     strides=[32, 16, 8]),\n                 bbox_coder=dict(type='YOLOBBoxCoder'),\n                 featmap_strides=[32, 16, 8],\n                 one_hot_smoother=0.,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_conf=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_xy=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_wh=dict(type='MSELoss', loss_weight=1.0),\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=dict(\n                     type='Normal', std=0.01,\n                     override=dict(name='convs_pred'))):\n        super(YOLOV3Head, self).__init__(init_cfg)\n        # Check params\n        assert (len(in_channels) == len(out_channels) == len(featmap_strides))\n\n        self.num_classes = num_classes\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.featmap_strides = featmap_strides\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            if hasattr(self.train_cfg, 'sampler'):\n                sampler_cfg = self.train_cfg.sampler\n            else:\n                sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n        self.fp16_enabled = False\n\n        self.one_hot_smoother = one_hot_smoother\n\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n\n        self.prior_generator = build_prior_generator(anchor_generator)\n\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_conf = build_loss(loss_conf)\n        self.loss_xy = build_loss(loss_xy)\n        self.loss_wh = build_loss(loss_wh)\n\n        self.num_base_priors = self.prior_generator.num_base_priors[0]\n        assert len(\n            self.prior_generator.num_base_priors) == len(featmap_strides)\n        self._init_layers()\n\n    @property\n    def anchor_generator(self):\n\n        warnings.warn('DeprecationWarning: `anchor_generator` is deprecated, '\n                      'please use \"prior_generator\" instead')\n        return self.prior_generator\n\n    @property\n    def num_anchors(self):\n        \"\"\"\n        Returns:\n            int: Number of anchors on each point of feature map.\n        \"\"\"\n        warnings.warn('DeprecationWarning: `num_anchors` is 
deprecated, '\n                      'please use \"num_base_priors\" instead')\n        return self.num_base_priors\n\n    @property\n    def num_levels(self):\n        return len(self.featmap_strides)\n\n    @property\n    def num_attrib(self):\n        \"\"\"int: number of attributes in pred_map, bboxes (4) +\n        objectness (1) + num_classes\"\"\"\n\n        return 5 + self.num_classes\n\n    def _init_layers(self):\n        self.convs_bridge = nn.ModuleList()\n        self.convs_pred = nn.ModuleList()\n        for i in range(self.num_levels):\n            conv_bridge = ConvModule(\n                self.in_channels[i],\n                self.out_channels[i],\n                3,\n                padding=1,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg,\n                act_cfg=self.act_cfg)\n            conv_pred = nn.Conv2d(self.out_channels[i],\n                                  self.num_base_priors * self.num_attrib, 1)\n\n            self.convs_bridge.append(conv_bridge)\n            self.convs_pred.append(conv_pred)\n\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                normal_init(m, mean=0, std=0.01)\n            if is_norm(m):\n                constant_init(m, 1)\n\n        # Use prior in model initialization to improve stability\n        for conv_pred, stride in zip(self.convs_pred, self.featmap_strides):\n            bias = conv_pred.bias.reshape(self.num_base_priors, -1)\n            # init objectness with prior of 8 objects per feature map\n            # refer to https://github.com/ultralytics/yolov3\n            nn.init.constant_(bias.data[:, 4],\n                              bias_init_with_prob(8 / (608 / stride)**2))\n            nn.init.constant_(bias.data[:, 5:], bias_init_with_prob(0.01))\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n\n        Returns:\n            tuple[Tensor]: A tuple of multi-level predication map, each is a\n                4D-tensor of shape (batch_size, 5+num_classes, height, width).\n        \"\"\"\n\n        assert len(feats) == self.num_levels\n        pred_maps = []\n        for i in range(self.num_levels):\n            x = feats[i]\n            x = self.convs_bridge[i](x)\n            pred_map = self.convs_pred[i](x)\n            pred_maps.append(pred_map)\n\n        return tuple(pred_maps),\n\n    @force_fp32(apply_to=('pred_maps', ))\n    def get_bboxes(self,\n                   pred_maps,\n                   img_metas,\n                   cfg=None,\n                   rescale=False,\n                   with_nms=True):\n        \"\"\"Transform network output for a batch into bbox predictions. It has\n        been accelerated since PR #5991.\n\n        Args:\n            pred_maps (list[Tensor]): Raw predictions for a batch of images.\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            cfg (mmcv.Config | None): Test / postprocessing configuration,\n                if None, test_cfg would be used. 
Default: None.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default: True.\n\n        Returns:\n            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is an (n, 5) tensor, where 5 represent\n                (tl_x, tl_y, br_x, br_y, score) and the score between 0 and 1.\n                The shape of the second tensor in the tuple is (n,), and\n                each element represents the class label of the corresponding\n                box.\n        \"\"\"\n        assert len(pred_maps) == self.num_levels\n        cfg = self.test_cfg if cfg is None else cfg\n        scale_factors = np.array(\n            [img_meta['scale_factor'] for img_meta in img_metas])\n\n        num_imgs = len(img_metas)\n        featmap_sizes = [pred_map.shape[-2:] for pred_map in pred_maps]\n\n        mlvl_anchors = self.prior_generator.grid_priors(\n            featmap_sizes, device=pred_maps[0].device)\n        flatten_preds = []\n        flatten_strides = []\n        for pred, stride in zip(pred_maps, self.featmap_strides):\n            pred = pred.permute(0, 2, 3, 1).reshape(num_imgs, -1,\n                                                    self.num_attrib)\n            pred[..., :2].sigmoid_()\n            flatten_preds.append(pred)\n            flatten_strides.append(\n                pred.new_tensor(stride).expand(pred.size(1)))\n\n        flatten_preds = torch.cat(flatten_preds, dim=1)\n        flatten_bbox_preds = flatten_preds[..., :4]\n        flatten_objectness = flatten_preds[..., 4].sigmoid()\n        flatten_cls_scores = flatten_preds[..., 5:].sigmoid()\n        flatten_anchors = torch.cat(mlvl_anchors)\n        flatten_strides = torch.cat(flatten_strides)\n        flatten_bboxes = self.bbox_coder.decode(flatten_anchors,\n                                                flatten_bbox_preds,\n                                                flatten_strides.unsqueeze(-1))\n\n        if with_nms and (flatten_objectness.size(0) == 0):\n            return torch.zeros((0, 5)), torch.zeros((0, ))\n\n        if rescale:\n            flatten_bboxes /= flatten_bboxes.new_tensor(\n                scale_factors).unsqueeze(1)\n\n        padding = flatten_bboxes.new_zeros(num_imgs, flatten_bboxes.shape[1],\n                                           1)\n        flatten_cls_scores = torch.cat([flatten_cls_scores, padding], dim=-1)\n\n        det_results = []\n        for (bboxes, scores, objectness) in zip(flatten_bboxes,\n                                                flatten_cls_scores,\n                                                flatten_objectness):\n            # Filtering out all predictions with conf < conf_thr\n            conf_thr = cfg.get('conf_thr', -1)\n            if conf_thr > 0:\n                conf_inds = objectness >= conf_thr\n                bboxes = bboxes[conf_inds, :]\n                scores = scores[conf_inds, :]\n                objectness = objectness[conf_inds]\n\n            det_bboxes, det_labels = multiclass_nms(\n                bboxes,\n                scores,\n                cfg.score_thr,\n                cfg.nms,\n                cfg.max_per_img,\n                score_factors=objectness)\n            det_results.append(tuple([det_bboxes, det_labels]))\n        return det_results\n\n    @force_fp32(apply_to=('pred_maps', ))\n    def loss(self,\n             pred_maps,\n          
   gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n\n        Args:\n            pred_maps (list[Tensor]): Prediction map for each scale level,\n                shape (N, num_anchors * num_attrib, H, W)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        num_imgs = len(img_metas)\n        device = pred_maps[0][0].device\n\n        featmap_sizes = [\n            pred_maps[i].shape[-2:] for i in range(self.num_levels)\n        ]\n        mlvl_anchors = self.prior_generator.grid_priors(\n            featmap_sizes, device=device)\n        anchor_list = [mlvl_anchors for _ in range(num_imgs)]\n\n        responsible_flag_list = []\n        for img_id in range(len(img_metas)):\n            responsible_flag_list.append(\n                self.prior_generator.responsible_flags(featmap_sizes,\n                                                       gt_bboxes[img_id],\n                                                       device))\n\n        target_maps_list, neg_maps_list = self.get_targets(\n            anchor_list, responsible_flag_list, gt_bboxes, gt_labels)\n\n        losses_cls, losses_conf, losses_xy, losses_wh = multi_apply(\n            self.loss_single, pred_maps, target_maps_list, neg_maps_list)\n\n        return dict(\n            loss_cls=losses_cls,\n            loss_conf=losses_conf,\n            loss_xy=losses_xy,\n            loss_wh=losses_wh)\n\n    def loss_single(self, pred_map, target_map, neg_map):\n        \"\"\"Compute loss of a single image from a batch.\n\n        Args:\n            pred_map (Tensor): Raw predictions for a single level.\n            target_map (Tensor): The Ground-Truth target for a single level.\n            neg_map (Tensor): The negative masks for a single level.\n\n        Returns:\n            tuple:\n                loss_cls (Tensor): Classification loss.\n                loss_conf (Tensor): Confidence loss.\n                loss_xy (Tensor): Regression loss of x, y coordinate.\n                loss_wh (Tensor): Regression loss of w, h coordinate.\n        \"\"\"\n\n        num_imgs = len(pred_map)\n        pred_map = pred_map.permute(0, 2, 3,\n                                    1).reshape(num_imgs, -1, self.num_attrib)\n        neg_mask = neg_map.float()\n        pos_mask = target_map[..., 4]\n        pos_and_neg_mask = neg_mask + pos_mask\n        pos_mask = pos_mask.unsqueeze(dim=-1)\n        if torch.max(pos_and_neg_mask) > 1.:\n            warnings.warn('There is overlap between pos and neg sample.')\n            pos_and_neg_mask = pos_and_neg_mask.clamp(min=0., max=1.)\n\n        pred_xy = pred_map[..., :2]\n        pred_wh = pred_map[..., 2:4]\n        pred_conf = pred_map[..., 4]\n        pred_label = pred_map[..., 5:]\n\n        target_xy = target_map[..., :2]\n        target_wh = target_map[..., 2:4]\n        target_conf = target_map[..., 4]\n        target_label = target_map[..., 5:]\n\n        loss_cls = 
self.loss_cls(pred_label, target_label, weight=pos_mask)\n        loss_conf = self.loss_conf(\n            pred_conf, target_conf, weight=pos_and_neg_mask)\n        loss_xy = self.loss_xy(pred_xy, target_xy, weight=pos_mask)\n        loss_wh = self.loss_wh(pred_wh, target_wh, weight=pos_mask)\n\n        return loss_cls, loss_conf, loss_xy, loss_wh\n\n    def get_targets(self, anchor_list, responsible_flag_list, gt_bboxes_list,\n                    gt_labels_list):\n        \"\"\"Compute target maps for anchors in multiple images.\n\n        Args:\n            anchor_list (list[list[Tensor]]): Multi level anchors of each\n                image. The outer list indicates images, and the inner list\n                corresponds to feature levels of the image. Each element of\n                the inner list is a tensor of shape (num_total_anchors, 4).\n            responsible_flag_list (list[list[Tensor]]): Multi level responsible\n                flags of each image. Each element is a tensor of shape\n                (num_total_anchors, )\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            gt_labels_list (list[Tensor]): Ground truth labels of each box.\n\n        Returns:\n            tuple: Usually returns a tuple containing learning targets.\n                - target_map_list (list[Tensor]): Target map of each level.\n                - neg_map_list (list[Tensor]): Negative map of each level.\n        \"\"\"\n        num_imgs = len(anchor_list)\n\n        # anchor number of multi levels\n        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]\n\n        results = multi_apply(self._get_targets_single, anchor_list,\n                              responsible_flag_list, gt_bboxes_list,\n                              gt_labels_list)\n\n        all_target_maps, all_neg_maps = results\n        assert num_imgs == len(all_target_maps) == len(all_neg_maps)\n        target_maps_list = images_to_levels(all_target_maps, num_level_anchors)\n        neg_maps_list = images_to_levels(all_neg_maps, num_level_anchors)\n\n        return target_maps_list, neg_maps_list\n\n    def _get_targets_single(self, anchors, responsible_flags, gt_bboxes,\n                            gt_labels):\n        \"\"\"Generate matching bounding box prior and converted GT.\n\n        Args:\n            anchors (list[Tensor]): Multi-level anchors of the image.\n            responsible_flags (list[Tensor]): Multi-level responsible flags of\n                anchors\n            gt_bboxes (Tensor): Ground truth bboxes of single image.\n            gt_labels (Tensor): Ground truth labels of single image.\n\n        Returns:\n            tuple:\n                target_map (Tensor): Predication target map of each\n                    scale level, shape (num_total_anchors,\n                    5+num_classes)\n                neg_map (Tensor): Negative map of each scale level,\n                    shape (num_total_anchors,)\n        \"\"\"\n\n        anchor_strides = []\n        for i in range(len(anchors)):\n            anchor_strides.append(\n                torch.tensor(self.featmap_strides[i],\n                             device=gt_bboxes.device).repeat(len(anchors[i])))\n        concat_anchors = torch.cat(anchors)\n        concat_responsible_flags = torch.cat(responsible_flags)\n\n        anchor_strides = torch.cat(anchor_strides)\n        assert len(anchor_strides) == len(concat_anchors) == \\\n               len(concat_responsible_flags)\n        assign_result = 
self.assigner.assign(concat_anchors,\n                                             concat_responsible_flags,\n                                             gt_bboxes)\n        sampling_result = self.sampler.sample(assign_result, concat_anchors,\n                                              gt_bboxes)\n\n        target_map = concat_anchors.new_zeros(\n            concat_anchors.size(0), self.num_attrib)\n\n        target_map[sampling_result.pos_inds, :4] = self.bbox_coder.encode(\n            sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes,\n            anchor_strides[sampling_result.pos_inds])\n\n        target_map[sampling_result.pos_inds, 4] = 1\n\n        gt_labels_one_hot = F.one_hot(\n            gt_labels, num_classes=self.num_classes).float()\n        if self.one_hot_smoother != 0:  # label smooth\n            gt_labels_one_hot = gt_labels_one_hot * (\n                1 - self.one_hot_smoother\n            ) + self.one_hot_smoother / self.num_classes\n        target_map[sampling_result.pos_inds, 5:] = gt_labels_one_hot[\n            sampling_result.pos_assigned_gt_inds]\n\n        neg_map = concat_anchors.new_zeros(\n            concat_anchors.size(0), dtype=torch.uint8)\n        neg_map[sampling_result.neg_inds] = 1\n\n        return target_map, neg_map\n\n    def aug_test(self, feats, img_metas, rescale=False):\n        \"\"\"Test function with test time augmentation.\n\n        Args:\n            feats (list[Tensor]): the outer list indicates test-time\n                augmentations and inner Tensor should have a shape NxCxHxW,\n                which contains features for all images in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch. 
each dict has image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[ndarray]: bbox results of each class\n        \"\"\"\n        return self.aug_test_bboxes(feats, img_metas, rescale=rescale)\n\n    @force_fp32(apply_to=('pred_maps'))\n    def onnx_export(self, pred_maps, img_metas, with_nms=True):\n        num_levels = len(pred_maps)\n        pred_maps_list = [pred_maps[i].detach() for i in range(num_levels)]\n\n        cfg = self.test_cfg\n        assert len(pred_maps_list) == self.num_levels\n\n        device = pred_maps_list[0].device\n        batch_size = pred_maps_list[0].shape[0]\n\n        featmap_sizes = [\n            pred_maps_list[i].shape[-2:] for i in range(self.num_levels)\n        ]\n        mlvl_anchors = self.prior_generator.grid_priors(\n            featmap_sizes, device=device)\n        # convert to tensor to keep tracing\n        nms_pre_tensor = torch.tensor(\n            cfg.get('nms_pre', -1), device=device, dtype=torch.long)\n\n        multi_lvl_bboxes = []\n        multi_lvl_cls_scores = []\n        multi_lvl_conf_scores = []\n        for i in range(self.num_levels):\n            # get some key info for current scale\n            pred_map = pred_maps_list[i]\n            stride = self.featmap_strides[i]\n            # (b,h, w, num_anchors*num_attrib) ->\n            # (b,h*w*num_anchors, num_attrib)\n            pred_map = pred_map.permute(0, 2, 3,\n                                        1).reshape(batch_size, -1,\n                                                   self.num_attrib)\n            # Inplace operation like\n            # ```pred_map[..., :2] = \\torch.sigmoid(pred_map[..., :2])```\n            # would create constant tensor when exporting to onnx\n            pred_map_conf = torch.sigmoid(pred_map[..., :2])\n            pred_map_rest = pred_map[..., 2:]\n            pred_map = torch.cat([pred_map_conf, pred_map_rest], dim=-1)\n            pred_map_boxes = pred_map[..., :4]\n            multi_lvl_anchor = mlvl_anchors[i]\n            multi_lvl_anchor = multi_lvl_anchor.expand_as(pred_map_boxes)\n            bbox_pred = self.bbox_coder.decode(multi_lvl_anchor,\n                                               pred_map_boxes, stride)\n            # conf and cls\n            conf_pred = torch.sigmoid(pred_map[..., 4])\n            cls_pred = torch.sigmoid(pred_map[..., 5:]).view(\n                batch_size, -1, self.num_classes)  # Cls pred one-hot.\n\n            # Get top-k prediction\n            from mmdet.core.export import get_k_for_topk\n            nms_pre = get_k_for_topk(nms_pre_tensor, bbox_pred.shape[1])\n            if nms_pre > 0:\n                _, topk_inds = conf_pred.topk(nms_pre)\n                batch_inds = torch.arange(batch_size).view(\n                    -1, 1).expand_as(topk_inds).long()\n                # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501\n                transformed_inds = (\n                    bbox_pred.shape[1] * batch_inds + topk_inds)\n                bbox_pred = bbox_pred.reshape(-1,\n                                              4)[transformed_inds, :].reshape(\n                                                  batch_size, -1, 4)\n                cls_pred = cls_pred.reshape(\n                    -1, self.num_classes)[transformed_inds, :].reshape(\n                        batch_size, -1, self.num_classes)\n                conf_pred = conf_pred.reshape(-1, 
1)[transformed_inds].reshape(\n                    batch_size, -1)\n\n            # Save the result of current scale\n            multi_lvl_bboxes.append(bbox_pred)\n            multi_lvl_cls_scores.append(cls_pred)\n            multi_lvl_conf_scores.append(conf_pred)\n\n        # Merge the results of different scales together\n        batch_mlvl_bboxes = torch.cat(multi_lvl_bboxes, dim=1)\n        batch_mlvl_scores = torch.cat(multi_lvl_cls_scores, dim=1)\n        batch_mlvl_conf_scores = torch.cat(multi_lvl_conf_scores, dim=1)\n\n        # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment\n        from mmdet.core.export import add_dummy_nms_for_onnx\n        conf_thr = cfg.get('conf_thr', -1)\n        score_thr = cfg.get('score_thr', -1)\n        # follow original pipeline of YOLOv3\n        if conf_thr > 0:\n            mask = (batch_mlvl_conf_scores >= conf_thr).float()\n            batch_mlvl_conf_scores *= mask\n        if score_thr > 0:\n            mask = (batch_mlvl_scores > score_thr).float()\n            batch_mlvl_scores *= mask\n        batch_mlvl_conf_scores = batch_mlvl_conf_scores.unsqueeze(2).expand_as(\n            batch_mlvl_scores)\n        batch_mlvl_scores = batch_mlvl_scores * batch_mlvl_conf_scores\n        if with_nms:\n            max_output_boxes_per_class = cfg.nms.get(\n                'max_output_boxes_per_class', 200)\n            iou_threshold = cfg.nms.get('iou_threshold', 0.5)\n            # keep aligned with original pipeline, improve\n            # mAP by 1% for YOLOv3 in ONNX\n            score_threshold = 0\n            nms_pre = cfg.get('deploy_nms_pre', -1)\n            return add_dummy_nms_for_onnx(\n                batch_mlvl_bboxes,\n                batch_mlvl_scores,\n                max_output_boxes_per_class,\n                iou_threshold,\n                score_threshold,\n                nms_pre,\n                cfg.max_per_img,\n            )\n        else:\n            return batch_mlvl_bboxes, batch_mlvl_scores\n"
  },
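  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/yolo_head_decode_sketch.py",
    "content": "# A minimal, hypothetical sketch (not part of upstream mmdetection).\n#\n# YOLOV3Head.get_bboxes hands flattened anchors, raw (tx, ty, tw, th)\n# predictions (with sigmoid already applied to tx, ty) and per-anchor strides\n# to ``self.bbox_coder.decode``. This standalone function shows the classic\n# YOLOv3 decoding that call is expected to perform; the exact upstream\n# YOLOBBoxCoder may differ in minor details.\nimport torch\n\n\ndef yolo_decode_sketch(anchors, preds, strides):\n    # anchors: (N, 4) as x1, y1, x2, y2; preds: (N, 4); strides: (N, 1)\n    centers = (anchors[..., :2] + anchors[..., 2:]) * 0.5\n    # move the anchor center by at most half a stride in each direction\n    centers = centers + (preds[..., :2] - 0.5) * strides\n    # scale the anchor size by exp(tw), exp(th)\n    half_wh = (anchors[..., 2:] - anchors[..., :2]) * 0.5 * preds[..., 2:].exp()\n    return torch.cat([centers - half_wh, centers + half_wh], dim=-1)\n\n\nif __name__ == '__main__':\n    anchors = torch.tensor([[0., 0., 32., 32.]])\n    preds = torch.tensor([[0.5, 0.5, 0.0, 0.0]])  # a neutral prediction\n    strides = torch.tensor([[8.]])\n    # a neutral prediction gives back the anchor itself: [[0., 0., 32., 32.]]\n    print(yolo_decode_sketch(anchors, preds, strides))\n"
  },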
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/yolof_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import (ConvModule, bias_init_with_prob, constant_init, is_norm,\n                      normal_init)\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import anchor_inside_flags, multi_apply, reduce_mean, unmap\nfrom ..builder import HEADS\nfrom .anchor_head import AnchorHead\n\nINF = 1e8\n\n\ndef levels_to_images(mlvl_tensor):\n    \"\"\"Concat multi-level feature maps by image.\n\n    [feature_level0, feature_level1...] -> [feature_image0, feature_image1...]\n    Convert the shape of each element in mlvl_tensor from (N, C, H, W) to\n    (N, H*W , C), then split the element to N elements with shape (H*W, C), and\n    concat elements in same image of all level along first dimension.\n\n    Args:\n        mlvl_tensor (list[torch.Tensor]): list of Tensor which collect from\n            corresponding level. Each element is of shape (N, C, H, W)\n\n    Returns:\n        list[torch.Tensor]: A list that contains N tensors and each tensor is\n            of shape (num_elements, C)\n    \"\"\"\n    batch_size = mlvl_tensor[0].size(0)\n    batch_list = [[] for _ in range(batch_size)]\n    channels = mlvl_tensor[0].size(1)\n    for t in mlvl_tensor:\n        t = t.permute(0, 2, 3, 1)\n        t = t.view(batch_size, -1, channels).contiguous()\n        for img in range(batch_size):\n            batch_list[img].append(t[img])\n    return [torch.cat(item, 0) for item in batch_list]\n\n\n@HEADS.register_module()\nclass YOLOFHead(AnchorHead):\n    \"\"\"YOLOFHead Paper link: https://arxiv.org/abs/2103.09460.\n\n    Args:\n        num_classes (int): The number of object classes (w/o background)\n        in_channels (List[int]): The number of input channels per scale.\n        cls_num_convs (int): The number of convolutions of cls branch.\n           Default 2.\n        reg_num_convs (int): The number of convolutions of reg branch.\n           Default 4.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 num_cls_convs=2,\n                 num_reg_convs=4,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 **kwargs):\n        self.num_cls_convs = num_cls_convs\n        self.num_reg_convs = num_reg_convs\n        self.norm_cfg = norm_cfg\n        super(YOLOFHead, self).__init__(num_classes, in_channels, **kwargs)\n\n    def _init_layers(self):\n        cls_subnet = []\n        bbox_subnet = []\n        for i in range(self.num_cls_convs):\n            cls_subnet.append(\n                ConvModule(\n                    self.in_channels,\n                    self.in_channels,\n                    kernel_size=3,\n                    padding=1,\n                    norm_cfg=self.norm_cfg))\n        for i in range(self.num_reg_convs):\n            bbox_subnet.append(\n                ConvModule(\n                    self.in_channels,\n                    self.in_channels,\n                    kernel_size=3,\n                    padding=1,\n                    norm_cfg=self.norm_cfg))\n        self.cls_subnet = nn.Sequential(*cls_subnet)\n        self.bbox_subnet = nn.Sequential(*bbox_subnet)\n        self.cls_score = nn.Conv2d(\n            self.in_channels,\n            self.num_base_priors * self.num_classes,\n            kernel_size=3,\n            stride=1,\n            padding=1)\n        self.bbox_pred = nn.Conv2d(\n      
      self.in_channels,\n            self.num_base_priors * 4,\n            kernel_size=3,\n            stride=1,\n            padding=1)\n        self.object_pred = nn.Conv2d(\n            self.in_channels,\n            self.num_base_priors,\n            kernel_size=3,\n            stride=1,\n            padding=1)\n\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                normal_init(m, mean=0, std=0.01)\n            if is_norm(m):\n                constant_init(m, 1)\n\n        # Use prior in model initialization to improve stability\n        bias_cls = bias_init_with_prob(0.01)\n        torch.nn.init.constant_(self.cls_score.bias, bias_cls)\n\n    def forward_single(self, feature):\n        cls_score = self.cls_score(self.cls_subnet(feature))\n        N, _, H, W = cls_score.shape\n        cls_score = cls_score.view(N, -1, self.num_classes, H, W)\n\n        reg_feat = self.bbox_subnet(feature)\n        bbox_reg = self.bbox_pred(reg_feat)\n        objectness = self.object_pred(reg_feat)\n\n        # implicit objectness\n        objectness = objectness.view(N, -1, 1, H, W)\n        normalized_cls_score = cls_score + objectness - torch.log(\n            1. + torch.clamp(cls_score.exp(), max=INF) +\n            torch.clamp(objectness.exp(), max=INF))\n        normalized_cls_score = normalized_cls_score.view(N, -1, H, W)\n        return normalized_cls_score, bbox_reg\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute losses of the head.\n\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level\n                Has shape (batch, num_anchors * num_classes, h, w)\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level with shape (batch, num_anchors * 4, h, w)\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss. 
Default: None\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        assert len(cls_scores) == 1\n        assert self.prior_generator.num_levels == 1\n\n        device = cls_scores[0].device\n        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]\n        anchor_list, valid_flag_list = self.get_anchors(\n            featmap_sizes, img_metas, device=device)\n\n        # The output level is always 1\n        anchor_list = [anchors[0] for anchors in anchor_list]\n        valid_flag_list = [valid_flags[0] for valid_flags in valid_flag_list]\n\n        cls_scores_list = levels_to_images(cls_scores)\n        bbox_preds_list = levels_to_images(bbox_preds)\n\n        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1\n        cls_reg_targets = self.get_targets(\n            cls_scores_list,\n            bbox_preds_list,\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes,\n            img_metas,\n            gt_bboxes_ignore_list=gt_bboxes_ignore,\n            gt_labels_list=gt_labels,\n            label_channels=label_channels)\n        if cls_reg_targets is None:\n            return None\n        (batch_labels, batch_label_weights, num_total_pos, num_total_neg,\n         batch_bbox_weights, batch_pos_predicted_boxes,\n         batch_target_boxes) = cls_reg_targets\n\n        flatten_labels = batch_labels.reshape(-1)\n        batch_label_weights = batch_label_weights.reshape(-1)\n        cls_score = cls_scores[0].permute(0, 2, 3,\n                                          1).reshape(-1, self.cls_out_channels)\n\n        num_total_samples = (num_total_pos +\n                             num_total_neg) if self.sampling else num_total_pos\n        num_total_samples = reduce_mean(\n            cls_score.new_tensor(num_total_samples)).clamp_(1.0).item()\n\n        # classification loss\n        loss_cls = self.loss_cls(\n            cls_score,\n            flatten_labels,\n            batch_label_weights,\n            avg_factor=num_total_samples)\n\n        # regression loss\n        if batch_pos_predicted_boxes.shape[0] == 0:\n            # no pos sample\n            loss_bbox = batch_pos_predicted_boxes.sum() * 0\n        else:\n            loss_bbox = self.loss_bbox(\n                batch_pos_predicted_boxes,\n                batch_target_boxes,\n                batch_bbox_weights.float(),\n                avg_factor=num_total_samples)\n\n        return dict(loss_cls=loss_cls, loss_bbox=loss_bbox)\n\n    def get_targets(self,\n                    cls_scores_list,\n                    bbox_preds_list,\n                    anchor_list,\n                    valid_flag_list,\n                    gt_bboxes_list,\n                    img_metas,\n                    gt_bboxes_ignore_list=None,\n                    gt_labels_list=None,\n                    label_channels=1,\n                    unmap_outputs=True):\n        \"\"\"Compute regression and classification targets for anchors in\n        multiple images.\n\n        Args:\n            cls_scores_list (list[Tensor])： Classification scores of\n                each image. each is a 4D-tensor, the shape is\n                (h * w, num_anchors * num_classes).\n            bbox_preds_list (list[Tensor])： Bbox preds of each image.\n                each is a 4D-tensor, the shape is (h * w, num_anchors * 4).\n            anchor_list (list[Tensor]): Anchors of each image. 
Each element of\n                is a tensor of shape (h * w * num_anchors, 4).\n            valid_flag_list (list[Tensor]): Valid flags of each image. Each\n               element of is a tensor of shape (h * w * num_anchors, )\n            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.\n            img_metas (list[dict]): Meta info of each image.\n            gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be\n                ignored.\n            gt_labels_list (list[Tensor]): Ground truth labels of each box.\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple: Usually returns a tuple containing learning targets.\n\n                - batch_labels (Tensor): Label of all images. Each element \\\n                    of is a tensor of shape (batch, h * w * num_anchors)\n                - batch_label_weights (Tensor): Label weights of all images \\\n                    of is a tensor of shape (batch, h * w * num_anchors)\n                - num_total_pos (int): Number of positive samples in all \\\n                    images.\n                - num_total_neg (int): Number of negative samples in all \\\n                    images.\n            additional_returns: This function enables user-defined returns from\n                `self._get_targets_single`. These returns are currently refined\n                to properties at each feature map (i.e. having HxW dimension).\n                The results will be concatenated after the end\n        \"\"\"\n        num_imgs = len(img_metas)\n        assert len(anchor_list) == len(valid_flag_list) == num_imgs\n\n        # compute targets for each image\n        if gt_bboxes_ignore_list is None:\n            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]\n        if gt_labels_list is None:\n            gt_labels_list = [None for _ in range(num_imgs)]\n        results = multi_apply(\n            self._get_targets_single,\n            bbox_preds_list,\n            anchor_list,\n            valid_flag_list,\n            gt_bboxes_list,\n            gt_bboxes_ignore_list,\n            gt_labels_list,\n            img_metas,\n            label_channels=label_channels,\n            unmap_outputs=unmap_outputs)\n        (all_labels, all_label_weights, pos_inds_list, neg_inds_list,\n         sampling_results_list) = results[:5]\n        rest_results = list(results[5:])  # user-added return values\n        # no valid anchors\n        if any([labels is None for labels in all_labels]):\n            return None\n        # sampled anchors of all images\n        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])\n        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])\n\n        batch_labels = torch.stack(all_labels, 0)\n        batch_label_weights = torch.stack(all_label_weights, 0)\n\n        res = (batch_labels, batch_label_weights, num_total_pos, num_total_neg)\n        for i, rests in enumerate(rest_results):  # user-added return values\n            rest_results[i] = torch.cat(rests, 0)\n\n        return res + tuple(rest_results)\n\n    def _get_targets_single(self,\n                            bbox_preds,\n                            flat_anchors,\n                            valid_flags,\n                            gt_bboxes,\n                            gt_bboxes_ignore,\n                            gt_labels,\n                    
        img_meta,\n                            label_channels=1,\n                            unmap_outputs=True):\n        \"\"\"Compute regression and classification targets for anchors in a\n        single image.\n\n        Args:\n            bbox_preds (Tensor): Bbox prediction of the image, which\n                shape is (h * w ,4)\n            flat_anchors (Tensor): Anchors of the image, which shape is\n                (h * w * num_anchors ,4)\n            valid_flags (Tensor): Valid flags of the image, which shape is\n                (h * w * num_anchors,).\n            gt_bboxes (Tensor): Ground truth bboxes of the image,\n                shape (num_gts, 4).\n            gt_bboxes_ignore (Tensor): Ground truth bboxes to be\n                ignored, shape (num_ignored_gts, 4).\n            img_meta (dict): Meta info of the image.\n            gt_labels (Tensor): Ground truth labels of each box,\n                shape (num_gts,).\n            label_channels (int): Channel of label.\n            unmap_outputs (bool): Whether to map outputs back to the original\n                set of anchors.\n\n        Returns:\n            tuple:\n                labels (Tensor): Labels of image, which shape is\n                    (h * w * num_anchors, ).\n                label_weights (Tensor): Label weights of image, which shape is\n                    (h * w * num_anchors, ).\n                pos_inds (Tensor): Pos index of image.\n                neg_inds (Tensor): Neg index of image.\n                sampling_result (obj:`SamplingResult`): Sampling result.\n                pos_bbox_weights (Tensor): The Weight of using to calculate\n                    the bbox branch loss, which shape is (num, ).\n                pos_predicted_boxes (Tensor): boxes predicted value of\n                    using to calculate the bbox branch loss, which shape is\n                    (num, 4).\n                pos_target_boxes (Tensor): boxes target value of\n                    using to calculate the bbox branch loss, which shape is\n                    (num, 4).\n        \"\"\"\n        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,\n                                           img_meta['img_shape'][:2],\n                                           self.train_cfg.allowed_border)\n        if not inside_flags.any():\n            return (None, ) * 8\n        # assign gt and sample anchors\n        anchors = flat_anchors[inside_flags, :]\n        bbox_preds = bbox_preds.reshape(-1, 4)\n        bbox_preds = bbox_preds[inside_flags, :]\n\n        # decoded bbox\n        decoder_bbox_preds = self.bbox_coder.decode(anchors, bbox_preds)\n        assign_result = self.assigner.assign(\n            decoder_bbox_preds, anchors, gt_bboxes, gt_bboxes_ignore,\n            None if self.sampling else gt_labels)\n\n        pos_bbox_weights = assign_result.get_extra_property('pos_idx')\n        pos_predicted_boxes = assign_result.get_extra_property(\n            'pos_predicted_boxes')\n        pos_target_boxes = assign_result.get_extra_property('target_boxes')\n\n        sampling_result = self.sampler.sample(assign_result, anchors,\n                                              gt_bboxes)\n        num_valid_anchors = anchors.shape[0]\n        labels = anchors.new_full((num_valid_anchors, ),\n                                  self.num_classes,\n                                  dtype=torch.long)\n        label_weights = anchors.new_zeros(num_valid_anchors, dtype=torch.float)\n\n        pos_inds = 
sampling_result.pos_inds\n        neg_inds = sampling_result.neg_inds\n        if len(pos_inds) > 0:\n            if gt_labels is None:\n                # Only rpn gives gt_labels as None\n                # Foreground is the first class since v2.5.0\n                labels[pos_inds] = 0\n            else:\n                labels[pos_inds] = gt_labels[\n                    sampling_result.pos_assigned_gt_inds]\n            if self.train_cfg.pos_weight <= 0:\n                label_weights[pos_inds] = 1.0\n            else:\n                label_weights[pos_inds] = self.train_cfg.pos_weight\n        if len(neg_inds) > 0:\n            label_weights[neg_inds] = 1.0\n\n        # map up to original set of anchors\n        if unmap_outputs:\n            num_total_anchors = flat_anchors.size(0)\n            labels = unmap(\n                labels, num_total_anchors, inside_flags,\n                fill=self.num_classes)  # fill bg label\n            label_weights = unmap(label_weights, num_total_anchors,\n                                  inside_flags)\n\n        return (labels, label_weights, pos_inds, neg_inds, sampling_result,\n                pos_bbox_weights, pos_predicted_boxes, pos_target_boxes)\n"
  },
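  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/yolof_head_objectness_sketch.py",
    "content": "# A minimal, hypothetical sketch (not part of upstream mmdetection).\n#\n# YOLOFHead.forward_single folds the objectness logit into the class logits:\n#     normalized = cls + obj - log(1 + exp(cls) + exp(obj))\n# This checks the identity sigmoid(normalized) == sigmoid(cls) * sigmoid(obj),\n# which is why a single fused score can stand in for the product of the two.\n# The clamp(..., max=INF) in the real code only guards against overflow and is\n# dropped here.\nimport torch\n\ncls_logit = torch.randn(1000)\nobj_logit = torch.randn(1000)\n\nnormalized = cls_logit + obj_logit - torch.log(\n    1. + cls_logit.exp() + obj_logit.exp())\n\n# the fused score agrees with the product of the two sigmoids (prints True)\nprint(torch.allclose(normalized.sigmoid(),\n                     cls_logit.sigmoid() * obj_logit.sigmoid(),\n                     atol=1e-5))\n"
  },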
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/yolox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule,\n                      bias_init_with_prob)\nfrom mmcv.ops.nms import batched_nms\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.core import (MlvlPointGenerator, bbox_xyxy_to_cxcywh,\n                        build_assigner, build_sampler, multi_apply,\n                        reduce_mean)\nfrom ..builder import HEADS, build_loss\nfrom .base_dense_head import BaseDenseHead\nfrom .dense_test_mixins import BBoxTestMixin\n\n\n@HEADS.register_module()\nclass YOLOXHead(BaseDenseHead, BBoxTestMixin):\n    \"\"\"YOLOXHead head used in `YOLOX <https://arxiv.org/abs/2107.08430>`_.\n\n    Args:\n        num_classes (int): Number of categories excluding the background\n            category.\n        in_channels (int): Number of channels in the input feature map.\n        feat_channels (int): Number of hidden channels in stacking convs.\n            Default: 256\n        stacked_convs (int): Number of stacking convs of the head.\n            Default: 2.\n        strides (tuple): Downsample factor of each feature map.\n        use_depthwise (bool): Whether to depthwise separable convolution in\n            blocks. Default: False\n        dcn_on_last_conv (bool): If true, use dcn in the last layer of\n            towers. Default: False.\n        conv_bias (bool | str): If specified as `auto`, it will be decided by\n            the norm_cfg. Bias of conv will be set as True if `norm_cfg` is\n            None, otherwise False. Default: \"auto\".\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Config dict for normalization layer. Default: None.\n        act_cfg (dict): Config dict for activation layer. 
Default: None.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox (dict): Config of localization loss.\n        loss_obj (dict): Config of objectness loss.\n        loss_l1 (dict): Config of L1 loss.\n        train_cfg (dict): Training config of anchor head.\n        test_cfg (dict): Testing config of anchor head.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 in_channels,\n                 feat_channels=256,\n                 stacked_convs=2,\n                 strides=[8, 16, 32],\n                 use_depthwise=False,\n                 dcn_on_last_conv=False,\n                 conv_bias='auto',\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),\n                 act_cfg=dict(type='Swish'),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     reduction='sum',\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='IoULoss',\n                     mode='square',\n                     eps=1e-16,\n                     reduction='sum',\n                     loss_weight=5.0),\n                 loss_obj=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     reduction='sum',\n                     loss_weight=1.0),\n                 loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0),\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=dict(\n                     type='Kaiming',\n                     layer='Conv2d',\n                     a=math.sqrt(5),\n                     distribution='uniform',\n                     mode='fan_in',\n                     nonlinearity='leaky_relu')):\n\n        super().__init__(init_cfg=init_cfg)\n        self.num_classes = num_classes\n        self.cls_out_channels = num_classes\n        self.in_channels = in_channels\n        self.feat_channels = feat_channels\n        self.stacked_convs = stacked_convs\n        self.strides = strides\n        self.use_depthwise = use_depthwise\n        self.dcn_on_last_conv = dcn_on_last_conv\n        assert conv_bias == 'auto' or isinstance(conv_bias, bool)\n        self.conv_bias = conv_bias\n        self.use_sigmoid_cls = True\n\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n        self.loss_obj = build_loss(loss_obj)\n\n        self.use_l1 = False  # This flag will be modified by hooks.\n        self.loss_l1 = build_loss(loss_l1)\n\n        self.prior_generator = MlvlPointGenerator(strides, offset=0)\n\n        self.test_cfg = test_cfg\n        self.train_cfg = train_cfg\n\n        self.sampling = False\n        if self.train_cfg:\n            self.assigner = build_assigner(self.train_cfg.assigner)\n            # sampling=False so use PseudoSampler\n            sampler_cfg = dict(type='PseudoSampler')\n            self.sampler = build_sampler(sampler_cfg, context=self)\n\n        self.fp16_enabled = False\n        self._init_layers()\n\n    def _init_layers(self):\n        self.multi_level_cls_convs = nn.ModuleList()\n        self.multi_level_reg_convs = nn.ModuleList()\n        self.multi_level_conv_cls = nn.ModuleList()\n        
self.multi_level_conv_reg = nn.ModuleList()\n        self.multi_level_conv_obj = nn.ModuleList()\n        for _ in self.strides:\n            self.multi_level_cls_convs.append(self._build_stacked_convs())\n            self.multi_level_reg_convs.append(self._build_stacked_convs())\n            conv_cls, conv_reg, conv_obj = self._build_predictor()\n            self.multi_level_conv_cls.append(conv_cls)\n            self.multi_level_conv_reg.append(conv_reg)\n            self.multi_level_conv_obj.append(conv_obj)\n\n    def _build_stacked_convs(self):\n        \"\"\"Initialize conv layers of a single level head.\"\"\"\n        conv = DepthwiseSeparableConvModule \\\n            if self.use_depthwise else ConvModule\n        stacked_convs = []\n        for i in range(self.stacked_convs):\n            chn = self.in_channels if i == 0 else self.feat_channels\n            if self.dcn_on_last_conv and i == self.stacked_convs - 1:\n                conv_cfg = dict(type='DCNv2')\n            else:\n                conv_cfg = self.conv_cfg\n            stacked_convs.append(\n                conv(\n                    chn,\n                    self.feat_channels,\n                    3,\n                    stride=1,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    act_cfg=self.act_cfg,\n                    bias=self.conv_bias))\n        return nn.Sequential(*stacked_convs)\n\n    def _build_predictor(self):\n        \"\"\"Initialize predictor layers of a single level head.\"\"\"\n        conv_cls = nn.Conv2d(self.feat_channels, self.cls_out_channels, 1)\n        conv_reg = nn.Conv2d(self.feat_channels, 4, 1)\n        conv_obj = nn.Conv2d(self.feat_channels, 1, 1)\n        return conv_cls, conv_reg, conv_obj\n\n    def init_weights(self):\n        super(YOLOXHead, self).init_weights()\n        # Use prior in model initialization to improve stability\n        bias_init = bias_init_with_prob(0.01)\n        for conv_cls, conv_obj in zip(self.multi_level_conv_cls,\n                                      self.multi_level_conv_obj):\n            conv_cls.bias.data.fill_(bias_init)\n            conv_obj.bias.data.fill_(bias_init)\n\n    def forward_single(self, x, cls_convs, reg_convs, conv_cls, conv_reg,\n                       conv_obj):\n        \"\"\"Forward feature of a single scale level.\"\"\"\n\n        cls_feat = cls_convs(x)\n        reg_feat = reg_convs(x)\n\n        cls_score = conv_cls(cls_feat)\n        bbox_pred = conv_reg(reg_feat)\n        objectness = conv_obj(reg_feat)\n\n        return cls_score, bbox_pred, objectness\n\n    def forward(self, feats):\n        \"\"\"Forward features from the upstream network.\n\n        Args:\n            feats (tuple[Tensor]): Features from the upstream network, each is\n                a 4D-tensor.\n        Returns:\n            tuple[Tensor]: A tuple of multi-level predication map, each is a\n                4D-tensor of shape (batch_size, 5+num_classes, height, width).\n        \"\"\"\n\n        return multi_apply(self.forward_single, feats,\n                           self.multi_level_cls_convs,\n                           self.multi_level_reg_convs,\n                           self.multi_level_conv_cls,\n                           self.multi_level_conv_reg,\n                           self.multi_level_conv_obj)\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'objectnesses'))\n    def get_bboxes(self,\n                   cls_scores,\n                   
bbox_preds,\n                   objectnesses,\n                   img_metas=None,\n                   cfg=None,\n                   rescale=False,\n                   with_nms=True):\n        \"\"\"Transform network outputs of a batch into bbox results.\n        Args:\n            cls_scores (list[Tensor]): Classification scores for all\n                scale levels, each is a 4D-tensor, has shape\n                (batch_size, num_priors * num_classes, H, W).\n            bbox_preds (list[Tensor]): Box energies / deltas for all\n                scale levels, each is a 4D-tensor, has shape\n                (batch_size, num_priors * 4, H, W).\n            objectnesses (list[Tensor], Optional): Score factor for\n                all scale level, each is a 4D-tensor, has shape\n                (batch_size, 1, H, W).\n            img_metas (list[dict], Optional): Image meta info. Default None.\n            cfg (mmcv.Config, Optional): Test / postprocessing configuration,\n                if None, test_cfg would be used.  Default None.\n            rescale (bool): If True, return boxes in original image space.\n                Default False.\n            with_nms (bool): If True, do nms before return boxes.\n                Default True.\n        Returns:\n            list[list[Tensor, Tensor]]: Each item in result_list is 2-tuple.\n                The first item is an (n, 5) tensor, where the first 4 columns\n                are bounding box positions (tl_x, tl_y, br_x, br_y) and the\n                5-th column is a score between 0 and 1. The second item is a\n                (n,) tensor where each item is the predicted class label of\n                the corresponding box.\n        \"\"\"\n        assert len(cls_scores) == len(bbox_preds) == len(objectnesses)\n        cfg = self.test_cfg if cfg is None else cfg\n        scale_factors = np.array(\n            [img_meta['scale_factor'] for img_meta in img_metas])\n\n        num_imgs = len(img_metas)\n        featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores]\n        mlvl_priors = self.prior_generator.grid_priors(\n            featmap_sizes,\n            dtype=cls_scores[0].dtype,\n            device=cls_scores[0].device,\n            with_stride=True)\n\n        # flatten cls_scores, bbox_preds and objectness\n        flatten_cls_scores = [\n            cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1,\n                                                  self.cls_out_channels)\n            for cls_score in cls_scores\n        ]\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)\n            for bbox_pred in bbox_preds\n        ]\n        flatten_objectness = [\n            objectness.permute(0, 2, 3, 1).reshape(num_imgs, -1)\n            for objectness in objectnesses\n        ]\n\n        flatten_cls_scores = torch.cat(flatten_cls_scores, dim=1).sigmoid()\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1)\n        flatten_objectness = torch.cat(flatten_objectness, dim=1).sigmoid()\n        flatten_priors = torch.cat(mlvl_priors)\n\n        flatten_bboxes = self._bbox_decode(flatten_priors, flatten_bbox_preds)\n\n        if rescale:\n            flatten_bboxes[..., :4] /= flatten_bboxes.new_tensor(\n                scale_factors).unsqueeze(1)\n\n        result_list = []\n        for img_id in range(len(img_metas)):\n            cls_scores = flatten_cls_scores[img_id]\n            score_factor = flatten_objectness[img_id]\n            bboxes = 
flatten_bboxes[img_id]\n\n            result_list.append(\n                self._bboxes_nms(cls_scores, bboxes, score_factor, cfg))\n\n        return result_list\n\n    def _bbox_decode(self, priors, bbox_preds):\n        xys = (bbox_preds[..., :2] * priors[:, 2:]) + priors[:, :2]\n        whs = bbox_preds[..., 2:].exp() * priors[:, 2:]\n\n        tl_x = (xys[..., 0] - whs[..., 0] / 2)\n        tl_y = (xys[..., 1] - whs[..., 1] / 2)\n        br_x = (xys[..., 0] + whs[..., 0] / 2)\n        br_y = (xys[..., 1] + whs[..., 1] / 2)\n\n        decoded_bboxes = torch.stack([tl_x, tl_y, br_x, br_y], -1)\n        return decoded_bboxes\n\n    def _bboxes_nms(self, cls_scores, bboxes, score_factor, cfg):\n        max_scores, labels = torch.max(cls_scores, 1)\n        valid_mask = score_factor * max_scores >= cfg.score_thr\n\n        bboxes = bboxes[valid_mask]\n        scores = max_scores[valid_mask] * score_factor[valid_mask]\n        labels = labels[valid_mask]\n\n        if labels.numel() == 0:\n            return bboxes, labels\n        else:\n            dets, keep = batched_nms(bboxes, scores, labels, cfg.nms)\n            return dets, labels[keep]\n\n    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'objectnesses'))\n    def loss(self,\n             cls_scores,\n             bbox_preds,\n             objectnesses,\n             gt_bboxes,\n             gt_labels,\n             img_metas,\n             gt_bboxes_ignore=None):\n        \"\"\"Compute loss of the head.\n        Args:\n            cls_scores (list[Tensor]): Box scores for each scale level,\n                each is a 4D-tensor, the channel number is\n                num_priors * num_classes.\n            bbox_preds (list[Tensor]): Box energies / deltas for each scale\n                level, each is a 4D-tensor, the channel number is\n                num_priors * 4.\n            objectnesses (list[Tensor], Optional): Score factor for\n                all scale level, each is a 4D-tensor, has shape\n                (batch_size, 1, H, W).\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            img_metas (list[dict]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n        \"\"\"\n        num_imgs = len(img_metas)\n        featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores]\n        mlvl_priors = self.prior_generator.grid_priors(\n            featmap_sizes,\n            dtype=cls_scores[0].dtype,\n            device=cls_scores[0].device,\n            with_stride=True)\n\n        flatten_cls_preds = [\n            cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1,\n                                                 self.cls_out_channels)\n            for cls_pred in cls_scores\n        ]\n        flatten_bbox_preds = [\n            bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)\n            for bbox_pred in bbox_preds\n        ]\n        flatten_objectness = [\n            objectness.permute(0, 2, 3, 1).reshape(num_imgs, -1)\n            for objectness in objectnesses\n        ]\n\n        flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1)\n        flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1)\n        flatten_objectness = 
torch.cat(flatten_objectness, dim=1)\n        flatten_priors = torch.cat(mlvl_priors)\n        flatten_bboxes = self._bbox_decode(flatten_priors, flatten_bbox_preds)\n\n        (pos_masks, cls_targets, obj_targets, bbox_targets, l1_targets,\n         num_fg_imgs) = multi_apply(\n             self._get_target_single, flatten_cls_preds.detach(),\n             flatten_objectness.detach(),\n             flatten_priors.unsqueeze(0).repeat(num_imgs, 1, 1),\n             flatten_bboxes.detach(), gt_bboxes, gt_labels)\n\n        # The experimental results show that ‘reduce_mean’ can improve\n        # performance on the COCO dataset.\n        num_pos = torch.tensor(\n            sum(num_fg_imgs),\n            dtype=torch.float,\n            device=flatten_cls_preds.device)\n        num_total_samples = max(reduce_mean(num_pos), 1.0)\n\n        pos_masks = torch.cat(pos_masks, 0)\n        cls_targets = torch.cat(cls_targets, 0)\n        obj_targets = torch.cat(obj_targets, 0)\n        bbox_targets = torch.cat(bbox_targets, 0)\n        if self.use_l1:\n            l1_targets = torch.cat(l1_targets, 0)\n\n        loss_bbox = self.loss_bbox(\n            flatten_bboxes.view(-1, 4)[pos_masks],\n            bbox_targets) / num_total_samples\n        loss_obj = self.loss_obj(flatten_objectness.view(-1, 1),\n                                 obj_targets) / num_total_samples\n        loss_cls = self.loss_cls(\n            flatten_cls_preds.view(-1, self.num_classes)[pos_masks],\n            cls_targets) / num_total_samples\n\n        loss_dict = dict(\n            loss_cls=loss_cls, loss_bbox=loss_bbox, loss_obj=loss_obj)\n\n        if self.use_l1:\n            loss_l1 = self.loss_l1(\n                flatten_bbox_preds.view(-1, 4)[pos_masks],\n                l1_targets) / num_total_samples\n            loss_dict.update(loss_l1=loss_l1)\n\n        return loss_dict\n\n    @torch.no_grad()\n    def _get_target_single(self, cls_preds, objectness, priors, decoded_bboxes,\n                           gt_bboxes, gt_labels):\n        \"\"\"Compute classification, regression, and objectness targets for\n        priors in a single image.\n        Args:\n            cls_preds (Tensor): Classification predictions of one image,\n                a 2D-Tensor with shape [num_priors, num_classes]\n            objectness (Tensor): Objectness predictions of one image,\n                a 1D-Tensor with shape [num_priors]\n            priors (Tensor): All priors of one image, a 2D-Tensor with shape\n                [num_priors, 4] in [cx, xy, stride_w, stride_y] format.\n            decoded_bboxes (Tensor): Decoded bboxes predictions of one image,\n                a 2D-Tensor with shape [num_priors, 4] in [tl_x, tl_y,\n                br_x, br_y] format.\n            gt_bboxes (Tensor): Ground truth bboxes of one image, a 2D-Tensor\n                with shape [num_gts, 4] in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (Tensor): Ground truth labels of one image, a Tensor\n                with shape [num_gts].\n        \"\"\"\n\n        num_priors = priors.size(0)\n        num_gts = gt_labels.size(0)\n        gt_bboxes = gt_bboxes.to(decoded_bboxes.dtype)\n        # No target\n        if num_gts == 0:\n            cls_target = cls_preds.new_zeros((0, self.num_classes))\n            bbox_target = cls_preds.new_zeros((0, 4))\n            l1_target = cls_preds.new_zeros((0, 4))\n            obj_target = cls_preds.new_zeros((num_priors, 1))\n            foreground_mask = cls_preds.new_zeros(num_priors).bool()\n            
return (foreground_mask, cls_target, obj_target, bbox_target,\n                    l1_target, 0)\n\n        # YOLOX uses center priors with 0.5 offset to assign targets,\n        # but use center priors without offset to regress bboxes.\n        offset_priors = torch.cat(\n            [priors[:, :2] + priors[:, 2:] * 0.5, priors[:, 2:]], dim=-1)\n\n        assign_result = self.assigner.assign(\n            cls_preds.sigmoid() * objectness.unsqueeze(1).sigmoid(),\n            offset_priors, decoded_bboxes, gt_bboxes, gt_labels)\n\n        sampling_result = self.sampler.sample(assign_result, priors, gt_bboxes)\n        pos_inds = sampling_result.pos_inds\n        num_pos_per_img = pos_inds.size(0)\n\n        pos_ious = assign_result.max_overlaps[pos_inds]\n        # IOU aware classification score\n        cls_target = F.one_hot(sampling_result.pos_gt_labels,\n                               self.num_classes) * pos_ious.unsqueeze(-1)\n        obj_target = torch.zeros_like(objectness).unsqueeze(-1)\n        obj_target[pos_inds] = 1\n        bbox_target = sampling_result.pos_gt_bboxes\n        l1_target = cls_preds.new_zeros((num_pos_per_img, 4))\n        if self.use_l1:\n            l1_target = self._get_l1_target(l1_target, bbox_target,\n                                            priors[pos_inds])\n        foreground_mask = torch.zeros_like(objectness).to(torch.bool)\n        foreground_mask[pos_inds] = 1\n        return (foreground_mask, cls_target, obj_target, bbox_target,\n                l1_target, num_pos_per_img)\n\n    def _get_l1_target(self, l1_target, gt_bboxes, priors, eps=1e-8):\n        \"\"\"Convert gt bboxes to center offset and log width height.\"\"\"\n        gt_cxcywh = bbox_xyxy_to_cxcywh(gt_bboxes)\n        l1_target[:, :2] = (gt_cxcywh[:, :2] - priors[:, :2]) / priors[:, 2:]\n        l1_target[:, 2:] = torch.log(gt_cxcywh[:, 2:] / priors[:, 2:] + eps)\n        return l1_target\n"
  },
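  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/dense_heads/yolox_head_decode_sketch.py",
    "content": "# A minimal, hypothetical sketch (not part of upstream mmdetection).\n#\n# A standalone restatement of YOLOXHead._bbox_decode: priors are\n# (x, y, stride_w, stride_h) points from MlvlPointGenerator, the first two\n# regression channels are offsets in units of the stride, and the last two are\n# log-scale widths and heights.\nimport torch\n\n\ndef yolox_decode_sketch(priors, bbox_preds):\n    # priors: (N, 4) as (x, y, stride_w, stride_h); bbox_preds: (N, 4)\n    xys = bbox_preds[..., :2] * priors[:, 2:] + priors[:, :2]\n    whs = bbox_preds[..., 2:].exp() * priors[:, 2:]\n    # convert center/size back to corner (tl_x, tl_y, br_x, br_y) format\n    return torch.cat([xys - whs / 2, xys + whs / 2], dim=-1)\n\n\nif __name__ == '__main__':\n    priors = torch.tensor([[8., 8., 16., 16.]])\n    preds = torch.zeros(1, 4)\n    # a zero prediction yields a stride-sized box around the prior point\n    print(yolox_decode_sketch(priors, preds))  # [[0., 0., 16., 16.]]\n"
  },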
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .atss import ATSS\nfrom .autoassign import AutoAssign\nfrom .base import BaseDetector\nfrom .cascade_rcnn import CascadeRCNN\nfrom .centernet import CenterNet\nfrom .cornernet import CornerNet\nfrom .ddod import DDOD\nfrom .deformable_detr import DeformableDETR\nfrom .detr import DETR\nfrom .fast_rcnn import FastRCNN\nfrom .faster_rcnn import FasterRCNN\nfrom .fcos import FCOS\nfrom .fovea import FOVEA\nfrom .fsaf import FSAF\nfrom .gfl import GFL\nfrom .grid_rcnn import GridRCNN\nfrom .htc import HybridTaskCascade\nfrom .kd_one_stage import KnowledgeDistillationSingleStageDetector\nfrom .lad import LAD\nfrom .mask2former import Mask2Former\nfrom .mask_rcnn import MaskRCNN\nfrom .mask_scoring_rcnn import MaskScoringRCNN\nfrom .maskformer import MaskFormer\nfrom .nasfcos import NASFCOS\nfrom .paa import PAA\nfrom .panoptic_fpn import PanopticFPN\nfrom .panoptic_two_stage_segmentor import TwoStagePanopticSegmentor\nfrom .point_rend import PointRend\nfrom .queryinst import QueryInst\nfrom .reppoints_detector import RepPointsDetector\nfrom .retinanet import RetinaNet\nfrom .rpn import RPN\nfrom .scnet import SCNet\nfrom .single_stage import SingleStageDetector\nfrom .solo import SOLO\nfrom .solov2 import SOLOv2\nfrom .sparse_rcnn import SparseRCNN\nfrom .tood import TOOD\nfrom .trident_faster_rcnn import TridentFasterRCNN\nfrom .two_stage import TwoStageDetector\nfrom .vfnet import VFNet\nfrom .yolact import YOLACT\nfrom .yolo import YOLOV3\nfrom .yolof import YOLOF\nfrom .yolox import YOLOX\n\n__all__ = [\n    'ATSS', 'BaseDetector', 'SingleStageDetector', 'TwoStageDetector', 'RPN',\n    'KnowledgeDistillationSingleStageDetector', 'FastRCNN', 'FasterRCNN',\n    'MaskRCNN', 'CascadeRCNN', 'HybridTaskCascade', 'RetinaNet', 'FCOS',\n    'GridRCNN', 'MaskScoringRCNN', 'RepPointsDetector', 'FOVEA', 'FSAF',\n    'NASFCOS', 'PointRend', 'GFL', 'CornerNet', 'PAA', 'YOLOV3', 'YOLACT',\n    'VFNet', 'DETR', 'TridentFasterRCNN', 'SparseRCNN', 'SCNet', 'SOLO',\n    'SOLOv2', 'DeformableDETR', 'AutoAssign', 'YOLOF', 'CenterNet', 'YOLOX',\n    'TwoStagePanopticSegmentor', 'PanopticFPN', 'QueryInst', 'LAD', 'TOOD',\n    'MaskFormer', 'DDOD', 'Mask2Former'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/atss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass ATSS(SingleStageDetector):\n    \"\"\"Implementation of `ATSS <https://arxiv.org/abs/1912.02424>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(ATSS, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                   test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/autoassign.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass AutoAssign(SingleStageDetector):\n    \"\"\"Implementation of `AutoAssign: Differentiable Label Assignment for Dense\n    Object Detection <https://arxiv.org/abs/2007.03496>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(AutoAssign, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                         test_cfg, pretrained)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/base.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\nfrom collections import OrderedDict\n\nimport mmcv\nimport numpy as np\nimport torch\nimport torch.distributed as dist\nfrom mmcv.runner import BaseModule, auto_fp16\n\nfrom mmdet.core.visualization import imshow_det_bboxes\n\n\nclass BaseDetector(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for detectors.\"\"\"\n\n    def __init__(self, init_cfg=None):\n        super(BaseDetector, self).__init__(init_cfg)\n        self.fp16_enabled = False\n\n    @property\n    def with_neck(self):\n        \"\"\"bool: whether the detector has a neck\"\"\"\n        return hasattr(self, 'neck') and self.neck is not None\n\n    # TODO: these properties need to be carefully handled\n    # for both single stage & two stage detectors\n    @property\n    def with_shared_head(self):\n        \"\"\"bool: whether the detector has a shared head in the RoI Head\"\"\"\n        return hasattr(self, 'roi_head') and self.roi_head.with_shared_head\n\n    @property\n    def with_bbox(self):\n        \"\"\"bool: whether the detector has a bbox head\"\"\"\n        return ((hasattr(self, 'roi_head') and self.roi_head.with_bbox)\n                or (hasattr(self, 'bbox_head') and self.bbox_head is not None))\n\n    @property\n    def with_mask(self):\n        \"\"\"bool: whether the detector has a mask head\"\"\"\n        return ((hasattr(self, 'roi_head') and self.roi_head.with_mask)\n                or (hasattr(self, 'mask_head') and self.mask_head is not None))\n\n    @abstractmethod\n    def extract_feat(self, imgs):\n        \"\"\"Extract features from images.\"\"\"\n        pass\n\n    def extract_feats(self, imgs):\n        \"\"\"Extract features from multiple images.\n\n        Args:\n            imgs (list[torch.Tensor]): A list of images. 
The images are\n                augmented from the same image but in different ways.\n\n        Returns:\n            list[torch.Tensor]: Features of different images\n        \"\"\"\n        assert isinstance(imgs, list)\n        return [self.extract_feat(img) for img in imgs]\n\n    def forward_train(self, imgs, img_metas, **kwargs):\n        \"\"\"\n        Args:\n            img (Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys, see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            kwargs (keyword arguments): Specific to concrete implementation.\n        \"\"\"\n        # NOTE the batched image size information may be useful, e.g.\n        # in DETR, this is needed for the construction of masks, which is\n        # then used for the transformer_head.\n        batch_input_shape = tuple(imgs[0].size()[-2:])\n        for img_meta in img_metas:\n            img_meta['batch_input_shape'] = batch_input_shape\n\n    async def async_simple_test(self, img, img_metas, **kwargs):\n        raise NotImplementedError\n\n    @abstractmethod\n    def simple_test(self, img, img_metas, **kwargs):\n        pass\n\n    @abstractmethod\n    def aug_test(self, imgs, img_metas, **kwargs):\n        \"\"\"Test function with test time augmentation.\"\"\"\n        pass\n\n    async def aforward_test(self, *, img, img_metas, **kwargs):\n        for var, name in [(img, 'img'), (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError(f'{name} must be a list, but got {type(var)}')\n\n        num_augs = len(img)\n        if num_augs != len(img_metas):\n            raise ValueError(f'num of augmentations ({len(img)}) '\n                             f'!= num of image metas ({len(img_metas)})')\n        # TODO: remove the restriction of samples_per_gpu == 1 when prepared\n        samples_per_gpu = img[0].size(0)\n        assert samples_per_gpu == 1\n\n        if num_augs == 1:\n            return await self.async_simple_test(img[0], img_metas[0], **kwargs)\n        else:\n            raise NotImplementedError\n\n    def forward_test(self, imgs, img_metas, **kwargs):\n        \"\"\"\n        Args:\n            imgs (List[Tensor]): the outer list indicates test-time\n                augmentations and inner Tensor should have a shape NxCxHxW,\n                which contains all images in the batch.\n            img_metas (List[List[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) 
and the inner list indicates\n                images in a batch.\n        \"\"\"\n        for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError(f'{name} must be a list, but got {type(var)}')\n\n        num_augs = len(imgs)\n        if num_augs != len(img_metas):\n            raise ValueError(f'num of augmentations ({len(imgs)}) '\n                             f'!= num of image meta ({len(img_metas)})')\n\n        # NOTE the batched image size information may be useful, e.g.\n        # in DETR, this is needed for the construction of masks, which is\n        # then used for the transformer_head.\n        for img, img_meta in zip(imgs, img_metas):\n            batch_size = len(img_meta)\n            for img_id in range(batch_size):\n                img_meta[img_id]['batch_input_shape'] = tuple(img.size()[-2:])\n\n        if num_augs == 1:\n            # proposals (List[List[Tensor]]): the outer list indicates\n            # test-time augs (multiscale, flip, etc.) and the inner list\n            # indicates images in a batch.\n            # The Tensor should have a shape Px4, where P is the number of\n            # proposals.\n            if 'proposals' in kwargs:\n                kwargs['proposals'] = kwargs['proposals'][0]\n            return self.simple_test(imgs[0], img_metas[0], **kwargs)\n        else:\n            assert imgs[0].size(0) == 1, 'aug test does not support ' \\\n                                         'inference with batch size ' \\\n                                         f'{imgs[0].size(0)}'\n            # TODO: support test augmentation for predefined proposals\n            assert 'proposals' not in kwargs\n            return self.aug_test(imgs, img_metas, **kwargs)\n\n    @auto_fp16(apply_to=('img', ))\n    def forward(self, img, img_metas, return_loss=True, **kwargs):\n        \"\"\"Calls either :func:`forward_train` or :func:`forward_test` depending\n        on whether ``return_loss`` is ``True``.\n\n        Note this setting will change the expected inputs. When\n        ``return_loss=True``, img and img_meta are single-nested (i.e. Tensor\n        and List[dict]), and when ``resturn_loss=False``, img and img_meta\n        should be double nested (i.e.  
List[Tensor], List[List[dict]]), with\n        the outer list indicating test time augmentations.\n        \"\"\"\n        if torch.onnx.is_in_onnx_export():\n            assert len(img_metas) == 1\n            return self.onnx_export(img[0], img_metas[0])\n\n        if return_loss:\n            return self.forward_train(img, img_metas, **kwargs)\n        else:\n            return self.forward_test(img, img_metas, **kwargs)\n\n    def _parse_losses(self, losses):\n        \"\"\"Parse the raw outputs (losses) of the network.\n\n        Args:\n            losses (dict): Raw output of the network, which usually contain\n                losses and other necessary information.\n\n        Returns:\n            tuple[Tensor, dict]: (loss, log_vars), loss is the loss tensor \\\n                which may be a weighted sum of all losses, log_vars contains \\\n                all the variables to be sent to the logger.\n        \"\"\"\n        log_vars = OrderedDict()\n        for loss_name, loss_value in losses.items():\n            if isinstance(loss_value, torch.Tensor):\n                log_vars[loss_name] = loss_value.mean()\n            elif isinstance(loss_value, list):\n                log_vars[loss_name] = sum(_loss.mean() for _loss in loss_value)\n            else:\n                raise TypeError(\n                    f'{loss_name} is not a tensor or list of tensors')\n\n        loss = sum(_value for _key, _value in log_vars.items()\n                   if 'loss' in _key)\n\n        # If the loss_vars has different length, GPUs will wait infinitely\n        if dist.is_available() and dist.is_initialized():\n            log_var_length = torch.tensor(len(log_vars), device=loss.device)\n            dist.all_reduce(log_var_length)\n            message = (f'rank {dist.get_rank()}' +\n                       f' len(log_vars): {len(log_vars)}' + ' keys: ' +\n                       ','.join(log_vars.keys()))\n            assert log_var_length == len(log_vars) * dist.get_world_size(), \\\n                'loss log variables are different across GPUs!\\n' + message\n\n        log_vars['loss'] = loss\n        for loss_name, loss_value in log_vars.items():\n            # reduce loss when distributed training\n            if dist.is_available() and dist.is_initialized():\n                loss_value = loss_value.data.clone()\n                dist.all_reduce(loss_value.div_(dist.get_world_size()))\n            log_vars[loss_name] = loss_value.item()\n\n        return loss, log_vars\n\n    def train_step(self, data, optimizer):\n        \"\"\"The iteration step during training.\n\n        This method defines an iteration step during training, except for the\n        back propagation and optimizer updating, which are done in an optimizer\n        hook. Note that in some complicated cases or models, the whole process\n        including back propagation and optimizer updating is also defined in\n        this method, such as GAN.\n\n        Args:\n            data (dict): The output of dataloader.\n            optimizer (:obj:`torch.optim.Optimizer` | dict): The optimizer of\n                runner is passed to ``train_step()``. 
This argument is unused\n                and reserved.\n\n        Returns:\n            dict: It should contain at least 3 keys: ``loss``, ``log_vars``, \\\n                ``num_samples``.\n\n                - ``loss`` is a tensor for back propagation, which can be a\n                  weighted sum of multiple losses.\n                - ``log_vars`` contains all the variables to be sent to the\n                  logger.\n                - ``num_samples`` indicates the batch size (when the model is\n                  DDP, it means the batch size on each GPU), which is used for\n                  averaging the logs.\n        \"\"\"\n        losses = self(**data)\n        loss, log_vars = self._parse_losses(losses)\n\n        outputs = dict(\n            loss=loss, log_vars=log_vars, num_samples=len(data['img_metas']))\n\n        return outputs\n\n    def val_step(self, data, optimizer=None):\n        \"\"\"The iteration step during validation.\n\n        This method shares the same signature as :func:`train_step`, but used\n        during val epochs. Note that the evaluation after training epochs is\n        not implemented with this method, but an evaluation hook.\n        \"\"\"\n        losses = self(**data)\n        loss, log_vars = self._parse_losses(losses)\n\n        outputs = dict(\n            loss=loss, log_vars=log_vars, num_samples=len(data['img_metas']))\n\n        return outputs\n\n    def show_result(self,\n                    img,\n                    result,\n                    score_thr=0.3,\n                    bbox_color=(72, 101, 241),\n                    text_color=(72, 101, 241),\n                    mask_color=None,\n                    thickness=2,\n                    font_size=13,\n                    win_name='',\n                    show=False,\n                    wait_time=0,\n                    out_file=None):\n        \"\"\"Draw `result` over `img`.\n\n        Args:\n            img (str or Tensor): The image to be displayed.\n            result (Tensor or tuple): The results to draw over `img`\n                bbox_result or (bbox_result, segm_result).\n            score_thr (float, optional): Minimum score of bboxes to be shown.\n                Default: 0.3.\n            bbox_color (str or tuple(int) or :obj:`Color`):Color of bbox lines.\n               The tuple of color should be in BGR order. Default: 'green'\n            text_color (str or tuple(int) or :obj:`Color`):Color of texts.\n               The tuple of color should be in BGR order. Default: 'green'\n            mask_color (None or str or tuple(int) or :obj:`Color`):\n               Color of masks. The tuple of color should be in BGR order.\n               Default: None\n            thickness (int): Thickness of lines. Default: 2\n            font_size (int): Font size of texts. Default: 13\n            win_name (str): The window name. 
Default: ''\n            wait_time (float): Value of waitKey param.\n                Default: 0.\n            show (bool): Whether to show the image.\n                Default: False.\n            out_file (str or None): The filename to write the image.\n                Default: None.\n\n        Returns:\n            img (Tensor): Only if not `show` or `out_file`\n        \"\"\"\n        img = mmcv.imread(img)\n        img = img.copy()\n        if isinstance(result, tuple):\n            bbox_result, segm_result = result\n            if isinstance(segm_result, tuple):\n                segm_result = segm_result[0]  # ms rcnn\n        else:\n            bbox_result, segm_result = result, None\n        bboxes = np.vstack(bbox_result)\n        labels = [\n            np.full(bbox.shape[0], i, dtype=np.int32)\n            for i, bbox in enumerate(bbox_result)\n        ]\n        labels = np.concatenate(labels)\n        # draw segmentation masks\n        segms = None\n        if segm_result is not None and len(labels) > 0:  # non empty\n            segms = mmcv.concat_list(segm_result)\n            if isinstance(segms[0], torch.Tensor):\n                segms = torch.stack(segms, dim=0).detach().cpu().numpy()\n            else:\n                segms = np.stack(segms, axis=0)\n        # if out_file specified, do not show image in window\n        if out_file is not None:\n            show = False\n        # draw bounding boxes\n        img = imshow_det_bboxes(\n            img,\n            bboxes,\n            labels,\n            segms,\n            class_names=self.CLASSES,\n            score_thr=score_thr,\n            bbox_color=bbox_color,\n            text_color=text_color,\n            mask_color=mask_color,\n            thickness=thickness,\n            font_size=font_size,\n            win_name=win_name,\n            show=show,\n            wait_time=wait_time,\n            out_file=out_file)\n\n        if not (show or out_file):\n            return img\n\n    def onnx_export(self, img, img_metas):\n        raise NotImplementedError(f'{self.__class__.__name__} does '\n                                  f'not support ONNX EXPORT')\n"
  },
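BaseDetector._parse_losses above turns the raw loss dict into a scalar training loss plus log variables: tensors are mean-reduced, lists of tensors are summed, and only keys containing 'loss' contribute to the total. A tiny single-GPU sketch of the same reduction rule (the distributed consistency check is omitted and the sample values are made up):

import torch
from collections import OrderedDict

def parse_losses(losses):
    # mirror of the reduction rule in BaseDetector._parse_losses (single-GPU view)
    log_vars = OrderedDict()
    for name, value in losses.items():
        if isinstance(value, torch.Tensor):
            log_vars[name] = value.mean()
        elif isinstance(value, list):
            log_vars[name] = sum(v.mean() for v in value)
        else:
            raise TypeError(f'{name} is not a tensor or list of tensors')
    # only keys containing 'loss' are back-propagated; everything is logged
    total = sum(v for k, v in log_vars.items() if 'loss' in k)
    log_vars['loss'] = total
    return total, {k: v.item() for k, v in log_vars.items()}

loss, logs = parse_losses({
    'loss_cls': torch.tensor([1.0, 1.5]),   # averaged to 1.25
    'loss_bbox': [torch.tensor(0.5)],       # list entries are summed
    'acc': torch.tensor(85.0),              # logged but not added to the loss
})
print(loss)   # tensor(1.7500)
print(logs)   # {'loss_cls': 1.25, 'loss_bbox': 0.5, 'acc': 85.0, 'loss': 1.75}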
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/cascade_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass CascadeRCNN(TwoStageDetector):\n    r\"\"\"Implementation of `Cascade R-CNN: Delving into High Quality Object\n    Detection <https://arxiv.org/abs/1906.09756>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 rpn_head=None,\n                 roi_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(CascadeRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n\n    def show_result(self, data, result, **kwargs):\n        \"\"\"Show prediction results of the detector.\n\n        Args:\n            data (str or np.ndarray): Image filename or loaded image.\n            result (Tensor or tuple): The results to draw over `img`\n                bbox_result or (bbox_result, segm_result).\n\n        Returns:\n            np.ndarray: The image with bboxes drawn on it.\n        \"\"\"\n        if self.with_mask:\n            ms_bbox_result, ms_segm_result = result\n            if isinstance(ms_bbox_result, dict):\n                result = (ms_bbox_result['ensemble'],\n                          ms_segm_result['ensemble'])\n        else:\n            if isinstance(result, dict):\n                result = result['ensemble']\n        return super(CascadeRCNN, self).show_result(data, result, **kwargs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/centernet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core import bbox2result\nfrom mmdet.models.builder import DETECTORS\nfrom ...core.utils import flip_tensor\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass CenterNet(SingleStageDetector):\n    \"\"\"Implementation of CenterNet(Objects as Points)\n\n    <https://arxiv.org/abs/1904.07850>.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(CenterNet, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                        test_cfg, pretrained, init_cfg)\n\n    def merge_aug_results(self, aug_results, with_nms):\n        \"\"\"Merge augmented detection bboxes and score.\n\n        Args:\n            aug_results (list[list[Tensor]]): Det_bboxes and det_labels of each\n                image.\n            with_nms (bool): If True, do nms before return boxes.\n\n        Returns:\n            tuple: (out_bboxes, out_labels)\n        \"\"\"\n        recovered_bboxes, aug_labels = [], []\n        for single_result in aug_results:\n            recovered_bboxes.append(single_result[0][0])\n            aug_labels.append(single_result[0][1])\n\n        bboxes = torch.cat(recovered_bboxes, dim=0).contiguous()\n        labels = torch.cat(aug_labels).contiguous()\n        if with_nms:\n            out_bboxes, out_labels = self.bbox_head._bboxes_nms(\n                bboxes, labels, self.bbox_head.test_cfg)\n        else:\n            out_bboxes, out_labels = bboxes, labels\n\n        return out_bboxes, out_labels\n\n    def aug_test(self, imgs, img_metas, rescale=True):\n        \"\"\"Augment testing of CenterNet. Aug test must have flipped image pair,\n        and unlike CornerNet, it will perform an averaging operation on the\n        feature map instead of detecting bbox.\n\n        Args:\n            imgs (list[Tensor]): Augmented images.\n            img_metas (list[list[dict]]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n                Default: True.\n\n        Note:\n            ``imgs`` must including flipped image pairs.\n\n        Returns:\n            list[list[np.ndarray]]: BBox results of each image and classes.\n                The outer list corresponds to each image. 
The inner list\n                corresponds to each class.\n        \"\"\"\n        img_inds = list(range(len(imgs)))\n        assert img_metas[0][0]['flip'] + img_metas[1][0]['flip'], (\n            'aug test must have flipped image pair')\n        aug_results = []\n        for ind, flip_ind in zip(img_inds[0::2], img_inds[1::2]):\n            flip_direction = img_metas[flip_ind][0]['flip_direction']\n            img_pair = torch.cat([imgs[ind], imgs[flip_ind]])\n            x = self.extract_feat(img_pair)\n            center_heatmap_preds, wh_preds, offset_preds = self.bbox_head(x)\n            assert len(center_heatmap_preds) == len(wh_preds) == len(\n                offset_preds) == 1\n\n            # Feature map averaging\n            center_heatmap_preds[0] = (\n                center_heatmap_preds[0][0:1] +\n                flip_tensor(center_heatmap_preds[0][1:2], flip_direction)) / 2\n            wh_preds[0] = (wh_preds[0][0:1] +\n                           flip_tensor(wh_preds[0][1:2], flip_direction)) / 2\n\n            bbox_list = self.bbox_head.get_bboxes(\n                center_heatmap_preds,\n                wh_preds, [offset_preds[0][0:1]],\n                img_metas[ind],\n                rescale=rescale,\n                with_nms=False)\n            aug_results.append(bbox_list)\n\n        nms_cfg = self.bbox_head.test_cfg.get('nms_cfg', None)\n        if nms_cfg is None:\n            with_nms = False\n        else:\n            with_nms = True\n        bbox_list = [self.merge_aug_results(aug_results, with_nms)]\n        bbox_results = [\n            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)\n            for det_bboxes, det_labels in bbox_list\n        ]\n        return bbox_results\n"
  },
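CenterNet's aug_test above runs each image together with its flipped copy in one forward pass and fuses the two predictions by flipping the second one back and averaging the feature maps, rather than merging boxes. A small sketch of that fusion on dummy tensors, with a plain Tensor.flip standing in for mmdet's flip_tensor:

import torch

def average_flipped_pair(pred, flip_direction='horizontal'):
    # pred holds predictions for [original, flipped] stacked along the batch dim;
    # flip the second one back and average, as in CenterNet.aug_test
    assert pred.size(0) == 2
    dim = -1 if flip_direction == 'horizontal' else -2
    unflipped = pred[1:2].flip(dims=(dim,))
    return (pred[0:1] + unflipped) / 2

heatmaps = torch.rand(2, 80, 128, 128)   # (orig + flipped) x classes x H x W
fused = average_flipped_pair(heatmaps)
print(fused.shape)                        # torch.Size([1, 80, 128, 128])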
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/cornernet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core import bbox2result, bbox_mapping_back\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass CornerNet(SingleStageDetector):\n    \"\"\"CornerNet.\n\n    This detector is the implementation of the paper `CornerNet: Detecting\n    Objects as Paired Keypoints <https://arxiv.org/abs/1808.01244>`_ .\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(CornerNet, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                        test_cfg, pretrained, init_cfg)\n\n    def merge_aug_results(self, aug_results, img_metas):\n        \"\"\"Merge augmented detection bboxes and score.\n\n        Args:\n            aug_results (list[list[Tensor]]): Det_bboxes and det_labels of each\n                image.\n            img_metas (list[list[dict]]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n\n        Returns:\n            tuple: (bboxes, labels)\n        \"\"\"\n        recovered_bboxes, aug_labels = [], []\n        for bboxes_labels, img_info in zip(aug_results, img_metas):\n            img_shape = img_info[0]['img_shape']  # using shape before padding\n            scale_factor = img_info[0]['scale_factor']\n            flip = img_info[0]['flip']\n            bboxes, labels = bboxes_labels\n            bboxes, scores = bboxes[:, :4], bboxes[:, -1:]\n            bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip)\n            recovered_bboxes.append(torch.cat([bboxes, scores], dim=-1))\n            aug_labels.append(labels)\n\n        bboxes = torch.cat(recovered_bboxes, dim=0)\n        labels = torch.cat(aug_labels)\n\n        if bboxes.shape[0] > 0:\n            out_bboxes, out_labels = self.bbox_head._bboxes_nms(\n                bboxes, labels, self.bbox_head.test_cfg)\n        else:\n            out_bboxes, out_labels = bboxes, labels\n\n        return out_bboxes, out_labels\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        \"\"\"Augment testing of CornerNet.\n\n        Args:\n            imgs (list[Tensor]): Augmented images.\n            img_metas (list[list[dict]]): Meta information of each image, e.g.,\n                image size, scaling factor, etc.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n\n        Note:\n            ``imgs`` must including flipped image pairs.\n\n        Returns:\n            list[list[np.ndarray]]: BBox results of each image and classes.\n                The outer list corresponds to each image. 
The inner list\n                corresponds to each class.\n        \"\"\"\n        img_inds = list(range(len(imgs)))\n\n        assert img_metas[0][0]['flip'] + img_metas[1][0]['flip'], (\n            'aug test must have flipped image pair')\n        aug_results = []\n        for ind, flip_ind in zip(img_inds[0::2], img_inds[1::2]):\n            img_pair = torch.cat([imgs[ind], imgs[flip_ind]])\n            x = self.extract_feat(img_pair)\n            outs = self.bbox_head(x)\n            bbox_list = self.bbox_head.get_bboxes(\n                *outs, [img_metas[ind], img_metas[flip_ind]], False, False)\n            aug_results.append(bbox_list[0])\n            aug_results.append(bbox_list[1])\n\n        bboxes, labels = self.merge_aug_results(aug_results, img_metas)\n        bbox_results = bbox2result(bboxes, labels, self.bbox_head.num_classes)\n\n        return [bbox_results]\n"
  },
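merge_aug_results above relies on bbox_mapping_back to undo the test-time resize and flip, so all augmented predictions land in the original image frame before NMS. A simplified sketch of that mapping for a horizontal flip and a single uniform scale factor (mmdet's real helper also handles per-axis scale factors and vertical/diagonal flips):

import torch

def map_bboxes_back(bboxes, img_shape, scale_factor, flip):
    # bboxes: (N, 4) xyxy in the augmented image frame; img_shape: (H, W)
    boxes = bboxes.clone()
    if flip:  # undo a horizontal flip
        w = img_shape[1]
        boxes[:, 0], boxes[:, 2] = w - bboxes[:, 2], w - bboxes[:, 0]
    return boxes / scale_factor   # undo the test-time resize

aug_boxes = torch.tensor([[10.0, 20.0, 50.0, 60.0]])
print(map_bboxes_back(aug_boxes, img_shape=(800, 1333), scale_factor=2.0, flip=True))
# tensor([[641.5000,  10.0000, 661.5000,  30.0000]])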
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/ddod.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass DDOD(SingleStageDetector):\n    \"\"\"Implementation of `DDOD <https://arxiv.org/pdf/2107.02963.pdf>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(DDOD, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                   test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/deformable_detr.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .detr import DETR\n\n\n@DETECTORS.register_module()\nclass DeformableDETR(DETR):\n\n    def __init__(self, *args, **kwargs):\n        super(DETR, self).__init__(*args, **kwargs)\n"
  },
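DeformableDETR.__init__ calls super(DETR, self).__init__(*args, **kwargs), which starts the method lookup after DETR in the MRO, so DETR.__init__ and its fixed signature are bypassed and SingleStageDetector.__init__ receives the arguments directly. A toy illustration of that skip-a-level pattern:

class A:
    def __init__(self):
        print('A.__init__')

class B(A):
    def __init__(self):
        print('B.__init__')
        super().__init__()

class C(B):
    def __init__(self):
        # super(B, self) starts the lookup after B in C's MRO,
        # so B.__init__ is skipped and A.__init__ runs directly,
        # just as DeformableDETR skips DETR.__init__.
        super(B, self).__init__()

C()   # prints only 'A.__init__'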
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/detr.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\n\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass DETR(SingleStageDetector):\n    r\"\"\"Implementation of `DETR: End-to-End Object Detection with\n    Transformers <https://arxiv.org/pdf/2005.12872>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(DETR, self).__init__(backbone, None, bbox_head, train_cfg,\n                                   test_cfg, pretrained, init_cfg)\n\n    # over-write `forward_dummy` because:\n    # the forward of bbox_head requires img_metas\n    def forward_dummy(self, img):\n        \"\"\"Used for computing network flops.\n\n        See `mmdetection/tools/analysis_tools/get_flops.py`\n        \"\"\"\n        warnings.warn('Warning! MultiheadAttention in DETR does not '\n                      'support flops computation! Do not use the '\n                      'results in your papers!')\n\n        batch_size, _, height, width = img.shape\n        dummy_img_metas = [\n            dict(\n                batch_input_shape=(height, width),\n                img_shape=(height, width, 3)) for _ in range(batch_size)\n        ]\n        x = self.extract_feat(img)\n        outs = self.bbox_head(x, dummy_img_metas)\n        return outs\n\n    # over-write `onnx_export` because:\n    # (1) the forward of bbox_head requires img_metas\n    # (2) the different behavior (e.g. construction of `masks`) between\n    # torch and ONNX model, during the forward of bbox_head\n    def onnx_export(self, img, img_metas):\n        \"\"\"Test function for exporting to ONNX, without test time augmentation.\n\n        Args:\n            img (torch.Tensor): input images.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]\n                and class labels of shape [N, num_det].\n        \"\"\"\n        x = self.extract_feat(img)\n        # forward of this head requires img_metas\n        outs = self.bbox_head.forward_onnx(x, img_metas)\n        # get shape as tensor\n        img_shape = torch._shape_as_tensor(img)[2:]\n        img_metas[0]['img_shape_for_onnx'] = img_shape\n\n        det_bboxes, det_labels = self.bbox_head.onnx_export(*outs, img_metas)\n\n        return det_bboxes, det_labels\n"
  },
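forward_dummy above has to fabricate img_metas with batch_input_shape and img_shape because DETR's head derives a padding mask from those two keys. A hedged sketch of the kind of mask built from them; the helper name is mine, not mmdet's, and the head itself performs this per image inside its forward:

import torch

def build_padding_mask(batch_input_shape, img_shape):
    # DETR-style padding mask: True marks padded pixels, False marks valid image area.
    # batch_input_shape is the padded (H, W) of the whole batch, img_shape the
    # un-padded (H, W) of one image.
    mask = torch.ones(batch_input_shape, dtype=torch.bool)
    mask[:img_shape[0], :img_shape[1]] = False
    return mask

mask = build_padding_mask((800, 1344), (800, 1333))
print(mask.shape, mask.sum().item())   # torch.Size([800, 1344]) 8800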
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/fast_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass FastRCNN(TwoStageDetector):\n    \"\"\"Implementation of `Fast R-CNN <https://arxiv.org/abs/1504.08083>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 roi_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(FastRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n\n    def forward_test(self, imgs, img_metas, proposals, **kwargs):\n        \"\"\"\n        Args:\n            imgs (List[Tensor]): the outer list indicates test-time\n                augmentations and inner Tensor should have a shape NxCxHxW,\n                which contains all images in the batch.\n            img_metas (List[List[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch.\n            proposals (List[List[Tensor]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch. The Tensor should have a shape Px4, where\n                P is the number of proposals.\n        \"\"\"\n        for var, name in [(imgs, 'imgs'), (img_metas, 'img_metas')]:\n            if not isinstance(var, list):\n                raise TypeError(f'{name} must be a list, but got {type(var)}')\n\n        num_augs = len(imgs)\n        if num_augs != len(img_metas):\n            raise ValueError(f'num of augmentations ({len(imgs)}) '\n                             f'!= num of image meta ({len(img_metas)})')\n\n        if num_augs == 1:\n            return self.simple_test(imgs[0], img_metas[0], proposals[0],\n                                    **kwargs)\n        else:\n            # TODO: support test-time augmentation\n            assert NotImplementedError\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/faster_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass FasterRCNN(TwoStageDetector):\n    \"\"\"Implementation of `Faster R-CNN <https://arxiv.org/abs/1506.01497>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 roi_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(FasterRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/fcos.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass FCOS(SingleStageDetector):\n    \"\"\"Implementation of `FCOS <https://arxiv.org/abs/1904.01355>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(FCOS, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                   test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/fovea.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass FOVEA(SingleStageDetector):\n    \"\"\"Implementation of `FoveaBox <https://arxiv.org/abs/1904.03797>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(FOVEA, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                    test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/fsaf.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass FSAF(SingleStageDetector):\n    \"\"\"Implementation of `FSAF <https://arxiv.org/abs/1903.00621>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(FSAF, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                   test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/gfl.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass GFL(SingleStageDetector):\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(GFL, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                  test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/grid_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass GridRCNN(TwoStageDetector):\n    \"\"\"Grid R-CNN.\n\n    This detector is the implementation of:\n    - Grid R-CNN (https://arxiv.org/abs/1811.12030)\n    - Grid R-CNN Plus: Faster and Better (https://arxiv.org/abs/1906.05688)\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 roi_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(GridRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/htc.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .cascade_rcnn import CascadeRCNN\n\n\n@DETECTORS.register_module()\nclass HybridTaskCascade(CascadeRCNN):\n    \"\"\"Implementation of `HTC <https://arxiv.org/abs/1901.07518>`_\"\"\"\n\n    def __init__(self, **kwargs):\n        super(HybridTaskCascade, self).__init__(**kwargs)\n\n    @property\n    def with_semantic(self):\n        \"\"\"bool: whether the detector has a semantic head\"\"\"\n        return self.roi_head.with_semantic\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/kd_one_stage.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom pathlib import Path\n\nimport mmcv\nimport torch\nfrom mmcv.runner import load_checkpoint\n\nfrom .. import build_detector\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass KnowledgeDistillationSingleStageDetector(SingleStageDetector):\n    r\"\"\"Implementation of `Distilling the Knowledge in a Neural Network.\n    <https://arxiv.org/abs/1503.02531>`_.\n\n    Args:\n        teacher_config (str | dict): Config file path\n            or the config object of teacher model.\n        teacher_ckpt (str, optional): Checkpoint path of teacher model.\n            If left as None, the model will not load any weights.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 teacher_config,\n                 teacher_ckpt=None,\n                 eval_teacher=True,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super().__init__(backbone, neck, bbox_head, train_cfg, test_cfg,\n                         pretrained)\n        self.eval_teacher = eval_teacher\n        # Build teacher model\n        if isinstance(teacher_config, (str, Path)):\n            teacher_config = mmcv.Config.fromfile(teacher_config)\n        self.teacher_model = build_detector(teacher_config['model'])\n        if teacher_ckpt is not None:\n            load_checkpoint(\n                self.teacher_model, teacher_ckpt, map_location='cpu')\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None):\n        \"\"\"\n        Args:\n            img (Tensor): Input images of shape (N, C, H, W).\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): A List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            gt_bboxes (list[Tensor]): Each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding\n                boxes can be ignored when computing the loss.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        x = self.extract_feat(img)\n        with torch.no_grad():\n            teacher_x = self.teacher_model.extract_feat(img)\n            out_teacher = self.teacher_model.bbox_head(teacher_x)\n        losses = self.bbox_head.forward_train(x, out_teacher, img_metas,\n                                              gt_bboxes, gt_labels,\n                                              gt_bboxes_ignore)\n        return losses\n\n    def cuda(self, device=None):\n        \"\"\"Since teacher_model is registered as a plain object, it is necessary\n        to put the teacher model to cuda when calling cuda function.\"\"\"\n        self.teacher_model.cuda(device=device)\n        return super().cuda(device=device)\n\n    def train(self, mode=True):\n        \"\"\"Set 
the same train mode for teacher and student model.\"\"\"\n        if self.eval_teacher:\n            self.teacher_model.train(False)\n        else:\n            self.teacher_model.train(mode)\n        super().train(mode)\n\n    def __setattr__(self, name, value):\n        \"\"\"Set attribute, i.e. self.name = value\n\n        This reloading prevents the teacher model from being registered as a\n        nn.Module. The teacher module is registered as a plain object, so that\n        the teacher parameters will not show up when calling\n        ``self.parameters``, ``self.modules``, ``self.children`` methods.\n        \"\"\"\n        if name == 'teacher_model':\n            object.__setattr__(self, name, value)\n        else:\n            super().__setattr__(name, value)\n"
  },
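The __setattr__ override above stores the teacher with object.__setattr__, bypassing nn.Module's attribute registration so teacher weights never appear in self.parameters() and are therefore invisible to the student's optimizer. A self-contained sketch of the same trick with toy modules:

import torch
import torch.nn as nn

class Student(nn.Module):
    def __init__(self):
        super().__init__()
        self.head = nn.Linear(4, 2)
        # bypass nn.Module.__setattr__ so the teacher stays a plain attribute,
        # not a registered submodule (same idea as the override above)
        object.__setattr__(self, 'teacher', nn.Linear(4, 2))

student = Student()
print([name for name, _ in student.named_parameters()])
# ['head.weight', 'head.bias']  -- teacher weights are not listed
optimizer = torch.optim.SGD(student.parameters(), lr=0.01)  # optimizes only the student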
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/lad.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import load_checkpoint\n\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\nfrom .kd_one_stage import KnowledgeDistillationSingleStageDetector\n\n\n@DETECTORS.register_module()\nclass LAD(KnowledgeDistillationSingleStageDetector):\n    \"\"\"Implementation of `LAD <https://arxiv.org/pdf/2108.10520.pdf>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 teacher_backbone,\n                 teacher_neck,\n                 teacher_bbox_head,\n                 teacher_ckpt,\n                 eval_teacher=True,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(KnowledgeDistillationSingleStageDetector,\n              self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg,\n                             pretrained)\n        self.eval_teacher = eval_teacher\n        self.teacher_model = nn.Module()\n        self.teacher_model.backbone = build_backbone(teacher_backbone)\n        if teacher_neck is not None:\n            self.teacher_model.neck = build_neck(teacher_neck)\n        teacher_bbox_head.update(train_cfg=train_cfg)\n        teacher_bbox_head.update(test_cfg=test_cfg)\n        self.teacher_model.bbox_head = build_head(teacher_bbox_head)\n        if teacher_ckpt is not None:\n            load_checkpoint(\n                self.teacher_model, teacher_ckpt, map_location='cpu')\n\n    @property\n    def with_teacher_neck(self):\n        \"\"\"bool: whether the detector has a teacher_neck\"\"\"\n        return hasattr(self.teacher_model, 'neck') and \\\n            self.teacher_model.neck is not None\n\n    def extract_teacher_feat(self, img):\n        \"\"\"Directly extract teacher features from the backbone+neck.\"\"\"\n        x = self.teacher_model.backbone(img)\n        if self.with_teacher_neck:\n            x = self.teacher_model.neck(x)\n        return x\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None):\n        \"\"\"\n        Args:\n            img (Tensor): Input images of shape (N, C, H, W).\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): A List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            gt_bboxes (list[Tensor]): Each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        # get label assignment from the teacher\n        with torch.no_grad():\n            x_teacher = self.extract_teacher_feat(img)\n            outs_teacher = self.teacher_model.bbox_head(x_teacher)\n            label_assignment_results = \\\n                
self.teacher_model.bbox_head.get_label_assignment(\n                    *outs_teacher, gt_bboxes, gt_labels, img_metas,\n                    gt_bboxes_ignore)\n\n        # the student uses the label assignment from the teacher to learn\n        x = self.extract_feat(img)\n        losses = self.bbox_head.forward_train(x, label_assignment_results,\n                                              img_metas, gt_bboxes, gt_labels,\n                                              gt_bboxes_ignore)\n        return losses\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/mask2former.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .maskformer import MaskFormer\n\n\n@DETECTORS.register_module()\nclass Mask2Former(MaskFormer):\n    r\"\"\"Implementation of `Masked-attention Mask\n    Transformer for Universal Image Segmentation\n    <https://arxiv.org/pdf/2112.01527>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 panoptic_head=None,\n                 panoptic_fusion_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None):\n        super().__init__(\n            backbone,\n            neck=neck,\n            panoptic_head=panoptic_head,\n            panoptic_fusion_head=panoptic_fusion_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            init_cfg=init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/mask_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass MaskRCNN(TwoStageDetector):\n    \"\"\"Implementation of `Mask R-CNN <https://arxiv.org/abs/1703.06870>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 roi_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(MaskRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/mask_scoring_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass MaskScoringRCNN(TwoStageDetector):\n    \"\"\"Mask Scoring RCNN.\n\n    https://arxiv.org/abs/1903.00241\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 roi_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(MaskScoringRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/maskformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet.core import INSTANCE_OFFSET, bbox2result\nfrom mmdet.core.visualization import imshow_det_bboxes\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass MaskFormer(SingleStageDetector):\n    r\"\"\"Implementation of `Per-Pixel Classification is\n    NOT All You Need for Semantic Segmentation\n    <https://arxiv.org/pdf/2107.06278>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 panoptic_head=None,\n                 panoptic_fusion_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None):\n        super(SingleStageDetector, self).__init__(init_cfg=init_cfg)\n        self.backbone = build_backbone(backbone)\n        if neck is not None:\n            self.neck = build_neck(neck)\n\n        panoptic_head_ = copy.deepcopy(panoptic_head)\n        panoptic_head_.update(train_cfg=train_cfg)\n        panoptic_head_.update(test_cfg=test_cfg)\n        self.panoptic_head = build_head(panoptic_head_)\n\n        panoptic_fusion_head_ = copy.deepcopy(panoptic_fusion_head)\n        panoptic_fusion_head_.update(test_cfg=test_cfg)\n        self.panoptic_fusion_head = build_head(panoptic_fusion_head_)\n\n        self.num_things_classes = self.panoptic_head.num_things_classes\n        self.num_stuff_classes = self.panoptic_head.num_stuff_classes\n        self.num_classes = self.panoptic_head.num_classes\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n        # BaseDetector.show_result default for instance segmentation\n        if self.num_stuff_classes > 0:\n            self.show_result = self._show_pan_result\n\n    def forward_dummy(self, img, img_metas):\n        \"\"\"Used for computing network flops. 
See\n        `mmdetection/tools/analysis_tools/get_flops.py`\n\n        Args:\n            img (Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[Dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n        \"\"\"\n        super(SingleStageDetector, self).forward_train(img, img_metas)\n        x = self.extract_feat(img)\n        outs = self.panoptic_head(x, img_metas)\n        return outs\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_masks,\n                      gt_semantic_seg=None,\n                      gt_bboxes_ignore=None,\n                      **kargs):\n        \"\"\"\n        Args:\n            img (Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[Dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box.\n            gt_masks (list[BitmapMasks]): true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n            gt_semantic_seg (list[tensor]): semantic segmentation mask for\n                images for panoptic segmentation.\n                Defaults to None for instance segmentation.\n            gt_bboxes_ignore (list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n                Defaults to None.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        # add batch_input_shape in img_metas\n        super(SingleStageDetector, self).forward_train(img, img_metas)\n        x = self.extract_feat(img)\n        losses = self.panoptic_head.forward_train(x, img_metas, gt_bboxes,\n                                                  gt_labels, gt_masks,\n                                                  gt_semantic_seg,\n                                                  gt_bboxes_ignore)\n\n        return losses\n\n    def simple_test(self, imgs, img_metas, **kwargs):\n        \"\"\"Test without augmentation.\n\n        Args:\n            imgs (Tensor): A batch of images.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            list[dict[str, np.array | tuple[list]] | tuple[list]]:\n                Semantic segmentation results and panoptic segmentation \\\n                results of each image for panoptic segmentation, or formatted \\\n                bbox and mask results of each image for instance segmentation.\n\n            .. 
code-block:: none\n\n                [\n                    # panoptic segmentation\n                    {\n                        'pan_results': np.array, # shape = [h, w]\n                        'ins_results': tuple[list],\n                        # semantic segmentation results are not supported yet\n                        'sem_results': np.array\n                    },\n                    ...\n                ]\n\n            or\n\n            .. code-block:: none\n\n                [\n                    # instance segmentation\n                    (\n                        bboxes, # list[np.array]\n                        masks # list[list[np.array]]\n                    ),\n                    ...\n                ]\n        \"\"\"\n        feats = self.extract_feat(imgs)\n        mask_cls_results, mask_pred_results = self.panoptic_head.simple_test(\n            feats, img_metas, **kwargs)\n        results = self.panoptic_fusion_head.simple_test(\n            mask_cls_results, mask_pred_results, img_metas, **kwargs)\n        for i in range(len(results)):\n            if 'pan_results' in results[i]:\n                results[i]['pan_results'] = results[i]['pan_results'].detach(\n                ).cpu().numpy()\n\n            if 'ins_results' in results[i]:\n                labels_per_image, bboxes, mask_pred_binary = results[i][\n                    'ins_results']\n                bbox_results = bbox2result(bboxes, labels_per_image,\n                                           self.num_things_classes)\n                mask_results = [[] for _ in range(self.num_things_classes)]\n                for j, label in enumerate(labels_per_image):\n                    mask = mask_pred_binary[j].detach().cpu().numpy()\n                    mask_results[label].append(mask)\n                results[i]['ins_results'] = bbox_results, mask_results\n\n            assert 'sem_results' not in results[i], 'segmantic segmentation '\\\n                'results are not supported yet.'\n\n        if self.num_stuff_classes == 0:\n            results = [res['ins_results'] for res in results]\n\n        return results\n\n    def aug_test(self, imgs, img_metas, **kwargs):\n        raise NotImplementedError\n\n    def onnx_export(self, img, img_metas):\n        raise NotImplementedError\n\n    def _show_pan_result(self,\n                         img,\n                         result,\n                         score_thr=0.3,\n                         bbox_color=(72, 101, 241),\n                         text_color=(72, 101, 241),\n                         mask_color=None,\n                         thickness=2,\n                         font_size=13,\n                         win_name='',\n                         show=False,\n                         wait_time=0,\n                         out_file=None):\n        \"\"\"Draw `panoptic result` over `img`.\n\n        Args:\n            img (str or Tensor): The image to be displayed.\n            result (dict): The results.\n\n            score_thr (float, optional): Minimum score of bboxes to be shown.\n                Default: 0.3.\n            bbox_color (str or tuple(int) or :obj:`Color`):Color of bbox lines.\n               The tuple of color should be in BGR order. Default: 'green'.\n            text_color (str or tuple(int) or :obj:`Color`):Color of texts.\n               The tuple of color should be in BGR order. Default: 'green'.\n            mask_color (None or str or tuple(int) or :obj:`Color`):\n               Color of masks. 
The tuple of color should be in BGR order.\n               Default: None.\n            thickness (int): Thickness of lines. Default: 2.\n            font_size (int): Font size of texts. Default: 13.\n            win_name (str): The window name. Default: ''.\n            wait_time (float): Value of waitKey param.\n                Default: 0.\n            show (bool): Whether to show the image.\n                Default: False.\n            out_file (str or None): The filename to write the image.\n                Default: None.\n\n        Returns:\n            img (Tensor): Only if not `show` or `out_file`.\n        \"\"\"\n        img = mmcv.imread(img)\n        img = img.copy()\n        pan_results = result['pan_results']\n        # keep objects ahead\n        ids = np.unique(pan_results)[::-1]\n        legal_indices = ids != self.num_classes  # for VOID label\n        ids = ids[legal_indices]\n        labels = np.array([id % INSTANCE_OFFSET for id in ids], dtype=np.int64)\n        segms = (pan_results[None] == ids[:, None, None])\n\n        # if out_file specified, do not show image in window\n        if out_file is not None:\n            show = False\n        # draw bounding boxes\n        img = imshow_det_bboxes(\n            img,\n            segms=segms,\n            labels=labels,\n            class_names=self.CLASSES,\n            bbox_color=bbox_color,\n            text_color=text_color,\n            mask_color=mask_color,\n            thickness=thickness,\n            font_size=font_size,\n            win_name=win_name,\n            show=show,\n            wait_time=wait_time,\n            out_file=out_file)\n\n        if not (show or out_file):\n            return img\n"
  },
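  Note: the `simple_test` docstring in the entry above describes two possible result layouts — a panoptic dict with `pan_results`/`ins_results`, or a plain `(bboxes, masks)` tuple when `num_stuff_classes == 0`. The following is a minimal, hedged sketch of how a caller might branch on those two shapes; `results` and `class_names` are assumed inputs, not part of the file above.

  ```python
  # Hedged sketch: consuming the documented simple_test output shapes.
  import numpy as np

  def summarize_results(results):
      for res in results:
          if isinstance(res, dict):                 # panoptic segmentation result
              pan = res['pan_results']              # (h, w) array of panoptic segment ids
              print('panoptic map', pan.shape, 'segments:', len(np.unique(pan)))
              if 'ins_results' in res:
                  bbox_results, mask_results = res['ins_results']
                  print('instance boxes:', sum(len(b) for b in bbox_results))
          else:                                     # instance segmentation result
              bbox_results, mask_results = res      # per-class lists, as documented
              print('instance boxes:', sum(len(b) for b in bbox_results))
  ```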
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/nasfcos.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass NASFCOS(SingleStageDetector):\n    \"\"\"NAS-FCOS: Fast Neural Architecture Search for Object Detection.\n\n    https://arxiv.org/abs/1906.0442\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(NASFCOS, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                      test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/paa.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass PAA(SingleStageDetector):\n    \"\"\"Implementation of `PAA <https://arxiv.org/pdf/2007.08103.pdf>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(PAA, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                  test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/panoptic_fpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .panoptic_two_stage_segmentor import TwoStagePanopticSegmentor\n\n\n@DETECTORS.register_module()\nclass PanopticFPN(TwoStagePanopticSegmentor):\n    r\"\"\"Implementation of `Panoptic feature pyramid\n    networks <https://arxiv.org/pdf/1901.02446>`_\"\"\"\n\n    def __init__(\n            self,\n            backbone,\n            neck=None,\n            rpn_head=None,\n            roi_head=None,\n            train_cfg=None,\n            test_cfg=None,\n            pretrained=None,\n            init_cfg=None,\n            # for panoptic segmentation\n            semantic_head=None,\n            panoptic_fusion_head=None):\n        super(PanopticFPN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg,\n            semantic_head=semantic_head,\n            panoptic_fusion_head=panoptic_fusion_head)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/panoptic_two_stage_segmentor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport torch\n\nfrom mmdet.core import INSTANCE_OFFSET, bbox2roi, multiclass_nms\nfrom mmdet.core.visualization import imshow_det_bboxes\nfrom ..builder import DETECTORS, build_head\nfrom ..roi_heads.mask_heads.fcn_mask_head import _do_paste_mask\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass TwoStagePanopticSegmentor(TwoStageDetector):\n    \"\"\"Base class of Two-stage Panoptic Segmentor.\n\n    As well as the components in TwoStageDetector, Panoptic Segmentor has extra\n    semantic_head and panoptic_fusion_head.\n    \"\"\"\n\n    def __init__(\n            self,\n            backbone,\n            neck=None,\n            rpn_head=None,\n            roi_head=None,\n            train_cfg=None,\n            test_cfg=None,\n            pretrained=None,\n            init_cfg=None,\n            # for panoptic segmentation\n            semantic_head=None,\n            panoptic_fusion_head=None):\n        super(TwoStagePanopticSegmentor,\n              self).__init__(backbone, neck, rpn_head, roi_head, train_cfg,\n                             test_cfg, pretrained, init_cfg)\n        if semantic_head is not None:\n            self.semantic_head = build_head(semantic_head)\n        if panoptic_fusion_head is not None:\n            panoptic_cfg = test_cfg.panoptic if test_cfg is not None else None\n            panoptic_fusion_head_ = panoptic_fusion_head.deepcopy()\n            panoptic_fusion_head_.update(test_cfg=panoptic_cfg)\n            self.panoptic_fusion_head = build_head(panoptic_fusion_head_)\n\n            self.num_things_classes = self.panoptic_fusion_head.\\\n                num_things_classes\n            self.num_stuff_classes = self.panoptic_fusion_head.\\\n                num_stuff_classes\n            self.num_classes = self.panoptic_fusion_head.num_classes\n\n    @property\n    def with_semantic_head(self):\n        return hasattr(self,\n                       'semantic_head') and self.semantic_head is not None\n\n    @property\n    def with_panoptic_fusion_head(self):\n        return hasattr(self, 'panoptic_fusion_heads') and \\\n               self.panoptic_fusion_head is not None\n\n    def forward_dummy(self, img):\n        \"\"\"Used for computing network flops.\n\n        See `mmdetection/tools/get_flops.py`\n        \"\"\"\n        raise NotImplementedError(\n            f'`forward_dummy` is not implemented in {self.__class__.__name__}')\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      gt_semantic_seg=None,\n                      proposals=None,\n                      **kwargs):\n        x = self.extract_feat(img)\n        losses = dict()\n\n        # RPN forward and loss\n        if self.with_rpn:\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              self.test_cfg.rpn)\n            rpn_losses, proposal_list = self.rpn_head.forward_train(\n                x,\n                img_metas,\n                gt_bboxes,\n                gt_labels=None,\n                gt_bboxes_ignore=gt_bboxes_ignore,\n                proposal_cfg=proposal_cfg)\n            losses.update(rpn_losses)\n        else:\n            proposal_list = proposals\n\n        roi_losses = 
self.roi_head.forward_train(x, img_metas, proposal_list,\n                                                 gt_bboxes, gt_labels,\n                                                 gt_bboxes_ignore, gt_masks,\n                                                 **kwargs)\n        losses.update(roi_losses)\n\n        semantic_loss = self.semantic_head.forward_train(x, gt_semantic_seg)\n        losses.update(semantic_loss)\n\n        return losses\n\n    def simple_test_mask(self,\n                         x,\n                         img_metas,\n                         det_bboxes,\n                         det_labels,\n                         rescale=False):\n        \"\"\"Simple test for mask head without augmentation.\"\"\"\n        img_shapes = tuple(meta['ori_shape']\n                           for meta in img_metas) if rescale else tuple(\n                               meta['pad_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n            masks = []\n            for img_shape in img_shapes:\n                out_shape = (0, self.roi_head.bbox_head.num_classes) \\\n                            + img_shape[:2]\n                masks.append(det_bboxes[0].new_zeros(out_shape))\n            mask_pred = det_bboxes[0].new_zeros((0, 80, 28, 28))\n            mask_results = dict(\n                masks=masks, mask_pred=mask_pred, mask_feats=None)\n            return mask_results\n\n        _bboxes = [det_bboxes[i][:, :4] for i in range(len(det_bboxes))]\n        if rescale:\n            if not isinstance(scale_factors[0], float):\n                scale_factors = [\n                    det_bboxes[0].new_tensor(scale_factor)\n                    for scale_factor in scale_factors\n                ]\n            _bboxes = [\n                _bboxes[i] * scale_factors[i] for i in range(len(_bboxes))\n            ]\n\n        mask_rois = bbox2roi(_bboxes)\n        mask_results = self.roi_head._mask_forward(x, mask_rois)\n        mask_pred = mask_results['mask_pred']\n        # split batch mask prediction back to each image\n        num_mask_roi_per_img = [len(det_bbox) for det_bbox in det_bboxes]\n        mask_preds = mask_pred.split(num_mask_roi_per_img, 0)\n\n        # resize the mask_preds to (K, H, W)\n        masks = []\n        for i in range(len(_bboxes)):\n            det_bbox = det_bboxes[i][:, :4]\n            det_label = det_labels[i]\n\n            mask_pred = mask_preds[i].sigmoid()\n\n            box_inds = torch.arange(mask_pred.shape[0])\n            mask_pred = mask_pred[box_inds, det_label][:, None]\n\n            img_h, img_w, _ = img_shapes[i]\n            mask_pred, _ = _do_paste_mask(\n                mask_pred, det_bbox, img_h, img_w, skip_empty=False)\n            masks.append(mask_pred)\n\n        mask_results['masks'] = masks\n\n        return mask_results\n\n    def simple_test(self, img, img_metas, proposals=None, rescale=False):\n        \"\"\"Test without Augmentation.\"\"\"\n        x = self.extract_feat(img)\n\n        if proposals is None:\n            proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)\n        else:\n            proposal_list = proposals\n\n        bboxes, scores = self.roi_head.simple_test_bboxes(\n            x, img_metas, proposal_list, None, rescale=rescale)\n\n        pan_cfg = self.test_cfg.panoptic\n        # class-wise predictions\n        det_bboxes = []\n        det_labels = []\n        for bboxe, score in 
zip(bboxes, scores):\n            det_bbox, det_label = multiclass_nms(bboxe, score,\n                                                 pan_cfg.score_thr,\n                                                 pan_cfg.nms,\n                                                 pan_cfg.max_per_img)\n            det_bboxes.append(det_bbox)\n            det_labels.append(det_label)\n\n        mask_results = self.simple_test_mask(\n            x, img_metas, det_bboxes, det_labels, rescale=rescale)\n        masks = mask_results['masks']\n\n        seg_preds = self.semantic_head.simple_test(x, img_metas, rescale)\n\n        results = []\n        for i in range(len(det_bboxes)):\n            pan_results = self.panoptic_fusion_head.simple_test(\n                det_bboxes[i], det_labels[i], masks[i], seg_preds[i])\n            pan_results = pan_results.int().detach().cpu().numpy()\n            result = dict(pan_results=pan_results)\n            results.append(result)\n        return results\n\n    def show_result(self,\n                    img,\n                    result,\n                    score_thr=0.3,\n                    bbox_color=(72, 101, 241),\n                    text_color=(72, 101, 241),\n                    mask_color=None,\n                    thickness=2,\n                    font_size=13,\n                    win_name='',\n                    show=False,\n                    wait_time=0,\n                    out_file=None):\n        \"\"\"Draw `result` over `img`.\n\n        Args:\n            img (str or Tensor): The image to be displayed.\n            result (dict): The results.\n\n            score_thr (float, optional): Minimum score of bboxes to be shown.\n                Default: 0.3.\n            bbox_color (str or tuple(int) or :obj:`Color`):Color of bbox lines.\n               The tuple of color should be in BGR order. Default: 'green'.\n            text_color (str or tuple(int) or :obj:`Color`):Color of texts.\n               The tuple of color should be in BGR order. Default: 'green'.\n            mask_color (None or str or tuple(int) or :obj:`Color`):\n               Color of masks. The tuple of color should be in BGR order.\n               Default: None.\n            thickness (int): Thickness of lines. Default: 2.\n            font_size (int): Font size of texts. Default: 13.\n            win_name (str): The window name. 
Default: ''.\n            wait_time (float): Value of waitKey param.\n                Default: 0.\n            show (bool): Whether to show the image.\n                Default: False.\n            out_file (str or None): The filename to write the image.\n                Default: None.\n\n        Returns:\n            img (Tensor): Only if not `show` or `out_file`.\n        \"\"\"\n        img = mmcv.imread(img)\n        img = img.copy()\n        pan_results = result['pan_results']\n        # keep objects ahead\n        ids = np.unique(pan_results)[::-1]\n        legal_indices = ids != self.num_classes  # for VOID label\n        ids = ids[legal_indices]\n        labels = np.array([id % INSTANCE_OFFSET for id in ids], dtype=np.int64)\n        segms = (pan_results[None] == ids[:, None, None])\n\n        # if out_file specified, do not show image in window\n        if out_file is not None:\n            show = False\n        # draw bounding boxes\n        img = imshow_det_bboxes(\n            img,\n            segms=segms,\n            labels=labels,\n            class_names=self.CLASSES,\n            bbox_color=bbox_color,\n            text_color=text_color,\n            mask_color=mask_color,\n            thickness=thickness,\n            font_size=font_size,\n            win_name=win_name,\n            show=show,\n            wait_time=wait_time,\n            out_file=out_file)\n\n        if not (show or out_file):\n            return img\n"
  },
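  Note: `show_result` in the entry above decodes the `(h, w)` panoptic id map by dropping the VOID id (equal to `num_classes`) and taking `id % INSTANCE_OFFSET` as the class label, with one boolean mask per remaining id. A small sketch of that decoding in isolation, mirroring the code above (function name is illustrative only):

  ```python
  # Hedged sketch of the panoptic-id decoding used by show_result above.
  import numpy as np
  from mmdet.core import INSTANCE_OFFSET

  def decode_pan_results(pan_results, num_classes):
      ids = np.unique(pan_results)[::-1]
      ids = ids[ids != num_classes]                     # drop the VOID label
      labels = np.array([i % INSTANCE_OFFSET for i in ids], dtype=np.int64)
      segms = pan_results[None] == ids[:, None, None]   # (num_segments, h, w) boolean masks
      return ids, labels, segms
  ```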
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/point_rend.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass PointRend(TwoStageDetector):\n    \"\"\"PointRend: Image Segmentation as Rendering\n\n    This detector is the implementation of\n    `PointRend <https://arxiv.org/abs/1912.08193>`_.\n\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 roi_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(PointRend, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/queryinst.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .sparse_rcnn import SparseRCNN\n\n\n@DETECTORS.register_module()\nclass QueryInst(SparseRCNN):\n    r\"\"\"Implementation of\n    `Instances as Queries <http://arxiv.org/abs/2105.01928>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 roi_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(QueryInst, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/reppoints_detector.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass RepPointsDetector(SingleStageDetector):\n    \"\"\"RepPoints: Point Set Representation for Object Detection.\n\n        This detector is the implementation of:\n        - RepPoints detector (https://arxiv.org/pdf/1904.11490)\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(RepPointsDetector,\n              self).__init__(backbone, neck, bbox_head, train_cfg, test_cfg,\n                             pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/retinanet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass RetinaNet(SingleStageDetector):\n    \"\"\"Implementation of `RetinaNet <https://arxiv.org/abs/1708.02002>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(RetinaNet, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                        test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/rpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom inspect import signature\n\nimport mmcv\nimport torch\nfrom mmcv.image import tensor2imgs\n\nfrom mmdet.core import bbox_mapping\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\nfrom .base import BaseDetector\n\n\n@DETECTORS.register_module()\nclass RPN(BaseDetector):\n    \"\"\"Implementation of Region Proposal Network.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 rpn_head,\n                 train_cfg,\n                 test_cfg,\n                 pretrained=None,\n                 init_cfg=None):\n        super(RPN, self).__init__(init_cfg)\n        if pretrained:\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            backbone.pretrained = pretrained\n        self.backbone = build_backbone(backbone)\n        self.neck = build_neck(neck) if neck is not None else None\n        rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None\n        rpn_head.update(train_cfg=rpn_train_cfg)\n        rpn_head.update(test_cfg=test_cfg.rpn)\n        self.rpn_head = build_head(rpn_head)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n    def extract_feat(self, img):\n        \"\"\"Extract features.\n\n        Args:\n            img (torch.Tensor): Image tensor with shape (n, c, h ,w).\n\n        Returns:\n            list[torch.Tensor]: Multi-level features that may have\n                different resolutions.\n        \"\"\"\n        x = self.backbone(img)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_dummy(self, img):\n        \"\"\"Dummy forward function.\"\"\"\n        x = self.extract_feat(img)\n        rpn_outs = self.rpn_head(x)\n        return rpn_outs\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes=None,\n                      gt_bboxes_ignore=None):\n        \"\"\"\n        Args:\n            img (Tensor): Input images of shape (N, C, H, W).\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): A List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            gt_bboxes (list[Tensor]): Each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        if (isinstance(self.train_cfg.rpn, dict)\n                and self.train_cfg.rpn.get('debug', False)):\n            self.rpn_head.debug_imgs = tensor2imgs(img)\n\n        x = self.extract_feat(img)\n        losses = self.rpn_head.forward_train(x, img_metas, gt_bboxes, None,\n                                             gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, img, img_metas, rescale=False):\n        \"\"\"Test function without test time augmentation.\n\n        Args:\n            imgs (list[torch.Tensor]): List 
of multiple images\n            img_metas (list[dict]): List of image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[np.ndarray]: proposals\n        \"\"\"\n        x = self.extract_feat(img)\n        # get origin input shape to onnx dynamic input shape\n        if torch.onnx.is_in_onnx_export():\n            img_shape = torch._shape_as_tensor(img)[2:]\n            img_metas[0]['img_shape_for_onnx'] = img_shape\n        proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)\n        if rescale:\n            for proposals, meta in zip(proposal_list, img_metas):\n                proposals[:, :4] /= proposals.new_tensor(meta['scale_factor'])\n        if torch.onnx.is_in_onnx_export():\n            return proposal_list\n\n        return [proposal.cpu().numpy() for proposal in proposal_list]\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        \"\"\"Test function with test time augmentation.\n\n        Args:\n            imgs (list[torch.Tensor]): List of multiple images\n            img_metas (list[dict]): List of image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[np.ndarray]: proposals\n        \"\"\"\n        proposal_list = self.rpn_head.aug_test_rpn(\n            self.extract_feats(imgs), img_metas)\n        if not rescale:\n            for proposals, img_meta in zip(proposal_list, img_metas[0]):\n                img_shape = img_meta['img_shape']\n                scale_factor = img_meta['scale_factor']\n                flip = img_meta['flip']\n                flip_direction = img_meta['flip_direction']\n                proposals[:, :4] = bbox_mapping(proposals[:, :4], img_shape,\n                                                scale_factor, flip,\n                                                flip_direction)\n        return [proposal.cpu().numpy() for proposal in proposal_list]\n\n    def show_result(self, data, result, top_k=20, **kwargs):\n        \"\"\"Show RPN proposals on the image.\n\n        Args:\n            data (str or np.ndarray): Image filename or loaded image.\n            result (Tensor or tuple): The results to draw over `img`\n                bbox_result or (bbox_result, segm_result).\n            top_k (int): Plot the first k bboxes only\n               if set positive. Default: 20\n\n        Returns:\n            np.ndarray: The image with bboxes drawn on it.\n        \"\"\"\n        if kwargs is not None:\n            kwargs['colors'] = 'green'\n            sig = signature(mmcv.imshow_bboxes)\n            for k in list(kwargs.keys()):\n                if k not in sig.parameters:\n                    kwargs.pop(k)\n        mmcv.imshow_bboxes(data, result, top_k=top_k, **kwargs)\n"
  },
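  Note: `RPN.simple_test` above returns one proposal array per image (rescaling only touches the first four columns, the box coordinates), and `show_result` plots the first `top_k` of them. A hedged sketch of the same top-k trimming, assuming the usual `(N, 5)` row layout of `[x1, y1, x2, y2, score]`:

  ```python
  # Hedged sketch: keep the top_k highest-scoring RPN proposals.
  import numpy as np

  def top_k_proposals(proposals, top_k=20):
      order = np.argsort(-proposals[:, -1])   # sort by objectness score, descending
      return proposals[order[:top_k]]
  ```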
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/scnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .cascade_rcnn import CascadeRCNN\n\n\n@DETECTORS.register_module()\nclass SCNet(CascadeRCNN):\n    \"\"\"Implementation of `SCNet <https://arxiv.org/abs/2012.10150>`_\"\"\"\n\n    def __init__(self, **kwargs):\n        super(SCNet, self).__init__(**kwargs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/single_stage.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\n\nfrom mmdet.core import bbox2result\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\nfrom .base import BaseDetector\n\n\n@DETECTORS.register_module()\nclass SingleStageDetector(BaseDetector):\n    \"\"\"Base class for single-stage detectors.\n\n    Single-stage detectors directly and densely predict bounding boxes on the\n    output features of the backbone+neck.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(SingleStageDetector, self).__init__(init_cfg)\n        if pretrained:\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            backbone.pretrained = pretrained\n        self.backbone = build_backbone(backbone)\n        if neck is not None:\n            self.neck = build_neck(neck)\n        bbox_head.update(train_cfg=train_cfg)\n        bbox_head.update(test_cfg=test_cfg)\n        self.bbox_head = build_head(bbox_head)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n    def extract_feat(self, img):\n        \"\"\"Directly extract features from the backbone+neck.\"\"\"\n        x = self.backbone(img)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_dummy(self, img):\n        \"\"\"Used for computing network flops.\n\n        See `mmdetection/tools/analysis_tools/get_flops.py`\n        \"\"\"\n        x = self.extract_feat(img)\n        outs = self.bbox_head(x)\n        return outs\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None):\n        \"\"\"\n        Args:\n            img (Tensor): Input images of shape (N, C, H, W).\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): A List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            gt_bboxes (list[Tensor]): Each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        super(SingleStageDetector, self).forward_train(img, img_metas)\n        x = self.extract_feat(img)\n        losses = self.bbox_head.forward_train(x, img_metas, gt_bboxes,\n                                              gt_labels, gt_bboxes_ignore)\n        return losses\n\n    def simple_test(self, img, img_metas, rescale=False):\n        \"\"\"Test function without test-time augmentation.\n\n        Args:\n            img (torch.Tensor): Images with shape (N, C, H, W).\n            img_metas (list[dict]): List of image 
information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[list[np.ndarray]]: BBox results of each image and classes.\n                The outer list corresponds to each image. The inner list\n                corresponds to each class.\n        \"\"\"\n        feat = self.extract_feat(img)\n        results_list = self.bbox_head.simple_test(\n            feat, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)\n            for det_bboxes, det_labels in results_list\n        ]\n        return bbox_results\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        \"\"\"Test function with test time augmentation.\n\n        Args:\n            imgs (list[Tensor]): the outer list indicates test-time\n                augmentations and inner Tensor should have a shape NxCxHxW,\n                which contains all images in the batch.\n            img_metas (list[list[dict]]): the outer list indicates test-time\n                augs (multiscale, flip, etc.) and the inner list indicates\n                images in a batch. each dict has image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[list[np.ndarray]]: BBox results of each image and classes.\n                The outer list corresponds to each image. The inner list\n                corresponds to each class.\n        \"\"\"\n        assert hasattr(self.bbox_head, 'aug_test'), \\\n            f'{self.bbox_head.__class__.__name__}' \\\n            ' does not support test-time augmentation'\n\n        feats = self.extract_feats(imgs)\n        results_list = self.bbox_head.aug_test(\n            feats, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox2result(det_bboxes, det_labels, self.bbox_head.num_classes)\n            for det_bboxes, det_labels in results_list\n        ]\n        return bbox_results\n\n    def onnx_export(self, img, img_metas, with_nms=True):\n        \"\"\"Test function without test time augmentation.\n\n        Args:\n            img (torch.Tensor): input images.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]\n                and class labels of shape [N, num_det].\n        \"\"\"\n        x = self.extract_feat(img)\n        outs = self.bbox_head(x)\n        # get origin input shape to support onnx dynamic shape\n\n        # get shape as tensor\n        img_shape = torch._shape_as_tensor(img)[2:]\n        img_metas[0]['img_shape_for_onnx'] = img_shape\n        # get pad input shape to support onnx dynamic shape for exporting\n        # `CornerNet` and `CentripetalNet`, which 'pad_shape' is used\n        # for inference\n        img_metas[0]['pad_shape_for_onnx'] = img_shape\n\n        if len(outs) == 2:\n            # add dummy score_factor\n            outs = (*outs, None)\n        # TODO Can we change to `get_bboxes` when `onnx_export` fail\n        det_bboxes, det_labels = self.bbox_head.onnx_export(\n            *outs, img_metas, with_nms=with_nms)\n\n        return det_bboxes, det_labels\n"
  },
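  Note: `SingleStageDetector.simple_test` above wraps detections with `bbox2result`, producing, per image, one `(N, 5)` array per class with rows `[x1, y1, x2, y2, score]`. A hedged sketch that flattens this structure back into labelled detections; `class_names` is an assumed input:

  ```python
  # Hedged sketch: iterating over bbox2result-style output (list per image,
  # list per class, (N, 5) arrays) and filtering by score.
  def iter_detections(bbox_results, class_names, score_thr=0.3):
      for img_idx, per_class in enumerate(bbox_results):
          for cls_idx, dets in enumerate(per_class):
              for x1, y1, x2, y2, score in dets:
                  if score >= score_thr:
                      yield img_idx, class_names[cls_idx], (x1, y1, x2, y2), float(score)
  ```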
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/single_stage_instance_seg.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport warnings\n\nimport mmcv\nimport numpy as np\nimport torch\n\nfrom mmdet.core.visualization.image import imshow_det_bboxes\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\nfrom .base import BaseDetector\n\nINF = 1e8\n\n\n@DETECTORS.register_module()\nclass SingleStageInstanceSegmentor(BaseDetector):\n    \"\"\"Base class for single-stage instance segmentors.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 mask_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n\n        if pretrained:\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            backbone.pretrained = pretrained\n        super(SingleStageInstanceSegmentor, self).__init__(init_cfg=init_cfg)\n        self.backbone = build_backbone(backbone)\n        if neck is not None:\n            self.neck = build_neck(neck)\n        else:\n            self.neck = None\n        if bbox_head is not None:\n            bbox_head.update(train_cfg=copy.deepcopy(train_cfg))\n            bbox_head.update(test_cfg=copy.deepcopy(test_cfg))\n            self.bbox_head = build_head(bbox_head)\n        else:\n            self.bbox_head = None\n\n        assert mask_head, f'`mask_head` must ' \\\n                          f'be implemented in {self.__class__.__name__}'\n        mask_head.update(train_cfg=copy.deepcopy(train_cfg))\n        mask_head.update(test_cfg=copy.deepcopy(test_cfg))\n        self.mask_head = build_head(mask_head)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n    def extract_feat(self, img):\n        \"\"\"Directly extract features from the backbone and neck.\"\"\"\n        x = self.backbone(img)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_dummy(self, img):\n        \"\"\"Used for computing network flops.\n\n        See `mmdetection/tools/analysis_tools/get_flops.py`\n        \"\"\"\n        raise NotImplementedError(\n            f'`forward_dummy` is not implemented in {self.__class__.__name__}')\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_masks,\n                      gt_labels,\n                      gt_bboxes=None,\n                      gt_bboxes_ignore=None,\n                      **kwargs):\n        \"\"\"\n        Args:\n            img (Tensor): Input images of shape (B, C, H, W).\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): A List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            gt_masks (list[:obj:`BitmapMasks`] | None) : The segmentation\n                masks for each box.\n            gt_labels (list[Tensor]): Class indices corresponding to each box\n            gt_bboxes (list[Tensor]): Each item is the truth boxes\n                of each image in [tl_x, tl_y, br_x, br_y] format.\n                Default: None.\n            gt_bboxes_ignore 
(list[Tensor] | None): Specify which bounding\n                boxes can be ignored when computing the loss.\n\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n\n        gt_masks = [\n            gt_mask.to_tensor(dtype=torch.bool, device=img.device)\n            for gt_mask in gt_masks\n        ]\n        x = self.extract_feat(img)\n        losses = dict()\n\n        # CondInst and YOLACT have bbox_head\n        if self.bbox_head:\n            # bbox_head_preds is a tuple\n            bbox_head_preds = self.bbox_head(x)\n            # positive_infos is a list of obj:`InstanceData`\n            # It contains the information about the positive samples\n            # CondInst, YOLACT\n            det_losses, positive_infos = self.bbox_head.loss(\n                *bbox_head_preds,\n                gt_bboxes=gt_bboxes,\n                gt_labels=gt_labels,\n                gt_masks=gt_masks,\n                img_metas=img_metas,\n                gt_bboxes_ignore=gt_bboxes_ignore,\n                **kwargs)\n            losses.update(det_losses)\n        else:\n            positive_infos = None\n\n        mask_loss = self.mask_head.forward_train(\n            x,\n            gt_labels,\n            gt_masks,\n            img_metas,\n            positive_infos=positive_infos,\n            gt_bboxes=gt_bboxes,\n            gt_bboxes_ignore=gt_bboxes_ignore,\n            **kwargs)\n        # avoid loss override\n        assert not set(mask_loss.keys()) & set(losses.keys())\n\n        losses.update(mask_loss)\n        return losses\n\n    def simple_test(self, img, img_metas, rescale=False):\n        \"\"\"Test function without test-time augmentation.\n\n        Args:\n            img (torch.Tensor): Images with shape (B, C, H, W).\n            img_metas (list[dict]): List of image information.\n            rescale (bool, optional): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list(tuple): Formatted bbox and mask results of multiple \\\n                images. The outer list corresponds to each image. \\\n                Each tuple contains two type of results of single image:\n\n                - bbox_results (list[np.ndarray]): BBox results of\n                  single image. The list corresponds to each class.\n                  each ndarray has a shape (N, 5), N is the number of\n                  bboxes with this category, and last dimension\n                  5 arrange as (x1, y1, x2, y2, scores).\n                - mask_results (list[np.ndarray]): Mask results of\n                  single image. 
The list corresponds to each class.\n                  each ndarray has a shape (N, img_h, img_w), N\n                  is the number of masks with this category.\n        \"\"\"\n        feat = self.extract_feat(img)\n        if self.bbox_head:\n            outs = self.bbox_head(feat)\n            # results_list is list[obj:`InstanceData`]\n            results_list = self.bbox_head.get_results(\n                *outs, img_metas=img_metas, cfg=self.test_cfg, rescale=rescale)\n        else:\n            results_list = None\n\n        results_list = self.mask_head.simple_test(\n            feat, img_metas, rescale=rescale, instances_list=results_list)\n\n        format_results_list = []\n        for results in results_list:\n            format_results_list.append(self.format_results(results))\n\n        return format_results_list\n\n    def format_results(self, results):\n        \"\"\"Format the model predictions according to the interface with\n        dataset.\n\n        Args:\n            results (:obj:`InstanceData`): Processed\n                results of single images. Usually contains\n                following keys.\n\n                - scores (Tensor): Classification scores, has shape\n                  (num_instance,)\n                - labels (Tensor): Has shape (num_instances,).\n                - masks (Tensor): Processed mask results, has\n                  shape (num_instances, h, w).\n\n        Returns:\n            tuple: Formatted bbox and mask results.. It contains two items:\n\n                - bbox_results (list[np.ndarray]): BBox results of\n                  single image. The list corresponds to each class.\n                  each ndarray has a shape (N, 5), N is the number of\n                  bboxes with this category, and last dimension\n                  5 arrange as (x1, y1, x2, y2, scores).\n                - mask_results (list[np.ndarray]): Mask results of\n                  single image. 
The list corresponds to each class.\n                  each ndarray has shape (N, img_h, img_w), N\n                  is the number of masks with this category.\n        \"\"\"\n        data_keys = results.keys()\n        assert 'scores' in data_keys\n        assert 'labels' in data_keys\n\n        assert 'masks' in data_keys, \\\n            'results should contain ' \\\n            'masks when format the results '\n        mask_results = [[] for _ in range(self.mask_head.num_classes)]\n\n        num_masks = len(results)\n\n        if num_masks == 0:\n            bbox_results = [\n                np.zeros((0, 5), dtype=np.float32)\n                for _ in range(self.mask_head.num_classes)\n            ]\n            return bbox_results, mask_results\n\n        labels = results.labels.detach().cpu().numpy()\n\n        if 'bboxes' not in results:\n            # create dummy bbox results to store the scores\n            results.bboxes = results.scores.new_zeros(len(results), 4)\n\n        det_bboxes = torch.cat([results.bboxes, results.scores[:, None]],\n                               dim=-1)\n        det_bboxes = det_bboxes.detach().cpu().numpy()\n        bbox_results = [\n            det_bboxes[labels == i, :]\n            for i in range(self.mask_head.num_classes)\n        ]\n\n        masks = results.masks.detach().cpu().numpy()\n\n        for idx in range(num_masks):\n            mask = masks[idx]\n            mask_results[labels[idx]].append(mask)\n\n        return bbox_results, mask_results\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        raise NotImplementedError\n\n    def show_result(self,\n                    img,\n                    result,\n                    score_thr=0.3,\n                    bbox_color=(72, 101, 241),\n                    text_color=(72, 101, 241),\n                    mask_color=None,\n                    thickness=2,\n                    font_size=13,\n                    win_name='',\n                    show=False,\n                    wait_time=0,\n                    out_file=None):\n        \"\"\"Draw `result` over `img`.\n\n        Args:\n            img (str or Tensor): The image to be displayed.\n            result (tuple): Format bbox and mask results.\n                It contains two items:\n\n                - bbox_results (list[np.ndarray]): BBox results of\n                  single image. The list corresponds to each class.\n                  each ndarray has a shape (N, 5), N is the number of\n                  bboxes with this category, and last dimension\n                  5 arrange as (x1, y1, x2, y2, scores).\n                - mask_results (list[np.ndarray]): Mask results of\n                  single image. The list corresponds to each class.\n                  each ndarray has shape (N, img_h, img_w), N\n                  is the number of masks with this category.\n\n            score_thr (float, optional): Minimum score of bboxes to be shown.\n                Default: 0.3.\n            bbox_color (str or tuple(int) or :obj:`Color`):Color of bbox lines.\n               The tuple of color should be in BGR order. Default: 'green'\n            text_color (str or tuple(int) or :obj:`Color`):Color of texts.\n               The tuple of color should be in BGR order. Default: 'green'\n            mask_color (None or str or tuple(int) or :obj:`Color`):\n               Color of masks. The tuple of color should be in BGR order.\n               Default: None\n            thickness (int): Thickness of lines. 
Default: 2\n            font_size (int): Font size of texts. Default: 13\n            win_name (str): The window name. Default: ''\n            wait_time (float): Value of waitKey param.\n                Default: 0.\n            show (bool): Whether to show the image.\n                Default: False.\n            out_file (str or None): The filename to write the image.\n                Default: None.\n\n        Returns:\n            img (Tensor): Only if not `show` or `out_file`\n        \"\"\"\n\n        assert isinstance(result, tuple)\n        bbox_result, mask_result = result\n        bboxes = np.vstack(bbox_result)\n        img = mmcv.imread(img)\n        img = img.copy()\n        labels = [\n            np.full(bbox.shape[0], i, dtype=np.int32)\n            for i, bbox in enumerate(bbox_result)\n        ]\n        labels = np.concatenate(labels)\n        if len(labels) == 0:\n            bboxes = np.zeros([0, 5])\n            masks = np.zeros([0, 0, 0])\n        # draw segmentation masks\n        else:\n            masks = mmcv.concat_list(mask_result)\n\n            if isinstance(masks[0], torch.Tensor):\n                masks = torch.stack(masks, dim=0).detach().cpu().numpy()\n            else:\n                masks = np.stack(masks, axis=0)\n            # dummy bboxes\n            if bboxes[:, :4].sum() == 0:\n                num_masks = len(bboxes)\n                x_any = masks.any(axis=1)\n                y_any = masks.any(axis=2)\n                for idx in range(num_masks):\n                    x = np.where(x_any[idx, :])[0]\n                    y = np.where(y_any[idx, :])[0]\n                    if len(x) > 0 and len(y) > 0:\n                        bboxes[idx, :4] = np.array(\n                            [x[0], y[0], x[-1] + 1, y[-1] + 1],\n                            dtype=np.float32)\n        # if out_file specified, do not show image in window\n        if out_file is not None:\n            show = False\n        # draw bounding boxes\n        img = imshow_det_bboxes(\n            img,\n            bboxes,\n            labels,\n            masks,\n            class_names=self.CLASSES,\n            score_thr=score_thr,\n            bbox_color=bbox_color,\n            text_color=text_color,\n            mask_color=mask_color,\n            thickness=thickness,\n            font_size=font_size,\n            win_name=win_name,\n            show=show,\n            wait_time=wait_time,\n            out_file=out_file)\n\n        if not (show or out_file):\n            return img\n"
  },
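  Note: `show_result` in the entry above recovers dummy boxes from binary masks when the bbox results carry no coordinates, by taking the tight axis-aligned extent of each mask. A sketch of that fallback on its own, assuming `masks` is an `(N, img_h, img_w)` boolean array (function name is illustrative only):

  ```python
  # Hedged sketch of the mask-to-box fallback used in show_result above.
  import numpy as np

  def masks_to_boxes(masks):
      boxes = np.zeros((len(masks), 4), dtype=np.float32)
      x_any = masks.any(axis=1)                 # (N, img_w): columns touched by each mask
      y_any = masks.any(axis=2)                 # (N, img_h): rows touched by each mask
      for idx in range(len(masks)):
          x = np.where(x_any[idx, :])[0]
          y = np.where(y_any[idx, :])[0]
          if len(x) > 0 and len(y) > 0:
              boxes[idx] = [x[0], y[0], x[-1] + 1, y[-1] + 1]
      return boxes
  ```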
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/solo.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage_instance_seg import SingleStageInstanceSegmentor\n\n\n@DETECTORS.register_module()\nclass SOLO(SingleStageInstanceSegmentor):\n    \"\"\"`SOLO: Segmenting Objects by Locations\n    <https://arxiv.org/abs/1912.04488>`_\n\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 mask_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 pretrained=None):\n        super().__init__(\n            backbone=backbone,\n            neck=neck,\n            bbox_head=bbox_head,\n            mask_head=mask_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            init_cfg=init_cfg,\n            pretrained=pretrained)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/solov2.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage_instance_seg import SingleStageInstanceSegmentor\n\n\n@DETECTORS.register_module()\nclass SOLOv2(SingleStageInstanceSegmentor):\n    \"\"\"`SOLOv2: Dynamic and Fast Instance Segmentation\n    <https://arxiv.org/abs/2003.10152>`_\n\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 bbox_head=None,\n                 mask_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 init_cfg=None,\n                 pretrained=None):\n        super().__init__(\n            backbone=backbone,\n            neck=neck,\n            bbox_head=bbox_head,\n            mask_head=mask_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            init_cfg=init_cfg,\n            pretrained=pretrained)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/sparse_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .two_stage import TwoStageDetector\n\n\n@DETECTORS.register_module()\nclass SparseRCNN(TwoStageDetector):\n    r\"\"\"Implementation of `Sparse R-CNN: End-to-End Object Detection with\n    Learnable Proposals <https://arxiv.org/abs/2011.12450>`_\"\"\"\n\n    def __init__(self, *args, **kwargs):\n        super(SparseRCNN, self).__init__(*args, **kwargs)\n        assert self.with_rpn, 'Sparse R-CNN and QueryInst ' \\\n            'do not support external proposals'\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      proposals=None,\n                      **kwargs):\n        \"\"\"Forward function of SparseR-CNN and QueryInst in train stage.\n\n        Args:\n            img (Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor): specify which bounding\n                boxes can be ignored when computing the loss.\n            gt_masks (List[Tensor], optional) : Segmentation masks for\n                each box. This is required to train QueryInst.\n            proposals (List[Tensor], optional): override rpn proposals with\n                custom proposals. Use when `with_rpn` is False.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n\n        assert proposals is None, 'Sparse R-CNN and QueryInst ' \\\n            'do not support external proposals'\n\n        x = self.extract_feat(img)\n        proposal_boxes, proposal_features, imgs_whwh = \\\n            self.rpn_head.forward_train(x, img_metas)\n        roi_losses = self.roi_head.forward_train(\n            x,\n            proposal_boxes,\n            proposal_features,\n            img_metas,\n            gt_bboxes,\n            gt_labels,\n            gt_bboxes_ignore=gt_bboxes_ignore,\n            gt_masks=gt_masks,\n            imgs_whwh=imgs_whwh)\n        return roi_losses\n\n    def simple_test(self, img, img_metas, rescale=False):\n        \"\"\"Test function without test time augmentation.\n\n        Args:\n            imgs (list[torch.Tensor]): List of multiple images\n            img_metas (list[dict]): List of image information.\n            rescale (bool): Whether to rescale the results.\n                Defaults to False.\n\n        Returns:\n            list[list[np.ndarray]]: BBox results of each image and classes.\n                The outer list corresponds to each image. 
The inner list\n                corresponds to each class.\n        \"\"\"\n        x = self.extract_feat(img)\n        proposal_boxes, proposal_features, imgs_whwh = \\\n            self.rpn_head.simple_test_rpn(x, img_metas)\n        results = self.roi_head.simple_test(\n            x,\n            proposal_boxes,\n            proposal_features,\n            img_metas,\n            imgs_whwh=imgs_whwh,\n            rescale=rescale)\n        return results\n\n    def forward_dummy(self, img):\n        \"\"\"Used for computing network flops.\n\n        See `mmdetection/tools/analysis_tools/get_flops.py`\n        \"\"\"\n        # backbone\n        x = self.extract_feat(img)\n        # rpn\n        num_imgs = len(img)\n        dummy_img_metas = [\n            dict(img_shape=(800, 1333, 3)) for _ in range(num_imgs)\n        ]\n        proposal_boxes, proposal_features, imgs_whwh = \\\n            self.rpn_head.simple_test_rpn(x, dummy_img_metas)\n        # roi_head\n        roi_outs = self.roi_head.forward_dummy(x, proposal_boxes,\n                                               proposal_features,\n                                               dummy_img_metas)\n        return roi_outs\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/tood.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass TOOD(SingleStageDetector):\n    r\"\"\"Implementation of `TOOD: Task-aligned One-stage Object Detection.\n    <https://arxiv.org/abs/2108.07755>`_.\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(TOOD, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                   test_cfg, pretrained, init_cfg)\n\n    def set_epoch(self, epoch):\n        self.bbox_head.epoch = epoch\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/trident_faster_rcnn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .faster_rcnn import FasterRCNN\n\n\n@DETECTORS.register_module()\nclass TridentFasterRCNN(FasterRCNN):\n    \"\"\"Implementation of `TridentNet <https://arxiv.org/abs/1901.01892>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 rpn_head,\n                 roi_head,\n                 train_cfg,\n                 test_cfg,\n                 neck=None,\n                 pretrained=None,\n                 init_cfg=None):\n\n        super(TridentFasterRCNN, self).__init__(\n            backbone=backbone,\n            neck=neck,\n            rpn_head=rpn_head,\n            roi_head=roi_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n        assert self.backbone.num_branch == self.roi_head.num_branch\n        assert self.backbone.test_branch_idx == self.roi_head.test_branch_idx\n        self.num_branch = self.backbone.num_branch\n        self.test_branch_idx = self.backbone.test_branch_idx\n\n    def simple_test(self, img, img_metas, proposals=None, rescale=False):\n        \"\"\"Test without augmentation.\"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        x = self.extract_feat(img)\n        if proposals is None:\n            num_branch = (self.num_branch if self.test_branch_idx == -1 else 1)\n            trident_img_metas = img_metas * num_branch\n            proposal_list = self.rpn_head.simple_test_rpn(x, trident_img_metas)\n        else:\n            proposal_list = proposals\n        # TODO： Fix trident_img_metas undefined errors\n        #  when proposals is specified\n        return self.roi_head.simple_test(\n            x, proposal_list, trident_img_metas, rescale=rescale)\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        x = self.extract_feats(imgs)\n        num_branch = (self.num_branch if self.test_branch_idx == -1 else 1)\n        trident_img_metas = [img_metas * num_branch for img_metas in img_metas]\n        proposal_list = self.rpn_head.aug_test_rpn(x, trident_img_metas)\n        return self.roi_head.aug_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n    def forward_train(self, img, img_metas, gt_bboxes, gt_labels, **kwargs):\n        \"\"\"make copies of img and gts to fit multi-branch.\"\"\"\n        trident_gt_bboxes = tuple(gt_bboxes * self.num_branch)\n        trident_gt_labels = tuple(gt_labels * self.num_branch)\n        trident_img_metas = tuple(img_metas * self.num_branch)\n\n        return super(TridentFasterRCNN,\n                     self).forward_train(img, trident_img_metas,\n                                         trident_gt_bboxes, trident_gt_labels)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/two_stage.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\n\nfrom ..builder import DETECTORS, build_backbone, build_head, build_neck\nfrom .base import BaseDetector\n\n\n@DETECTORS.register_module()\nclass TwoStageDetector(BaseDetector):\n    \"\"\"Base class for two-stage detectors.\n\n    Two-stage detectors typically consisting of a region proposal network and a\n    task-specific regression head.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck=None,\n                 rpn_head=None,\n                 roi_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(TwoStageDetector, self).__init__(init_cfg)\n        if pretrained:\n            warnings.warn('DeprecationWarning: pretrained is deprecated, '\n                          'please use \"init_cfg\" instead')\n            backbone.pretrained = pretrained\n        self.backbone = build_backbone(backbone)\n\n        if neck is not None:\n            self.neck = build_neck(neck)\n\n        if rpn_head is not None:\n            rpn_train_cfg = train_cfg.rpn if train_cfg is not None else None\n            rpn_head_ = rpn_head.copy()\n            rpn_head_.update(train_cfg=rpn_train_cfg, test_cfg=test_cfg.rpn)\n            self.rpn_head = build_head(rpn_head_)\n\n        if roi_head is not None:\n            # update train and test cfg here for now\n            # TODO: refactor assigner & sampler\n            rcnn_train_cfg = train_cfg.rcnn if train_cfg is not None else None\n            roi_head.update(train_cfg=rcnn_train_cfg)\n            roi_head.update(test_cfg=test_cfg.rcnn)\n            roi_head.pretrained = pretrained\n            self.roi_head = build_head(roi_head)\n\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n\n    @property\n    def with_rpn(self):\n        \"\"\"bool: whether the detector has RPN\"\"\"\n        return hasattr(self, 'rpn_head') and self.rpn_head is not None\n\n    @property\n    def with_roi_head(self):\n        \"\"\"bool: whether the detector has a RoI head\"\"\"\n        return hasattr(self, 'roi_head') and self.roi_head is not None\n\n    def extract_feat(self, img):\n        \"\"\"Directly extract features from the backbone+neck.\"\"\"\n        x = self.backbone(img)\n        if self.with_neck:\n            x = self.neck(x)\n        return x\n\n    def forward_dummy(self, img):\n        \"\"\"Used for computing network flops.\n\n        See `mmdetection/tools/analysis_tools/get_flops.py`\n        \"\"\"\n        outs = ()\n        # backbone\n        x = self.extract_feat(img)\n        # rpn\n        if self.with_rpn:\n            rpn_outs = self.rpn_head(x)\n            outs = outs + (rpn_outs, )\n        proposals = torch.randn(1000, 4).to(img.device)\n        # roi_head\n        roi_outs = self.roi_head.forward_dummy(x, proposals)\n        outs = outs + (roi_outs, )\n        return outs\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      proposals=None,\n                      **kwargs):\n        \"\"\"\n        Args:\n            img (Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n\n            img_metas 
(list[dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n\n            gt_labels (list[Tensor]): class indices corresponding to each box\n\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n            gt_masks (None | Tensor) : true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n\n            proposals : override rpn proposals with custom proposals. Use when\n                `with_rpn` is False.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        x = self.extract_feat(img)\n\n        losses = dict()\n\n        # RPN forward and loss\n        if self.with_rpn:\n            proposal_cfg = self.train_cfg.get('rpn_proposal',\n                                              self.test_cfg.rpn)\n            rpn_losses, proposal_list = self.rpn_head.forward_train(\n                x,\n                img_metas,\n                gt_bboxes,\n                gt_labels=None,\n                gt_bboxes_ignore=gt_bboxes_ignore,\n                proposal_cfg=proposal_cfg,\n                **kwargs)\n            losses.update(rpn_losses)\n        else:\n            proposal_list = proposals\n\n        roi_losses = self.roi_head.forward_train(x, img_metas, proposal_list,\n                                                 gt_bboxes, gt_labels,\n                                                 gt_bboxes_ignore, gt_masks,\n                                                 **kwargs)\n        losses.update(roi_losses)\n\n        return losses\n\n    async def async_simple_test(self,\n                                img,\n                                img_meta,\n                                proposals=None,\n                                rescale=False):\n        \"\"\"Async test without augmentation.\"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        x = self.extract_feat(img)\n\n        if proposals is None:\n            proposal_list = await self.rpn_head.async_simple_test_rpn(\n                x, img_meta)\n        else:\n            proposal_list = proposals\n\n        return await self.roi_head.async_simple_test(\n            x, proposal_list, img_meta, rescale=rescale)\n\n    def simple_test(self, img, img_metas, proposals=None, rescale=False):\n        \"\"\"Test without augmentation.\"\"\"\n\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        x = self.extract_feat(img)\n        if proposals is None:\n            proposal_list = self.rpn_head.simple_test_rpn(x, img_metas)\n        else:\n            proposal_list = proposals\n\n        return self.roi_head.simple_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        x = self.extract_feats(imgs)\n        proposal_list = 
self.rpn_head.aug_test_rpn(x, img_metas)\n        return self.roi_head.aug_test(\n            x, proposal_list, img_metas, rescale=rescale)\n\n    def onnx_export(self, img, img_metas):\n\n        img_shape = torch._shape_as_tensor(img)[2:]\n        img_metas[0]['img_shape_for_onnx'] = img_shape\n        x = self.extract_feat(img)\n        proposals = self.rpn_head.onnx_export(x, img_metas)\n        if hasattr(self.roi_head, 'onnx_export'):\n            return self.roi_head.onnx_export(x, proposals, img_metas)\n        else:\n            raise NotImplementedError(\n                f'{self.__class__.__name__} can not '\n                f'be exported to ONNX. Please refer to the '\n                f'list of supported models,'\n                f'https://mmdetection.readthedocs.io/en/latest/tutorials/pytorch2onnx.html#list-of-supported-models-exportable-to-onnx'  # noqa E501\n            )\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/vfnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass VFNet(SingleStageDetector):\n    \"\"\"Implementation of `VarifocalNet\n    (VFNet).<https://arxiv.org/abs/2008.13367>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(VFNet, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                    test_cfg, pretrained, init_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/yolact.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core import bbox2result\nfrom ..builder import DETECTORS, build_head\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass YOLACT(SingleStageDetector):\n    \"\"\"Implementation of `YOLACT <https://arxiv.org/abs/1904.02689>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 segm_head,\n                 mask_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(YOLACT, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                     test_cfg, pretrained, init_cfg)\n        self.segm_head = build_head(segm_head)\n        self.mask_head = build_head(mask_head)\n\n    def forward_dummy(self, img):\n        \"\"\"Used for computing network flops.\n\n        See `mmdetection/tools/analysis_tools/get_flops.py`\n        \"\"\"\n        feat = self.extract_feat(img)\n        bbox_outs = self.bbox_head(feat)\n        prototypes = self.mask_head.forward_dummy(feat[0])\n        return (bbox_outs, prototypes)\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None):\n        \"\"\"\n        Args:\n            img (Tensor): of shape (N, C, H, W) encoding input images.\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n            gt_masks (None | Tensor) : true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        # convert Bitmap mask or Polygon Mask to Tensor here\n        gt_masks = [\n            gt_mask.to_tensor(dtype=torch.uint8, device=img.device)\n            for gt_mask in gt_masks\n        ]\n\n        x = self.extract_feat(img)\n\n        cls_score, bbox_pred, coeff_pred = self.bbox_head(x)\n        bbox_head_loss_inputs = (cls_score, bbox_pred) + (gt_bboxes, gt_labels,\n                                                          img_metas)\n        losses, sampling_results = self.bbox_head.loss(\n            *bbox_head_loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)\n\n        segm_head_outs = self.segm_head(x[0])\n        loss_segm = self.segm_head.loss(segm_head_outs, gt_masks, gt_labels)\n        losses.update(loss_segm)\n\n        mask_pred = self.mask_head(x[0], coeff_pred, gt_bboxes, img_metas,\n                                   sampling_results)\n        loss_mask = 
self.mask_head.loss(mask_pred, gt_masks, gt_bboxes,\n                                        img_metas, sampling_results)\n        losses.update(loss_mask)\n\n        # check NaN and Inf\n        for loss_name in losses.keys():\n            assert torch.isfinite(torch.stack(losses[loss_name]))\\\n                .all().item(), '{} becomes infinite or NaN!'\\\n                .format(loss_name)\n\n        return losses\n\n    def simple_test(self, img, img_metas, rescale=False):\n        \"\"\"Test function without test-time augmentation.\"\"\"\n        feat = self.extract_feat(img)\n        det_bboxes, det_labels, det_coeffs = self.bbox_head.simple_test(\n            feat, img_metas, rescale=rescale)\n        bbox_results = [\n            bbox2result(det_bbox, det_label, self.bbox_head.num_classes)\n            for det_bbox, det_label in zip(det_bboxes, det_labels)\n        ]\n\n        segm_results = self.mask_head.simple_test(\n            feat,\n            det_bboxes,\n            det_labels,\n            det_coeffs,\n            img_metas,\n            rescale=rescale)\n\n        return list(zip(bbox_results, segm_results))\n\n    def aug_test(self, imgs, img_metas, rescale=False):\n        \"\"\"Test with augmentations.\"\"\"\n        raise NotImplementedError(\n            'YOLACT does not support test-time augmentation')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/yolo.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Copyright (c) 2019 Western Digital Corporation or its affiliates.\nimport torch\n\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass YOLOV3(SingleStageDetector):\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(YOLOV3, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                     test_cfg, pretrained, init_cfg)\n\n    def onnx_export(self, img, img_metas):\n        \"\"\"Test function for exporting to ONNX, without test time augmentation.\n\n        Args:\n            img (torch.Tensor): input images.\n            img_metas (list[dict]): List of image information.\n\n        Returns:\n            tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]\n                and class labels of shape [N, num_det].\n        \"\"\"\n        x = self.extract_feat(img)\n        outs = self.bbox_head.forward(x)\n        # get shape as tensor\n        img_shape = torch._shape_as_tensor(img)[2:]\n        img_metas[0]['img_shape_for_onnx'] = img_shape\n\n        det_bboxes, det_labels = self.bbox_head.onnx_export(*outs, img_metas)\n\n        return det_bboxes, det_labels\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/yolof.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass YOLOF(SingleStageDetector):\n    r\"\"\"Implementation of `You Only Look One-level Feature\n    <https://arxiv.org/abs/2103.09460>`_\"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None):\n        super(YOLOF, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                    test_cfg, pretrained)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/detectors/yolox.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport random\n\nimport torch\nimport torch.distributed as dist\nimport torch.nn.functional as F\nfrom mmcv.runner import get_dist_info\n\nfrom ...utils import log_img_scale\nfrom ..builder import DETECTORS\nfrom .single_stage import SingleStageDetector\n\n\n@DETECTORS.register_module()\nclass YOLOX(SingleStageDetector):\n    r\"\"\"Implementation of `YOLOX: Exceeding YOLO Series in 2021\n    <https://arxiv.org/abs/2107.08430>`_\n\n    Note: Considering the trade-off between training speed and accuracy,\n    multi-scale training is temporarily kept. More elegant implementation\n    will be adopted in the future.\n\n    Args:\n        backbone (nn.Module): The backbone module.\n        neck (nn.Module): The neck module.\n        bbox_head (nn.Module): The bbox head module.\n        train_cfg (obj:`ConfigDict`, optional): The training config\n            of YOLOX. Default: None.\n        test_cfg (obj:`ConfigDict`, optional): The testing config\n            of YOLOX. Default: None.\n        pretrained (str, optional): model pretrained path.\n            Default: None.\n        input_size (tuple): The model default input image size. The shape\n            order should be (height, width). Default: (640, 640).\n        size_multiplier (int): Image size multiplication factor.\n            Default: 32.\n        random_size_range (tuple): The multi-scale random range during\n            multi-scale training. The real training image size will\n            be multiplied by size_multiplier. Default: (15, 25).\n        random_size_interval (int): The iter interval of change\n            image size. Default: 10.\n        init_cfg (dict, optional): Initialization config dict.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 backbone,\n                 neck,\n                 bbox_head,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 input_size=(640, 640),\n                 size_multiplier=32,\n                 random_size_range=(15, 25),\n                 random_size_interval=10,\n                 init_cfg=None):\n        super(YOLOX, self).__init__(backbone, neck, bbox_head, train_cfg,\n                                    test_cfg, pretrained, init_cfg)\n        log_img_scale(input_size, skip_square=True)\n        self.rank, self.world_size = get_dist_info()\n        self._default_input_size = input_size\n        self._input_size = input_size\n        self._random_size_range = random_size_range\n        self._random_size_interval = random_size_interval\n        self._size_multiplier = size_multiplier\n        self._progress_in_iter = 0\n\n    def forward_train(self,\n                      img,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None):\n        \"\"\"\n        Args:\n            img (Tensor): Input images of shape (N, C, H, W).\n                Typically these should be mean centered and std scaled.\n            img_metas (list[dict]): A List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                :class:`mmdet.datasets.pipelines.Collect`.\n            gt_bboxes (list[Tensor]): Each item are the truth boxes for each\n     
           image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding\n                boxes can be ignored when computing the loss.\n        Returns:\n            dict[str, Tensor]: A dictionary of loss components.\n        \"\"\"\n        # Multi-scale training\n        img, gt_bboxes = self._preprocess(img, gt_bboxes)\n\n        losses = super(YOLOX, self).forward_train(img, img_metas, gt_bboxes,\n                                                  gt_labels, gt_bboxes_ignore)\n\n        # random resizing\n        if (self._progress_in_iter + 1) % self._random_size_interval == 0:\n            self._input_size = self._random_resize(device=img.device)\n        self._progress_in_iter += 1\n\n        return losses\n\n    def _preprocess(self, img, gt_bboxes):\n        scale_y = self._input_size[0] / self._default_input_size[0]\n        scale_x = self._input_size[1] / self._default_input_size[1]\n        if scale_x != 1 or scale_y != 1:\n            img = F.interpolate(\n                img,\n                size=self._input_size,\n                mode='bilinear',\n                align_corners=False)\n            for gt_bbox in gt_bboxes:\n                gt_bbox[..., 0::2] = gt_bbox[..., 0::2] * scale_x\n                gt_bbox[..., 1::2] = gt_bbox[..., 1::2] * scale_y\n        return img, gt_bboxes\n\n    def _random_resize(self, device):\n        tensor = torch.LongTensor(2).to(device)\n\n        if self.rank == 0:\n            size = random.randint(*self._random_size_range)\n            aspect_ratio = float(\n                self._default_input_size[1]) / self._default_input_size[0]\n            size = (self._size_multiplier * size,\n                    self._size_multiplier * int(aspect_ratio * size))\n            tensor[0] = size[0]\n            tensor[1] = size[1]\n\n        if self.world_size > 1:\n            dist.barrier()\n            dist.broadcast(tensor, 0)\n\n        input_size = (tensor[0].item(), tensor[1].item())\n        return input_size\n"
  },
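The multi-scale step in `YOLOX._preprocess` (mmdet/models/detectors/yolox.py above) amounts to resizing the image batch and rescaling the ground-truth boxes by the same height/width factors. A minimal standalone sketch of that step in plain PyTorch, with illustrative sizes and variable names that are not part of the repository:

```python
import torch
import torch.nn.functional as F

# Illustrative sizes only: (height, width), as in the YOLOX detector above.
default_size, new_size = (640, 640), (480, 800)
imgs = torch.rand(2, 3, *default_size)
gt_bboxes = [torch.tensor([[10., 20., 110., 220.]])]  # [tl_x, tl_y, br_x, br_y]

scale_y = new_size[0] / default_size[0]
scale_x = new_size[1] / default_size[1]
imgs = F.interpolate(imgs, size=new_size, mode='bilinear', align_corners=False)
for bbox in gt_bboxes:
    bbox[..., 0::2] *= scale_x  # x coordinates follow the width scale
    bbox[..., 1::2] *= scale_y  # y coordinates follow the height scale

print(imgs.shape, gt_bboxes[0])  # torch.Size([2, 3, 480, 800]) and the scaled box
```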
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .accuracy import Accuracy, accuracy\nfrom .ae_loss import AssociativeEmbeddingLoss\nfrom .balanced_l1_loss import BalancedL1Loss, balanced_l1_loss\nfrom .cross_entropy_loss import (CrossEntropyLoss, binary_cross_entropy,\n                                 cross_entropy, mask_cross_entropy)\nfrom .dice_loss import DiceLoss\nfrom .focal_loss import FocalLoss, sigmoid_focal_loss\nfrom .gaussian_focal_loss import GaussianFocalLoss\nfrom .gfocal_loss import DistributionFocalLoss, QualityFocalLoss\nfrom .ghm_loss import GHMC, GHMR\nfrom .iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss, GIoULoss, IoULoss,\n                       bounded_iou_loss, iou_loss)\nfrom .kd_loss import KnowledgeDistillationKLDivLoss\nfrom .mse_loss import MSELoss, mse_loss\nfrom .pisa_loss import carl_loss, isr_p\nfrom .seesaw_loss import SeesawLoss\nfrom .smooth_l1_loss import L1Loss, SmoothL1Loss, l1_loss, smooth_l1_loss\nfrom .utils import reduce_loss, weight_reduce_loss, weighted_loss\nfrom .varifocal_loss import VarifocalLoss\n\n__all__ = [\n    'accuracy', 'Accuracy', 'cross_entropy', 'binary_cross_entropy',\n    'mask_cross_entropy', 'CrossEntropyLoss', 'sigmoid_focal_loss',\n    'FocalLoss', 'smooth_l1_loss', 'SmoothL1Loss', 'balanced_l1_loss',\n    'BalancedL1Loss', 'mse_loss', 'MSELoss', 'iou_loss', 'bounded_iou_loss',\n    'IoULoss', 'BoundedIoULoss', 'GIoULoss', 'DIoULoss', 'CIoULoss', 'GHMC',\n    'GHMR', 'reduce_loss', 'weight_reduce_loss', 'weighted_loss', 'L1Loss',\n    'l1_loss', 'isr_p', 'carl_loss', 'AssociativeEmbeddingLoss',\n    'GaussianFocalLoss', 'QualityFocalLoss', 'DistributionFocalLoss',\n    'VarifocalLoss', 'KnowledgeDistillationKLDivLoss', 'SeesawLoss', 'DiceLoss'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/accuracy.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch.nn as nn\n\n\n@mmcv.jit(coderize=True)\ndef accuracy(pred, target, topk=1, thresh=None):\n    \"\"\"Calculate accuracy according to the prediction and target.\n\n    Args:\n        pred (torch.Tensor): The model prediction, shape (N, num_class)\n        target (torch.Tensor): The target of each prediction, shape (N, )\n        topk (int | tuple[int], optional): If the predictions in ``topk``\n            matches the target, the predictions will be regarded as\n            correct ones. Defaults to 1.\n        thresh (float, optional): If not None, predictions with scores under\n            this threshold are considered incorrect. Default to None.\n\n    Returns:\n        float | tuple[float]: If the input ``topk`` is a single integer,\n            the function will return a single float as accuracy. If\n            ``topk`` is a tuple containing multiple integers, the\n            function will return a tuple containing accuracies of\n            each ``topk`` number.\n    \"\"\"\n    assert isinstance(topk, (int, tuple))\n    if isinstance(topk, int):\n        topk = (topk, )\n        return_single = True\n    else:\n        return_single = False\n\n    maxk = max(topk)\n    if pred.size(0) == 0:\n        accu = [pred.new_tensor(0.) for i in range(len(topk))]\n        return accu[0] if return_single else accu\n    assert pred.ndim == 2 and target.ndim == 1\n    assert pred.size(0) == target.size(0)\n    assert maxk <= pred.size(1), \\\n        f'maxk {maxk} exceeds pred dimension {pred.size(1)}'\n    pred_value, pred_label = pred.topk(maxk, dim=1)\n    pred_label = pred_label.t()  # transpose to shape (maxk, N)\n    correct = pred_label.eq(target.view(1, -1).expand_as(pred_label))\n    if thresh is not None:\n        # Only prediction values larger than thresh are counted as correct\n        correct = correct & (pred_value > thresh).t()\n    res = []\n    for k in topk:\n        correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)\n        res.append(correct_k.mul_(100.0 / pred.size(0)))\n    return res[0] if return_single else res\n\n\nclass Accuracy(nn.Module):\n\n    def __init__(self, topk=(1, ), thresh=None):\n        \"\"\"Module to calculate the accuracy.\n\n        Args:\n            topk (tuple, optional): The criterion used to calculate the\n                accuracy. Defaults to (1,).\n            thresh (float, optional): If not None, predictions with scores\n                under this threshold are considered incorrect. Default to None.\n        \"\"\"\n        super().__init__()\n        self.topk = topk\n        self.thresh = thresh\n\n    def forward(self, pred, target):\n        \"\"\"Forward function to calculate accuracy.\n\n        Args:\n            pred (torch.Tensor): Prediction of models.\n            target (torch.Tensor): Target for each prediction.\n\n        Returns:\n            tuple[float]: The accuracies under different topk criterions.\n        \"\"\"\n        return accuracy(pred, target, self.topk, self.thresh)\n"
  },
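As a quick sanity check of the top-k semantics implemented by `accuracy` above, the same computation can be reproduced with a few lines of plain PyTorch (toy numbers, no mmdet import):

```python
import torch

pred = torch.tensor([[0.10, 0.70, 0.20],
                     [0.80, 0.10, 0.10],
                     [0.30, 0.25, 0.45],
                     [0.20, 0.50, 0.30]])
target = torch.tensor([1, 0, 1, 2])

# A sample counts as correct for k if its true label is among its k
# highest-scoring classes, mirroring accuracy(pred, target, topk=(1, 2)).
_, pred_label = pred.topk(2, dim=1)
correct = pred_label.t().eq(target.view(1, -1))
top1 = correct[:1].float().sum() * 100.0 / pred.size(0)
top2 = correct[:2].float().sum() * 100.0 / pred.size(0)
print(top1.item(), top2.item())  # 50.0 75.0
```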
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/ae_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ..builder import LOSSES\n\n\n@mmcv.jit(derivate=True, coderize=True)\ndef ae_loss_per_image(tl_preds, br_preds, match):\n    \"\"\"Associative Embedding Loss in one image.\n\n    Associative Embedding Loss including two parts: pull loss and push loss.\n    Pull loss makes embedding vectors from same object closer to each other.\n    Push loss distinguish embedding vector from different objects, and makes\n        the gap between them is large enough.\n\n    During computing, usually there are 3 cases:\n        - no object in image: both pull loss and push loss will be 0.\n        - one object in image: push loss will be 0 and pull loss is computed\n            by the two corner of the only object.\n        - more than one objects in image: pull loss is computed by corner pairs\n            from each object, push loss is computed by each object with all\n            other objects. We use confusion matrix with 0 in diagonal to\n            compute the push loss.\n\n    Args:\n        tl_preds (tensor): Embedding feature map of left-top corner.\n        br_preds (tensor): Embedding feature map of bottim-right corner.\n        match (list): Downsampled coordinates pair of each ground truth box.\n    \"\"\"\n\n    tl_list, br_list, me_list = [], [], []\n    if len(match) == 0:  # no object in image\n        pull_loss = tl_preds.sum() * 0.\n        push_loss = tl_preds.sum() * 0.\n    else:\n        for m in match:\n            [tl_y, tl_x], [br_y, br_x] = m\n            tl_e = tl_preds[:, tl_y, tl_x].view(-1, 1)\n            br_e = br_preds[:, br_y, br_x].view(-1, 1)\n            tl_list.append(tl_e)\n            br_list.append(br_e)\n            me_list.append((tl_e + br_e) / 2.0)\n\n        tl_list = torch.cat(tl_list)\n        br_list = torch.cat(br_list)\n        me_list = torch.cat(me_list)\n\n        assert tl_list.size() == br_list.size()\n\n        # N is object number in image, M is dimension of embedding vector\n        N, M = tl_list.size()\n\n        pull_loss = (tl_list - me_list).pow(2) + (br_list - me_list).pow(2)\n        pull_loss = pull_loss.sum() / N\n\n        margin = 1  # exp setting of CornerNet, details in section 3.3 of paper\n\n        # confusion matrix of push loss\n        conf_mat = me_list.expand((N, N, M)).permute(1, 0, 2) - me_list\n        conf_weight = 1 - torch.eye(N).type_as(me_list)\n        conf_mat = conf_weight * (margin - conf_mat.sum(-1).abs())\n\n        if N > 1:  # more than one object in current image\n            push_loss = F.relu(conf_mat).sum() / (N * (N - 1))\n        else:\n            push_loss = tl_preds.sum() * 0.\n\n    return pull_loss, push_loss\n\n\n@LOSSES.register_module()\nclass AssociativeEmbeddingLoss(nn.Module):\n    \"\"\"Associative Embedding Loss.\n\n    More details can be found in\n    `Associative Embedding <https://arxiv.org/abs/1611.05424>`_ and\n    `CornerNet <https://arxiv.org/abs/1808.01244>`_ .\n    Code is modified from `kp_utils.py <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/kp_utils.py#L180>`_  # noqa: E501\n\n    Args:\n        pull_weight (float): Loss weight for corners from same object.\n        push_weight (float): Loss weight for corners from different object.\n    \"\"\"\n\n    def __init__(self, pull_weight=0.25, push_weight=0.25):\n        super(AssociativeEmbeddingLoss, self).__init__()\n        self.pull_weight = 
pull_weight\n        self.push_weight = push_weight\n\n    def forward(self, pred, target, match):\n        \"\"\"Forward function.\"\"\"\n        batch = pred.size(0)\n        pull_all, push_all = 0.0, 0.0\n        for i in range(batch):\n            pull, push = ae_loss_per_image(pred[i], target[i], match[i])\n\n            pull_all += self.pull_weight * pull\n            push_all += self.push_weight * push\n\n        return pull_all, push_all\n"
  },
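For intuition about the pull/push terms defined in ae_loss.py above, here is a tiny worked example with two objects and scalar (M = 1) embeddings; the numbers are made up and the snippet only mirrors the formulas, it does not call the mmdet code:

```python
import torch

# Two objects, embedding dimension M = 1, as in CornerNet.
tl = torch.tensor([[2.0], [5.0]])   # top-left corner embeddings, shape (N, 1)
br = torch.tensor([[2.2], [4.8]])   # bottom-right corner embeddings
me = (tl + br) / 2.0                # per-object means: [[2.1], [4.9]]

# Pull: corners of the same object should agree with their mean.
pull = ((tl - me).pow(2) + (br - me).pow(2)).sum() / tl.size(0)  # (2*0.01 + 2*0.01) / 2

# Push: means of different objects should be at least `margin` apart
# (single pair here; the full loss averages over all ordered pairs).
margin = 1
push = torch.relu(margin - (me[0] - me[1]).abs()).sum()  # 0 since |2.1 - 4.9| > margin
print(pull.item(), push.item())
```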
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/balanced_l1_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport torch\nimport torch.nn as nn\n\nfrom ..builder import LOSSES\nfrom .utils import weighted_loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef balanced_l1_loss(pred,\n                     target,\n                     beta=1.0,\n                     alpha=0.5,\n                     gamma=1.5,\n                     reduction='mean'):\n    \"\"\"Calculate balanced L1 loss.\n\n    Please see the `Libra R-CNN <https://arxiv.org/pdf/1904.02701.pdf>`_\n\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, 4).\n        target (torch.Tensor): The learning target of the prediction with\n            shape (N, 4).\n        beta (float): The loss is a piecewise function of prediction and target\n            and ``beta`` serves as a threshold for the difference between the\n            prediction and target. Defaults to 1.0.\n        alpha (float): The denominator ``alpha`` in the balanced L1 loss.\n            Defaults to 0.5.\n        gamma (float): The ``gamma`` in the balanced L1 loss.\n            Defaults to 1.5.\n        reduction (str, optional): The method that reduces the loss to a\n            scalar. Options are \"none\", \"mean\" and \"sum\".\n\n    Returns:\n        torch.Tensor: The calculated loss\n    \"\"\"\n    assert beta > 0\n    if target.numel() == 0:\n        return pred.sum() * 0\n\n    assert pred.size() == target.size()\n\n    diff = torch.abs(pred - target)\n    b = np.e**(gamma / alpha) - 1\n    loss = torch.where(\n        diff < beta, alpha / b *\n        (b * diff + 1) * torch.log(b * diff / beta + 1) - alpha * diff,\n        gamma * diff + gamma / b - alpha * beta)\n\n    return loss\n\n\n@LOSSES.register_module()\nclass BalancedL1Loss(nn.Module):\n    \"\"\"Balanced L1 Loss.\n\n    arXiv: https://arxiv.org/pdf/1904.02701.pdf (CVPR 2019)\n\n    Args:\n        alpha (float): The denominator ``alpha`` in the balanced L1 loss.\n            Defaults to 0.5.\n        gamma (float): The ``gamma`` in the balanced L1 loss. Defaults to 1.5.\n        beta (float, optional): The loss is a piecewise function of prediction\n            and target. ``beta`` serves as a threshold for the difference\n            between the prediction and target. Defaults to 1.0.\n        reduction (str, optional): The method that reduces the loss to a\n            scalar. Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of the loss. 
Defaults to 1.0\n    \"\"\"\n\n    def __init__(self,\n                 alpha=0.5,\n                 gamma=1.5,\n                 beta=1.0,\n                 reduction='mean',\n                 loss_weight=1.0):\n        super(BalancedL1Loss, self).__init__()\n        self.alpha = alpha\n        self.gamma = gamma\n        self.beta = beta\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        \"\"\"Forward function of loss.\n\n        Args:\n            pred (torch.Tensor): The prediction with shape (N, 4).\n            target (torch.Tensor): The learning target of the prediction with\n                shape (N, 4).\n            weight (torch.Tensor, optional): Sample-wise loss weight with\n                shape (N, ).\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Options are \"none\", \"mean\" and \"sum\".\n\n        Returns:\n            torch.Tensor: The calculated loss\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss_bbox = self.loss_weight * balanced_l1_loss(\n            pred,\n            target,\n            weight,\n            alpha=self.alpha,\n            gamma=self.gamma,\n            beta=self.beta,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            **kwargs)\n        return loss_bbox\n"
  },
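A quick numerical check of the piecewise form used by `balanced_l1_loss` above, written as a hedged sketch in plain NumPy (not an import of the mmdet function), using the default alpha=0.5, gamma=1.5, beta=1.0:

```python
import numpy as np

def balanced_l1(diff, alpha=0.5, gamma=1.5, beta=1.0):
    # Same piecewise expression as balanced_l1_loss above, element-wise.
    b = np.e ** (gamma / alpha) - 1
    small = alpha / b * (b * diff + 1) * np.log(b * diff / beta + 1) - alpha * diff
    large = gamma * diff + gamma / b - alpha * beta
    return np.where(diff < beta, small, large)

diff = np.array([0.0, 0.5, 0.999, 1.0, 2.0])
print(balanced_l1(diff))
# The two branches meet at diff == beta, so the loss is continuous there,
# and it grows linearly (slope gamma) for large errors, like smooth L1.
```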
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/cross_entropy_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ..builder import LOSSES\nfrom .utils import weight_reduce_loss\n\n\ndef cross_entropy(pred,\n                  label,\n                  weight=None,\n                  reduction='mean',\n                  avg_factor=None,\n                  class_weight=None,\n                  ignore_index=-100,\n                  avg_non_ignore=False):\n    \"\"\"Calculate the CrossEntropy loss.\n\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, C), C is the number\n            of classes.\n        label (torch.Tensor): The learning label of the prediction.\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        reduction (str, optional): The method used to reduce the loss.\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n        class_weight (list[float], optional): The weight for each class.\n        ignore_index (int | None): The label index to be ignored.\n            If None, it will be set to default value. Default: -100.\n        avg_non_ignore (bool): The flag decides to whether the loss is\n            only averaged over non-ignored targets. Default: False.\n\n    Returns:\n        torch.Tensor: The calculated loss\n    \"\"\"\n    # The default value of ignore_index is the same as F.cross_entropy\n    ignore_index = -100 if ignore_index is None else ignore_index\n    # element-wise losses\n    loss = F.cross_entropy(\n        pred,\n        label,\n        weight=class_weight,\n        reduction='none',\n        ignore_index=ignore_index)\n\n    # average loss over non-ignored elements\n    # pytorch's official cross_entropy average loss over non-ignored elements\n    # refer to https://github.com/pytorch/pytorch/blob/56b43f4fec1f76953f15a627694d4bba34588969/torch/nn/functional.py#L2660  # noqa\n    if (avg_factor is None) and avg_non_ignore and reduction == 'mean':\n        avg_factor = label.numel() - (label == ignore_index).sum().item()\n\n    # apply weights and do the reduction\n    if weight is not None:\n        weight = weight.float()\n    loss = weight_reduce_loss(\n        loss, weight=weight, reduction=reduction, avg_factor=avg_factor)\n\n    return loss\n\n\ndef _expand_onehot_labels(labels, label_weights, label_channels, ignore_index):\n    \"\"\"Expand onehot labels to match the size of prediction.\"\"\"\n    bin_labels = labels.new_full((labels.size(0), label_channels), 0)\n    valid_mask = (labels >= 0) & (labels != ignore_index)\n    inds = torch.nonzero(\n        valid_mask & (labels < label_channels), as_tuple=False)\n\n    if inds.numel() > 0:\n        bin_labels[inds, labels[inds]] = 1\n\n    valid_mask = valid_mask.view(-1, 1).expand(labels.size(0),\n                                               label_channels).float()\n    if label_weights is None:\n        bin_label_weights = valid_mask\n    else:\n        bin_label_weights = label_weights.view(-1, 1).repeat(1, label_channels)\n        bin_label_weights *= valid_mask\n\n    return bin_labels, bin_label_weights, valid_mask\n\n\ndef binary_cross_entropy(pred,\n                         label,\n                         weight=None,\n                         reduction='mean',\n                         avg_factor=None,\n                         class_weight=None,\n                         ignore_index=-100,\n                         avg_non_ignore=False):\n  
  \"\"\"Calculate the binary CrossEntropy loss.\n\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, 1) or (N, ).\n            When the shape of pred is (N, 1), label will be expanded to\n            one-hot format, and when the shape of pred is (N, ), label\n            will not be expanded to one-hot format.\n        label (torch.Tensor): The learning label of the prediction,\n            with shape (N, ).\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        reduction (str, optional): The method used to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n        class_weight (list[float], optional): The weight for each class.\n        ignore_index (int | None): The label index to be ignored.\n            If None, it will be set to default value. Default: -100.\n        avg_non_ignore (bool): The flag decides to whether the loss is\n            only averaged over non-ignored targets. Default: False.\n\n    Returns:\n        torch.Tensor: The calculated loss.\n    \"\"\"\n    # The default value of ignore_index is the same as F.cross_entropy\n    ignore_index = -100 if ignore_index is None else ignore_index\n\n    if pred.dim() != label.dim():\n        label, weight, valid_mask = _expand_onehot_labels(\n            label, weight, pred.size(-1), ignore_index)\n    else:\n        # should mask out the ignored elements\n        valid_mask = ((label >= 0) & (label != ignore_index)).float()\n        if weight is not None:\n            # The inplace writing method will have a mismatched broadcast\n            # shape error if the weight and valid_mask dimensions\n            # are inconsistent such as (B,N,1) and (B,N,C).\n            weight = weight * valid_mask\n        else:\n            weight = valid_mask\n\n    # average loss over non-ignored elements\n    if (avg_factor is None) and avg_non_ignore and reduction == 'mean':\n        avg_factor = valid_mask.sum().item()\n\n    # weighted element-wise losses\n    weight = weight.float()\n    loss = F.binary_cross_entropy_with_logits(\n        pred, label.float(), pos_weight=class_weight, reduction='none')\n    # do the reduction for the weighted loss\n    loss = weight_reduce_loss(\n        loss, weight, reduction=reduction, avg_factor=avg_factor)\n\n    return loss\n\n\ndef mask_cross_entropy(pred,\n                       target,\n                       label,\n                       reduction='mean',\n                       avg_factor=None,\n                       class_weight=None,\n                       ignore_index=None,\n                       **kwargs):\n    \"\"\"Calculate the CrossEntropy loss for masks.\n\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, C, *), C is the\n            number of classes. The trailing * indicates arbitrary shape.\n        target (torch.Tensor): The learning label of the prediction.\n        label (torch.Tensor): ``label`` indicates the class label of the mask\n            corresponding object. This will be used to select the mask in the\n            of the class which the object belongs to when the mask prediction\n            if not class-agnostic.\n        reduction (str, optional): The method used to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. 
Defaults to None.\n        class_weight (list[float], optional): The weight for each class.\n        ignore_index (None): Placeholder, to be consistent with other loss.\n            Default: None.\n\n    Returns:\n        torch.Tensor: The calculated loss\n\n    Example:\n        >>> N, C = 3, 11\n        >>> H, W = 2, 2\n        >>> pred = torch.randn(N, C, H, W) * 1000\n        >>> target = torch.rand(N, H, W)\n        >>> label = torch.randint(0, C, size=(N,))\n        >>> reduction = 'mean'\n        >>> avg_factor = None\n        >>> class_weights = None\n        >>> loss = mask_cross_entropy(pred, target, label, reduction,\n        >>>                           avg_factor, class_weights)\n        >>> assert loss.shape == (1,)\n    \"\"\"\n    assert ignore_index is None, 'BCE loss does not support ignore_index'\n    # TODO: handle these two reserved arguments\n    assert reduction == 'mean' and avg_factor is None\n    num_rois = pred.size()[0]\n    inds = torch.arange(0, num_rois, dtype=torch.long, device=pred.device)\n    pred_slice = pred[inds, label].squeeze(1)\n    return F.binary_cross_entropy_with_logits(\n        pred_slice, target, weight=class_weight, reduction='mean')[None]\n\n\n@LOSSES.register_module()\nclass CrossEntropyLoss(nn.Module):\n\n    def __init__(self,\n                 use_sigmoid=False,\n                 use_mask=False,\n                 reduction='mean',\n                 class_weight=None,\n                 ignore_index=None,\n                 loss_weight=1.0,\n                 avg_non_ignore=False):\n        \"\"\"CrossEntropyLoss.\n\n        Args:\n            use_sigmoid (bool, optional): Whether the prediction uses sigmoid\n                of softmax. Defaults to False.\n            use_mask (bool, optional): Whether to use mask cross entropy loss.\n                Defaults to False.\n            reduction (str, optional): . Defaults to 'mean'.\n                Options are \"none\", \"mean\" and \"sum\".\n            class_weight (list[float], optional): Weight of each class.\n                Defaults to None.\n            ignore_index (int | None): The label index to be ignored.\n                Defaults to None.\n            loss_weight (float, optional): Weight of the loss. Defaults to 1.0.\n            avg_non_ignore (bool): The flag decides to whether the loss is\n                only averaged over non-ignored targets. 
Default: False.\n        \"\"\"\n        super(CrossEntropyLoss, self).__init__()\n        assert (use_sigmoid is False) or (use_mask is False)\n        self.use_sigmoid = use_sigmoid\n        self.use_mask = use_mask\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.class_weight = class_weight\n        self.ignore_index = ignore_index\n        self.avg_non_ignore = avg_non_ignore\n        if ((ignore_index is not None) and not self.avg_non_ignore\n                and self.reduction == 'mean'):\n            warnings.warn(\n                'Default ``avg_non_ignore`` is False, if you would like to '\n                'ignore the certain label and average loss over non-ignore '\n                'labels, which is the same with PyTorch official '\n                'cross_entropy, set ``avg_non_ignore=True``.')\n\n        if self.use_sigmoid:\n            self.cls_criterion = binary_cross_entropy\n        elif self.use_mask:\n            self.cls_criterion = mask_cross_entropy\n        else:\n            self.cls_criterion = cross_entropy\n\n    def extra_repr(self):\n        \"\"\"Extra repr.\"\"\"\n        s = f'avg_non_ignore={self.avg_non_ignore}'\n        return s\n\n    def forward(self,\n                cls_score,\n                label,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                ignore_index=None,\n                **kwargs):\n        \"\"\"Forward function.\n\n        Args:\n            cls_score (torch.Tensor): The prediction.\n            label (torch.Tensor): The learning label of the prediction.\n            weight (torch.Tensor, optional): Sample-wise loss weight.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The method used to reduce the\n                loss. Options are \"none\", \"mean\" and \"sum\".\n            ignore_index (int | None): The label index to be ignored.\n                If not None, it will override the default value. Default: None.\n        Returns:\n            torch.Tensor: The calculated loss.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if ignore_index is None:\n            ignore_index = self.ignore_index\n\n        if self.class_weight is not None:\n            class_weight = cls_score.new_tensor(\n                self.class_weight, device=cls_score.device)\n        else:\n            class_weight = None\n        loss_cls = self.loss_weight * self.cls_criterion(\n            cls_score,\n            label,\n            weight,\n            class_weight=class_weight,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            ignore_index=ignore_index,\n            avg_non_ignore=self.avg_non_ignore,\n            **kwargs)\n        return loss_cls\n"
  },
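The `avg_non_ignore` flag in cross_entropy_loss.py above only changes the denominator of the mean reduction. A small illustration of the difference in plain PyTorch (toy tensors, no mmdet import):

```python
import torch
import torch.nn.functional as F

pred = torch.randn(4, 5)                  # 4 samples, 5 classes
label = torch.tensor([1, -100, 3, -100])  # two targets use ignore_index

per_elem = F.cross_entropy(pred, label, reduction='none', ignore_index=-100)
# Ignored positions contribute 0 to the element-wise loss.

mean_over_all = per_elem.sum() / label.numel()   # what avg_non_ignore=False averages over
n_valid = (label != -100).sum()
mean_over_valid = per_elem.sum() / n_valid       # what avg_non_ignore=True averages over
print(mean_over_all.item(), mean_over_valid.item())
```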
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/dice_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\n\nfrom ..builder import LOSSES\nfrom .utils import weight_reduce_loss\n\n\ndef dice_loss(pred,\n              target,\n              weight=None,\n              eps=1e-3,\n              reduction='mean',\n              naive_dice=False,\n              avg_factor=None):\n    \"\"\"Calculate dice loss, there are two forms of dice loss is supported:\n\n        - the one proposed in `V-Net: Fully Convolutional Neural\n            Networks for Volumetric Medical Image Segmentation\n            <https://arxiv.org/abs/1606.04797>`_.\n        - the dice loss in which the power of the number in the\n            denominator is the first power instead of the second\n            power.\n\n    Args:\n        pred (torch.Tensor): The prediction, has a shape (n, *)\n        target (torch.Tensor): The learning label of the prediction,\n            shape (n, *), same shape of pred.\n        weight (torch.Tensor, optional): The weight of loss for each\n            prediction, has a shape (n,). Defaults to None.\n        eps (float): Avoid dividing by zero. Default: 1e-3.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'.\n            Options are \"none\", \"mean\" and \"sum\".\n        naive_dice (bool, optional): If false, use the dice\n                loss defined in the V-Net paper, otherwise, use the\n                naive dice loss in which the power of the number in the\n                denominator is the first power instead of the second\n                power.Defaults to False.\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n\n    input = pred.flatten(1)\n    target = target.flatten(1).float()\n\n    a = torch.sum(input * target, 1)\n    if naive_dice:\n        b = torch.sum(input, 1)\n        c = torch.sum(target, 1)\n        d = (2 * a + eps) / (b + c + eps)\n    else:\n        b = torch.sum(input * input, 1) + eps\n        c = torch.sum(target * target, 1) + eps\n        d = (2 * a) / (b + c)\n\n    loss = 1 - d\n    if weight is not None:\n        assert weight.ndim == loss.ndim\n        assert len(weight) == len(pred)\n    loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\n@LOSSES.register_module()\nclass DiceLoss(nn.Module):\n\n    def __init__(self,\n                 use_sigmoid=True,\n                 activate=True,\n                 reduction='mean',\n                 naive_dice=False,\n                 loss_weight=1.0,\n                 eps=1e-3):\n        \"\"\"Compute dice loss.\n\n        Args:\n            use_sigmoid (bool, optional): Whether to the prediction is\n                used for sigmoid or softmax. Defaults to True.\n            activate (bool): Whether to activate the predictions inside,\n                this will disable the inside sigmoid operation.\n                Defaults to True.\n            reduction (str, optional): The method used\n                to reduce the loss. Options are \"none\",\n                \"mean\" and \"sum\". Defaults to 'mean'.\n            naive_dice (bool, optional): If false, use the dice\n                loss defined in the V-Net paper, otherwise, use the\n                naive dice loss in which the power of the number in the\n                denominator is the first power instead of the second\n                power. 
Defaults to False.\n            loss_weight (float, optional): Weight of loss. Defaults to 1.0.\n            eps (float): Avoid dividing by zero. Defaults to 1e-3.\n        \"\"\"\n\n        super(DiceLoss, self).__init__()\n        self.use_sigmoid = use_sigmoid\n        self.reduction = reduction\n        self.naive_dice = naive_dice\n        self.loss_weight = loss_weight\n        self.eps = eps\n        self.activate = activate\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                reduction_override=None,\n                avg_factor=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction, has a shape (n, *).\n            target (torch.Tensor): The label of the prediction,\n                shape (n, *), same shape of pred.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction, has a shape (n,). Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Options are \"none\", \"mean\" and \"sum\".\n\n        Returns:\n            torch.Tensor: The calculated loss\n        \"\"\"\n\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        if self.activate:\n            if self.use_sigmoid:\n                pred = pred.sigmoid()\n            else:\n                raise NotImplementedError\n\n        loss = self.loss_weight * dice_loss(\n            pred,\n            target,\n            weight,\n            eps=self.eps,\n            reduction=reduction,\n            naive_dice=self.naive_dice,\n            avg_factor=avg_factor)\n\n        return loss\n"
  },
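To make the two dice variants above concrete, here is a minimal re-statement of the formula for a single flattened mask; a hedged sketch in plain PyTorch that mirrors `dice_loss` without reduction or per-sample weights:

```python
import torch

def dice_loss_single(pred, target, eps=1e-3, naive_dice=False):
    pred, target = pred.flatten().float(), target.flatten().float()
    a = (pred * target).sum()
    if naive_dice:
        # first-power denominator
        d = (2 * a + eps) / (pred.sum() + target.sum() + eps)
    else:
        # V-Net form: squared terms in the denominator
        d = 2 * a / ((pred * pred).sum() + eps + (target * target).sum() + eps)
    return 1 - d

pred = torch.tensor([0.9, 0.8, 0.1, 0.2])    # predicted mask probabilities
target = torch.tensor([1.0, 1.0, 0.0, 0.0])  # binary ground truth
print(dice_loss_single(pred, target),
      dice_loss_single(pred, target, naive_dice=True))
```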
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/focal_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.ops import sigmoid_focal_loss as _sigmoid_focal_loss\n\nfrom ..builder import LOSSES\nfrom .utils import weight_reduce_loss\n\n\n# This method is only for debugging\ndef py_sigmoid_focal_loss(pred,\n                          target,\n                          weight=None,\n                          gamma=2.0,\n                          alpha=0.25,\n                          reduction='mean',\n                          avg_factor=None):\n    \"\"\"PyTorch version of `Focal Loss <https://arxiv.org/abs/1708.02002>`_.\n\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, C), C is the\n            number of classes\n        target (torch.Tensor): The learning label of the prediction.\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. Defaults to 2.0.\n        alpha (float, optional): A balanced form for Focal Loss.\n            Defaults to 0.25.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'.\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n    pred_sigmoid = pred.sigmoid()\n    target = target.type_as(pred)\n    pt = (1 - pred_sigmoid) * target + pred_sigmoid * (1 - target)\n    focal_weight = (alpha * target + (1 - alpha) *\n                    (1 - target)) * pt.pow(gamma)\n    loss = F.binary_cross_entropy_with_logits(\n        pred, target, reduction='none') * focal_weight\n    if weight is not None:\n        if weight.shape != loss.shape:\n            if weight.size(0) == loss.size(0):\n                # For most cases, weight is of shape (num_priors, ),\n                #  which means it does not have the second axis num_class\n                weight = weight.view(-1, 1)\n            else:\n                # Sometimes, weight per anchor per class is also needed. e.g.\n                #  in FSAF. But it may be flattened of shape\n                #  (num_priors x num_class, ), while loss is still of shape\n                #  (num_priors, num_class).\n                assert weight.numel() == loss.numel()\n                weight = weight.view(loss.size(0), -1)\n        assert weight.ndim == loss.ndim\n    loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\ndef py_focal_loss_with_prob(pred,\n                            target,\n                            weight=None,\n                            gamma=2.0,\n                            alpha=0.25,\n                            reduction='mean',\n                            avg_factor=None):\n    \"\"\"PyTorch version of `Focal Loss <https://arxiv.org/abs/1708.02002>`_.\n    Different from `py_sigmoid_focal_loss`, this function accepts probability\n    as input.\n\n    Args:\n        pred (torch.Tensor): The prediction probability with shape (N, C),\n            C is the number of classes.\n        target (torch.Tensor): The learning label of the prediction.\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. 
Defaults to 2.0.\n        alpha (float, optional): A balanced form for Focal Loss.\n            Defaults to 0.25.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'.\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n    num_classes = pred.size(1)\n    target = F.one_hot(target, num_classes=num_classes + 1)\n    target = target[:, :num_classes]\n\n    target = target.type_as(pred)\n    pt = (1 - pred) * target + pred * (1 - target)\n    focal_weight = (alpha * target + (1 - alpha) *\n                    (1 - target)) * pt.pow(gamma)\n    loss = F.binary_cross_entropy(\n        pred, target, reduction='none') * focal_weight\n    if weight is not None:\n        if weight.shape != loss.shape:\n            if weight.size(0) == loss.size(0):\n                # For most cases, weight is of shape (num_priors, ),\n                #  which means it does not have the second axis num_class\n                weight = weight.view(-1, 1)\n            else:\n                # Sometimes, weight per anchor per class is also needed. e.g.\n                #  in FSAF. But it may be flattened of shape\n                #  (num_priors x num_class, ), while loss is still of shape\n                #  (num_priors, num_class).\n                assert weight.numel() == loss.numel()\n                weight = weight.view(loss.size(0), -1)\n        assert weight.ndim == loss.ndim\n    loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\ndef sigmoid_focal_loss(pred,\n                       target,\n                       weight=None,\n                       gamma=2.0,\n                       alpha=0.25,\n                       reduction='mean',\n                       avg_factor=None):\n    r\"\"\"A wrapper of cuda version `Focal Loss\n    <https://arxiv.org/abs/1708.02002>`_.\n\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, C), C is the number\n            of classes.\n        target (torch.Tensor): The learning label of the prediction.\n        weight (torch.Tensor, optional): Sample-wise loss weight.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. Defaults to 2.0.\n        alpha (float, optional): A balanced form for Focal Loss.\n            Defaults to 0.25.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'. Options are \"none\", \"mean\" and \"sum\".\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n    # Function.apply does not accept keyword arguments, so the decorator\n    # \"weighted_loss\" is not applicable\n    loss = _sigmoid_focal_loss(pred.contiguous(), target.contiguous(), gamma,\n                               alpha, None, 'none')\n    if weight is not None:\n        if weight.shape != loss.shape:\n            if weight.size(0) == loss.size(0):\n                # For most cases, weight is of shape (num_priors, ),\n                #  which means it does not have the second axis num_class\n                weight = weight.view(-1, 1)\n            else:\n                # Sometimes, weight per anchor per class is also needed. e.g.\n                #  in FSAF. 
But it may be flattened of shape\n                #  (num_priors x num_class, ), while loss is still of shape\n                #  (num_priors, num_class).\n                assert weight.numel() == loss.numel()\n                weight = weight.view(loss.size(0), -1)\n        assert weight.ndim == loss.ndim\n    loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\n@LOSSES.register_module()\nclass FocalLoss(nn.Module):\n\n    def __init__(self,\n                 use_sigmoid=True,\n                 gamma=2.0,\n                 alpha=0.25,\n                 reduction='mean',\n                 loss_weight=1.0,\n                 activated=False):\n        \"\"\"`Focal Loss <https://arxiv.org/abs/1708.02002>`_\n\n        Args:\n            use_sigmoid (bool, optional): Whether to the prediction is\n                used for sigmoid or softmax. Defaults to True.\n            gamma (float, optional): The gamma for calculating the modulating\n                factor. Defaults to 2.0.\n            alpha (float, optional): A balanced form for Focal Loss.\n                Defaults to 0.25.\n            reduction (str, optional): The method used to reduce the loss into\n                a scalar. Defaults to 'mean'. Options are \"none\", \"mean\" and\n                \"sum\".\n            loss_weight (float, optional): Weight of loss. Defaults to 1.0.\n            activated (bool, optional): Whether the input is activated.\n                If True, it means the input has been activated and can be\n                treated as probabilities. Else, it should be treated as logits.\n                Defaults to False.\n        \"\"\"\n        super(FocalLoss, self).__init__()\n        assert use_sigmoid is True, 'Only sigmoid focal loss supported now.'\n        self.use_sigmoid = use_sigmoid\n        self.gamma = gamma\n        self.alpha = alpha\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.activated = activated\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning label of the prediction.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Options are \"none\", \"mean\" and \"sum\".\n\n        Returns:\n            torch.Tensor: The calculated loss\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if self.use_sigmoid:\n            if self.activated:\n                calculate_loss_func = py_focal_loss_with_prob\n            else:\n                if torch.cuda.is_available() and pred.is_cuda:\n                    calculate_loss_func = sigmoid_focal_loss\n                else:\n                    num_classes = pred.size(1)\n                    target = F.one_hot(target, num_classes=num_classes + 1)\n                    target = target[:, :num_classes]\n                    calculate_loss_func = py_sigmoid_focal_loss\n\n            loss_cls = self.loss_weight * calculate_loss_func(\n                pred,\n                target,\n                weight,\n                gamma=self.gamma,\n                alpha=self.alpha,\n                reduction=reduction,\n                avg_factor=avg_factor)\n\n        else:\n            raise NotImplementedError\n        return loss_cls\n"
  },
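A minimal sketch of calling `FocalLoss` on CPU tensors (illustration only, not part of the repository). Importing the module requires an mmcv build with compiled ops, but with CPU inputs the forward pass takes the pure-PyTorch `py_sigmoid_focal_loss` branch shown above, where a label equal to the number of classes is treated as background.

```python
import torch
from mmdet.models.losses import FocalLoss  # needs mmcv with compiled ops installed

criterion = FocalLoss(use_sigmoid=True, gamma=2.0, alpha=0.25)
logits = torch.randn(8, 80)            # 8 samples, 80 class logits
labels = torch.randint(0, 81, (8,))    # index 80 acts as background
loss = criterion(logits, labels)       # CPU tensors -> py_sigmoid_focal_loss path
print(loss.item())
```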
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/gaussian_focal_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch.nn as nn\n\nfrom ..builder import LOSSES\nfrom .utils import weighted_loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef gaussian_focal_loss(pred, gaussian_target, alpha=2.0, gamma=4.0):\n    \"\"\"`Focal Loss <https://arxiv.org/abs/1708.02002>`_ for targets in gaussian\n    distribution.\n\n    Args:\n        pred (torch.Tensor): The prediction.\n        gaussian_target (torch.Tensor): The learning target of the prediction\n            in gaussian distribution.\n        alpha (float, optional): A balanced form for Focal Loss.\n            Defaults to 2.0.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. Defaults to 4.0.\n    \"\"\"\n    eps = 1e-12\n    pos_weights = gaussian_target.eq(1)\n    neg_weights = (1 - gaussian_target).pow(gamma)\n    pos_loss = -(pred + eps).log() * (1 - pred).pow(alpha) * pos_weights\n    neg_loss = -(1 - pred + eps).log() * pred.pow(alpha) * neg_weights\n    return pos_loss + neg_loss\n\n\n@LOSSES.register_module()\nclass GaussianFocalLoss(nn.Module):\n    \"\"\"GaussianFocalLoss is a variant of focal loss.\n\n    More details can be found in the `paper\n    <https://arxiv.org/abs/1808.01244>`_\n    Code is modified from `kp_utils.py\n    <https://github.com/princeton-vl/CornerNet/blob/master/models/py_utils/kp_utils.py#L152>`_  # noqa: E501\n    Please notice that the target in GaussianFocalLoss is a gaussian heatmap,\n    not 0/1 binary target.\n\n    Args:\n        alpha (float): Power of prediction.\n        gamma (float): Power of target for negative samples.\n        reduction (str): Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float): Loss weight of current loss.\n    \"\"\"\n\n    def __init__(self,\n                 alpha=2.0,\n                 gamma=4.0,\n                 reduction='mean',\n                 loss_weight=1.0):\n        super(GaussianFocalLoss, self).__init__()\n        self.alpha = alpha\n        self.gamma = gamma\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction\n                in gaussian distribution.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss_reg = self.loss_weight * gaussian_focal_loss(\n            pred,\n            target,\n            weight,\n            alpha=self.alpha,\n            gamma=self.gamma,\n            reduction=reduction,\n            avg_factor=avg_factor)\n        return loss_reg\n"
  },
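A small sketch of `GaussianFocalLoss` (illustrative, not repo code): the prediction is expected to be an already-activated probability heatmap and the target a gaussian heatmap whose peaks equal 1, not a 0/1 mask.

```python
import torch
from mmdet.models.losses import GaussianFocalLoss

criterion = GaussianFocalLoss(alpha=2.0, gamma=4.0)
pred_heatmap = torch.rand(2, 80, 32, 32)        # sigmoid-activated centre heatmap
target_heatmap = torch.zeros(2, 80, 32, 32)
target_heatmap[0, 3, 16, 16] = 1.0              # a single (simplified) gaussian peak
loss = criterion(pred_heatmap, target_heatmap)
print(loss.item())
```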
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/gfocal_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ..builder import LOSSES\nfrom .utils import weighted_loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef quality_focal_loss(pred, target, beta=2.0):\n    r\"\"\"Quality Focal Loss (QFL) is from `Generalized Focal Loss: Learning\n    Qualified and Distributed Bounding Boxes for Dense Object Detection\n    <https://arxiv.org/abs/2006.04388>`_.\n\n    Args:\n        pred (torch.Tensor): Predicted joint representation of classification\n            and quality (IoU) estimation with shape (N, C), C is the number of\n            classes.\n        target (tuple([torch.Tensor])): Target category label with shape (N,)\n            and target quality label with shape (N,).\n        beta (float): The beta parameter for calculating the modulating factor.\n            Defaults to 2.0.\n\n    Returns:\n        torch.Tensor: Loss tensor with shape (N,).\n    \"\"\"\n    assert len(target) == 2, \"\"\"target for QFL must be a tuple of two elements,\n        including category label and quality label, respectively\"\"\"\n    # label denotes the category id, score denotes the quality score\n    label, score = target\n\n    # negatives are supervised by 0 quality score\n    pred_sigmoid = pred.sigmoid()\n    scale_factor = pred_sigmoid\n    zerolabel = scale_factor.new_zeros(pred.shape)\n    loss = F.binary_cross_entropy_with_logits(\n        pred, zerolabel, reduction='none') * scale_factor.pow(beta)\n\n    # FG cat_id: [0, num_classes -1], BG cat_id: num_classes\n    bg_class_ind = pred.size(1)\n    pos = ((label >= 0) & (label < bg_class_ind)).nonzero().squeeze(1)\n    pos_label = label[pos].long()\n    # positives are supervised by bbox quality (IoU) score\n    scale_factor = score[pos] - pred_sigmoid[pos, pos_label]\n    loss[pos, pos_label] = F.binary_cross_entropy_with_logits(\n        pred[pos, pos_label], score[pos],\n        reduction='none') * scale_factor.abs().pow(beta)\n\n    loss = loss.sum(dim=1, keepdim=False)\n    return loss\n\n\n@weighted_loss\ndef quality_focal_loss_with_prob(pred, target, beta=2.0):\n    r\"\"\"Quality Focal Loss (QFL) is from `Generalized Focal Loss: Learning\n    Qualified and Distributed Bounding Boxes for Dense Object Detection\n    <https://arxiv.org/abs/2006.04388>`_.\n    Different from `quality_focal_loss`, this function accepts probability\n    as input.\n\n    Args:\n        pred (torch.Tensor): Predicted joint representation of classification\n            and quality (IoU) estimation with shape (N, C), C is the number of\n            classes.\n        target (tuple([torch.Tensor])): Target category label with shape (N,)\n            and target quality label with shape (N,).\n        beta (float): The beta parameter for calculating the modulating factor.\n            Defaults to 2.0.\n\n    Returns:\n        torch.Tensor: Loss tensor with shape (N,).\n    \"\"\"\n    assert len(target) == 2, \"\"\"target for QFL must be a tuple of two elements,\n        including category label and quality label, respectively\"\"\"\n    # label denotes the category id, score denotes the quality score\n    label, score = target\n\n    # negatives are supervised by 0 quality score\n    pred_sigmoid = pred\n    scale_factor = pred_sigmoid\n    zerolabel = scale_factor.new_zeros(pred.shape)\n    loss = F.binary_cross_entropy(\n        pred, zerolabel, reduction='none') * scale_factor.pow(beta)\n\n    # FG cat_id: [0, 
num_classes -1], BG cat_id: num_classes\n    bg_class_ind = pred.size(1)\n    pos = ((label >= 0) & (label < bg_class_ind)).nonzero().squeeze(1)\n    pos_label = label[pos].long()\n    # positives are supervised by bbox quality (IoU) score\n    scale_factor = score[pos] - pred_sigmoid[pos, pos_label]\n    loss[pos, pos_label] = F.binary_cross_entropy(\n        pred[pos, pos_label], score[pos],\n        reduction='none') * scale_factor.abs().pow(beta)\n\n    loss = loss.sum(dim=1, keepdim=False)\n    return loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef distribution_focal_loss(pred, label):\n    r\"\"\"Distribution Focal Loss (DFL) is from `Generalized Focal Loss: Learning\n    Qualified and Distributed Bounding Boxes for Dense Object Detection\n    <https://arxiv.org/abs/2006.04388>`_.\n\n    Args:\n        pred (torch.Tensor): Predicted general distribution of bounding boxes\n            (before softmax) with shape (N, n+1), n is the max value of the\n            integral set `{0, ..., n}` in paper.\n        label (torch.Tensor): Target distance label for bounding boxes with\n            shape (N,).\n\n    Returns:\n        torch.Tensor: Loss tensor with shape (N,).\n    \"\"\"\n    dis_left = label.long()\n    dis_right = dis_left + 1\n    weight_left = dis_right.float() - label\n    weight_right = label - dis_left.float()\n    loss = F.cross_entropy(pred, dis_left, reduction='none') * weight_left \\\n        + F.cross_entropy(pred, dis_right, reduction='none') * weight_right\n    return loss\n\n\n@LOSSES.register_module()\nclass QualityFocalLoss(nn.Module):\n    r\"\"\"Quality Focal Loss (QFL) is a variant of `Generalized Focal Loss:\n    Learning Qualified and Distributed Bounding Boxes for Dense Object\n    Detection <https://arxiv.org/abs/2006.04388>`_.\n\n    Args:\n        use_sigmoid (bool): Whether sigmoid operation is conducted in QFL.\n            Defaults to True.\n        beta (float): The beta parameter for calculating the modulating factor.\n            Defaults to 2.0.\n        reduction (str): Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float): Loss weight of current loss.\n        activated (bool, optional): Whether the input is activated.\n            If True, it means the input has been activated and can be\n            treated as probabilities. 
Else, it should be treated as logits.\n            Defaults to False.\n    \"\"\"\n\n    def __init__(self,\n                 use_sigmoid=True,\n                 beta=2.0,\n                 reduction='mean',\n                 loss_weight=1.0,\n                 activated=False):\n        super(QualityFocalLoss, self).__init__()\n        assert use_sigmoid is True, 'Only sigmoid in QFL supported now.'\n        self.use_sigmoid = use_sigmoid\n        self.beta = beta\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.activated = activated\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): Predicted joint representation of\n                classification and quality (IoU) estimation with shape (N, C),\n                C is the number of classes.\n            target (tuple([torch.Tensor])): Target category label with shape\n                (N,) and target quality label with shape (N,).\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if self.use_sigmoid:\n            if self.activated:\n                calculate_loss_func = quality_focal_loss_with_prob\n            else:\n                calculate_loss_func = quality_focal_loss\n            loss_cls = self.loss_weight * calculate_loss_func(\n                pred,\n                target,\n                weight,\n                beta=self.beta,\n                reduction=reduction,\n                avg_factor=avg_factor)\n        else:\n            raise NotImplementedError\n        return loss_cls\n\n\n@LOSSES.register_module()\nclass DistributionFocalLoss(nn.Module):\n    r\"\"\"Distribution Focal Loss (DFL) is a variant of `Generalized Focal Loss:\n    Learning Qualified and Distributed Bounding Boxes for Dense Object\n    Detection <https://arxiv.org/abs/2006.04388>`_.\n\n    Args:\n        reduction (str): Options are `'none'`, `'mean'` and `'sum'`.\n        loss_weight (float): Loss weight of current loss.\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(DistributionFocalLoss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): Predicted general distribution of bounding\n                boxes (before softmax) with shape (N, n+1), n is the max value\n                of the integral set `{0, ..., n}` in paper.\n            target (torch.Tensor): Target distance label for bounding boxes\n                with shape (N,).\n            weight (torch.Tensor, optional): The weight of loss for each\n    
            prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss_cls = self.loss_weight * distribution_focal_loss(\n            pred, target, weight, reduction=reduction, avg_factor=avg_factor)\n        return loss_cls\n"
  },
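A usage sketch for the two losses above (illustrative only, assuming both classes are exported from `mmdet.models.losses` as in upstream mmdetection): QFL takes a `(label, quality score)` tuple as target, with a label equal to the class count meaning background, while DFL takes continuous distance targets inside the integral set `{0, ..., n}`.

```python
import torch
from mmdet.models.losses import QualityFocalLoss, DistributionFocalLoss

num_classes = 80
qfl = QualityFocalLoss(use_sigmoid=True, beta=2.0)
cls_logits = torch.randn(16, num_classes)
labels = torch.randint(0, num_classes + 1, (16,))   # num_classes == background
quality = torch.rand(16)                            # IoU of the matched box
loss_qfl = qfl(cls_logits, (labels, quality))

dfl = DistributionFocalLoss()
dist_logits = torch.randn(16, 17)      # distribution over the integral set {0, ..., 16}
dist_targets = torch.rand(16) * 16     # continuous distances in [0, 16)
loss_dfl = dfl(dist_logits, dist_targets)
print(loss_qfl.item(), loss_dfl.item())
```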
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/ghm_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ..builder import LOSSES\nfrom .utils import weight_reduce_loss\n\n\ndef _expand_onehot_labels(labels, label_weights, label_channels):\n    bin_labels = labels.new_full((labels.size(0), label_channels), 0)\n    inds = torch.nonzero(\n        (labels >= 0) & (labels < label_channels), as_tuple=False).squeeze()\n    if inds.numel() > 0:\n        bin_labels[inds, labels[inds]] = 1\n    bin_label_weights = label_weights.view(-1, 1).expand(\n        label_weights.size(0), label_channels)\n    return bin_labels, bin_label_weights\n\n\n# TODO: code refactoring to make it consistent with other losses\n@LOSSES.register_module()\nclass GHMC(nn.Module):\n    \"\"\"GHM Classification Loss.\n\n    Details of the theorem can be viewed in the paper\n    `Gradient Harmonized Single-stage Detector\n    <https://arxiv.org/abs/1811.05181>`_.\n\n    Args:\n        bins (int): Number of the unit regions for distribution calculation.\n        momentum (float): The parameter for moving average.\n        use_sigmoid (bool): Can only be true for BCE based loss now.\n        loss_weight (float): The weight of the total GHM-C loss.\n        reduction (str): Options are \"none\", \"mean\" and \"sum\".\n            Defaults to \"mean\"\n    \"\"\"\n\n    def __init__(self,\n                 bins=10,\n                 momentum=0,\n                 use_sigmoid=True,\n                 loss_weight=1.0,\n                 reduction='mean'):\n        super(GHMC, self).__init__()\n        self.bins = bins\n        self.momentum = momentum\n        edges = torch.arange(bins + 1).float() / bins\n        self.register_buffer('edges', edges)\n        self.edges[-1] += 1e-6\n        if momentum > 0:\n            acc_sum = torch.zeros(bins)\n            self.register_buffer('acc_sum', acc_sum)\n        self.use_sigmoid = use_sigmoid\n        if not self.use_sigmoid:\n            raise NotImplementedError\n        self.loss_weight = loss_weight\n        self.reduction = reduction\n\n    def forward(self,\n                pred,\n                target,\n                label_weight,\n                reduction_override=None,\n                **kwargs):\n        \"\"\"Calculate the GHM-C loss.\n\n        Args:\n            pred (float tensor of size [batch_num, class_num]):\n                The direct prediction of classification fc layer.\n            target (float tensor of size [batch_num, class_num]):\n                Binary class target for each sample.\n            label_weight (float tensor of size [batch_num, class_num]):\n                the value is 1 if the sample is valid and 0 if ignored.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        Returns:\n            The gradient harmonized loss.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        # the target should be binary class label\n        if pred.dim() != target.dim():\n            target, label_weight = _expand_onehot_labels(\n                target, label_weight, pred.size(-1))\n        target, label_weight = target.float(), label_weight.float()\n        edges = self.edges\n        mmt = self.momentum\n        weights = torch.zeros_like(pred)\n\n        # 
gradient length\n        g = torch.abs(pred.sigmoid().detach() - target)\n\n        valid = label_weight > 0\n        tot = max(valid.float().sum().item(), 1.0)\n        n = 0  # n valid bins\n        for i in range(self.bins):\n            inds = (g >= edges[i]) & (g < edges[i + 1]) & valid\n            num_in_bin = inds.sum().item()\n            if num_in_bin > 0:\n                if mmt > 0:\n                    self.acc_sum[i] = mmt * self.acc_sum[i] \\\n                        + (1 - mmt) * num_in_bin\n                    weights[inds] = tot / self.acc_sum[i]\n                else:\n                    weights[inds] = tot / num_in_bin\n                n += 1\n        if n > 0:\n            weights = weights / n\n\n        loss = F.binary_cross_entropy_with_logits(\n            pred, target, reduction='none')\n        loss = weight_reduce_loss(\n            loss, weights, reduction=reduction, avg_factor=tot)\n        return loss * self.loss_weight\n\n\n# TODO: code refactoring to make it consistent with other losses\n@LOSSES.register_module()\nclass GHMR(nn.Module):\n    \"\"\"GHM Regression Loss.\n\n    Details of the theorem can be viewed in the paper\n    `Gradient Harmonized Single-stage Detector\n    <https://arxiv.org/abs/1811.05181>`_.\n\n    Args:\n        mu (float): The parameter for the Authentic Smooth L1 loss.\n        bins (int): Number of the unit regions for distribution calculation.\n        momentum (float): The parameter for moving average.\n        loss_weight (float): The weight of the total GHM-R loss.\n        reduction (str): Options are \"none\", \"mean\" and \"sum\".\n            Defaults to \"mean\"\n    \"\"\"\n\n    def __init__(self,\n                 mu=0.02,\n                 bins=10,\n                 momentum=0,\n                 loss_weight=1.0,\n                 reduction='mean'):\n        super(GHMR, self).__init__()\n        self.mu = mu\n        self.bins = bins\n        edges = torch.arange(bins + 1).float() / bins\n        self.register_buffer('edges', edges)\n        self.edges[-1] = 1e3\n        self.momentum = momentum\n        if momentum > 0:\n            acc_sum = torch.zeros(bins)\n            self.register_buffer('acc_sum', acc_sum)\n        self.loss_weight = loss_weight\n        self.reduction = reduction\n\n    # TODO: support reduction parameter\n    def forward(self,\n                pred,\n                target,\n                label_weight,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Calculate the GHM-R loss.\n\n        Args:\n            pred (float tensor of size [batch_num, 4 (* class_num)]):\n                The prediction of box regression layer. 
Channel number can be 4\n                or 4 * class_num depending on whether it is class-agnostic.\n            target (float tensor of size [batch_num, 4 (* class_num)]):\n                The target regression values with the same size of pred.\n            label_weight (float tensor of size [batch_num, 4 (* class_num)]):\n                The weight of each sample, 0 if ignored.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        Returns:\n            The gradient harmonized loss.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        mu = self.mu\n        edges = self.edges\n        mmt = self.momentum\n\n        # ASL1 loss\n        diff = pred - target\n        loss = torch.sqrt(diff * diff + mu * mu) - mu\n\n        # gradient length\n        g = torch.abs(diff / torch.sqrt(mu * mu + diff * diff)).detach()\n        weights = torch.zeros_like(g)\n\n        valid = label_weight > 0\n        tot = max(label_weight.float().sum().item(), 1.0)\n        n = 0  # n: valid bins\n        for i in range(self.bins):\n            inds = (g >= edges[i]) & (g < edges[i + 1]) & valid\n            num_in_bin = inds.sum().item()\n            if num_in_bin > 0:\n                n += 1\n                if mmt > 0:\n                    self.acc_sum[i] = mmt * self.acc_sum[i] \\\n                        + (1 - mmt) * num_in_bin\n                    weights[inds] = tot / self.acc_sum[i]\n                else:\n                    weights[inds] = tot / num_in_bin\n        if n > 0:\n            weights /= n\n        loss = weight_reduce_loss(\n            loss, weights, reduction=reduction, avg_factor=tot)\n        return loss * self.loss_weight\n"
  },
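A sketch of the two GHM losses (not part of the repository): `GHMC` accepts index labels, which are expanded to one-hot internally, with a per-sample `label_weight` marking valid samples, and `GHMR` weights an authentic-smooth-L1 regression loss by gradient density.

```python
import torch
from mmdet.models.losses import GHMC, GHMR

ghmc = GHMC(bins=10, momentum=0.75)
logits = torch.randn(32, 80)
labels = torch.randint(0, 81, (32,))   # 80 == background, expands to an all-zero row
label_weight = torch.ones(32)          # 1 = valid sample, 0 = ignored
loss_cls = ghmc(logits, labels, label_weight)

ghmr = GHMR(mu=0.02, bins=10)
bbox_pred = torch.randn(32, 4)
bbox_target = torch.randn(32, 4)
bbox_weight = torch.ones(32, 4)
loss_bbox = ghmr(bbox_pred, bbox_target, bbox_weight)
print(loss_cls.item(), loss_bbox.item())
```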
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/iou_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\nimport warnings\n\nimport mmcv\nimport torch\nimport torch.nn as nn\n\nfrom mmdet.core import bbox_overlaps\nfrom ..builder import LOSSES\nfrom .utils import weighted_loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef iou_loss(pred, target, linear=False, mode='log', eps=1e-6):\n    \"\"\"IoU loss.\n\n    Computing the IoU loss between a set of predicted bboxes and target bboxes.\n    The loss is calculated as negative log of IoU.\n\n    Args:\n        pred (torch.Tensor): Predicted bboxes of format (x1, y1, x2, y2),\n            shape (n, 4).\n        target (torch.Tensor): Corresponding gt bboxes, shape (n, 4).\n        linear (bool, optional): If True, use linear scale of loss instead of\n            log scale. Default: False.\n        mode (str): Loss scaling mode, including \"linear\", \"square\", and \"log\".\n            Default: 'log'\n        eps (float): Eps to avoid log(0).\n\n    Return:\n        torch.Tensor: Loss tensor.\n    \"\"\"\n    assert mode in ['linear', 'square', 'log']\n    if linear:\n        mode = 'linear'\n        warnings.warn('DeprecationWarning: Setting \"linear=True\" in '\n                      'iou_loss is deprecated, please use \"mode=`linear`\" '\n                      'instead.')\n    ious = bbox_overlaps(pred, target, is_aligned=True).clamp(min=eps)\n    if mode == 'linear':\n        loss = 1 - ious\n    elif mode == 'square':\n        loss = 1 - ious**2\n    elif mode == 'log':\n        loss = -ious.log()\n    else:\n        raise NotImplementedError\n    return loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef bounded_iou_loss(pred, target, beta=0.2, eps=1e-3):\n    \"\"\"BIoULoss.\n\n    This is an implementation of paper\n    `Improving Object Localization with Fitness NMS and Bounded IoU Loss.\n    <https://arxiv.org/abs/1711.00164>`_.\n\n    Args:\n        pred (torch.Tensor): Predicted bboxes.\n        target (torch.Tensor): Target bboxes.\n        beta (float): beta parameter in smoothl1.\n        eps (float): eps to avoid NaN.\n    \"\"\"\n    pred_ctrx = (pred[:, 0] + pred[:, 2]) * 0.5\n    pred_ctry = (pred[:, 1] + pred[:, 3]) * 0.5\n    pred_w = pred[:, 2] - pred[:, 0]\n    pred_h = pred[:, 3] - pred[:, 1]\n    with torch.no_grad():\n        target_ctrx = (target[:, 0] + target[:, 2]) * 0.5\n        target_ctry = (target[:, 1] + target[:, 3]) * 0.5\n        target_w = target[:, 2] - target[:, 0]\n        target_h = target[:, 3] - target[:, 1]\n\n    dx = target_ctrx - pred_ctrx\n    dy = target_ctry - pred_ctry\n\n    loss_dx = 1 - torch.max(\n        (target_w - 2 * dx.abs()) /\n        (target_w + 2 * dx.abs() + eps), torch.zeros_like(dx))\n    loss_dy = 1 - torch.max(\n        (target_h - 2 * dy.abs()) /\n        (target_h + 2 * dy.abs() + eps), torch.zeros_like(dy))\n    loss_dw = 1 - torch.min(target_w / (pred_w + eps), pred_w /\n                            (target_w + eps))\n    loss_dh = 1 - torch.min(target_h / (pred_h + eps), pred_h /\n                            (target_h + eps))\n    # view(..., -1) does not work for empty tensor\n    loss_comb = torch.stack([loss_dx, loss_dy, loss_dw, loss_dh],\n                            dim=-1).flatten(1)\n\n    loss = torch.where(loss_comb < beta, 0.5 * loss_comb * loss_comb / beta,\n                       loss_comb - 0.5 * beta)\n    return loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef giou_loss(pred, target, eps=1e-7):\n    r\"\"\"`Generalized 
Intersection over Union: A Metric and A Loss for Bounding\n    Box Regression <https://arxiv.org/abs/1902.09630>`_.\n\n    Args:\n        pred (torch.Tensor): Predicted bboxes of format (x1, y1, x2, y2),\n            shape (n, 4).\n        target (torch.Tensor): Corresponding gt bboxes, shape (n, 4).\n        eps (float): Eps to avoid log(0).\n\n    Return:\n        Tensor: Loss tensor.\n    \"\"\"\n    gious = bbox_overlaps(pred, target, mode='giou', is_aligned=True, eps=eps)\n    loss = 1 - gious\n    return loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef diou_loss(pred, target, eps=1e-7):\n    r\"\"\"`Implementation of Distance-IoU Loss: Faster and Better\n    Learning for Bounding Box Regression, https://arxiv.org/abs/1911.08287`_.\n\n    Code is modified from https://github.com/Zzh-tju/DIoU.\n\n    Args:\n        pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),\n            shape (n, 4).\n        target (Tensor): Corresponding gt bboxes, shape (n, 4).\n        eps (float): Eps to avoid log(0).\n    Return:\n        Tensor: Loss tensor.\n    \"\"\"\n    # overlap\n    lt = torch.max(pred[:, :2], target[:, :2])\n    rb = torch.min(pred[:, 2:], target[:, 2:])\n    wh = (rb - lt).clamp(min=0)\n    overlap = wh[:, 0] * wh[:, 1]\n\n    # union\n    ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])\n    ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])\n    union = ap + ag - overlap + eps\n\n    # IoU\n    ious = overlap / union\n\n    # enclose area\n    enclose_x1y1 = torch.min(pred[:, :2], target[:, :2])\n    enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:])\n    enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0)\n\n    cw = enclose_wh[:, 0]\n    ch = enclose_wh[:, 1]\n\n    c2 = cw**2 + ch**2 + eps\n\n    b1_x1, b1_y1 = pred[:, 0], pred[:, 1]\n    b1_x2, b1_y2 = pred[:, 2], pred[:, 3]\n    b2_x1, b2_y1 = target[:, 0], target[:, 1]\n    b2_x2, b2_y2 = target[:, 2], target[:, 3]\n\n    left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4\n    right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4\n    rho2 = left + right\n\n    # DIoU\n    dious = ious - rho2 / c2\n    loss = 1 - dious\n    return loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef ciou_loss(pred, target, eps=1e-7):\n    r\"\"\"`Implementation of paper `Enhancing Geometric Factors into\n    Model Learning and Inference for Object Detection and Instance\n    Segmentation <https://arxiv.org/abs/2005.03572>`_.\n\n    Code is modified from https://github.com/Zzh-tju/CIoU.\n\n    Args:\n        pred (Tensor): Predicted bboxes of format (x1, y1, x2, y2),\n            shape (n, 4).\n        target (Tensor): Corresponding gt bboxes, shape (n, 4).\n        eps (float): Eps to avoid log(0).\n    Return:\n        Tensor: Loss tensor.\n    \"\"\"\n    # overlap\n    lt = torch.max(pred[:, :2], target[:, :2])\n    rb = torch.min(pred[:, 2:], target[:, 2:])\n    wh = (rb - lt).clamp(min=0)\n    overlap = wh[:, 0] * wh[:, 1]\n\n    # union\n    ap = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])\n    ag = (target[:, 2] - target[:, 0]) * (target[:, 3] - target[:, 1])\n    union = ap + ag - overlap + eps\n\n    # IoU\n    ious = overlap / union\n\n    # enclose area\n    enclose_x1y1 = torch.min(pred[:, :2], target[:, :2])\n    enclose_x2y2 = torch.max(pred[:, 2:], target[:, 2:])\n    enclose_wh = (enclose_x2y2 - enclose_x1y1).clamp(min=0)\n\n    cw = enclose_wh[:, 0]\n    ch = enclose_wh[:, 1]\n\n    c2 = cw**2 + ch**2 + eps\n\n    b1_x1, b1_y1 = 
pred[:, 0], pred[:, 1]\n    b1_x2, b1_y2 = pred[:, 2], pred[:, 3]\n    b2_x1, b2_y1 = target[:, 0], target[:, 1]\n    b2_x2, b2_y2 = target[:, 2], target[:, 3]\n\n    w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps\n    w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps\n\n    left = ((b2_x1 + b2_x2) - (b1_x1 + b1_x2))**2 / 4\n    right = ((b2_y1 + b2_y2) - (b1_y1 + b1_y2))**2 / 4\n    rho2 = left + right\n\n    factor = 4 / math.pi**2\n    v = factor * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2)\n\n    with torch.no_grad():\n        alpha = (ious > 0.5).float() * v / (1 - ious + v)\n\n    # CIoU\n    cious = ious - (rho2 / c2 + alpha * v)\n    loss = 1 - cious.clamp(min=-1.0, max=1.0)\n    return loss\n\n\n@LOSSES.register_module()\nclass IoULoss(nn.Module):\n    \"\"\"IoULoss.\n\n    Computing the IoU loss between a set of predicted bboxes and target bboxes.\n\n    Args:\n        linear (bool): If True, use linear scale of loss else determined\n            by mode. Default: False.\n        eps (float): Eps to avoid log(0).\n        reduction (str): Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float): Weight of loss.\n        mode (str): Loss scaling mode, including \"linear\", \"square\", and \"log\".\n            Default: 'log'\n    \"\"\"\n\n    def __init__(self,\n                 linear=False,\n                 eps=1e-6,\n                 reduction='mean',\n                 loss_weight=1.0,\n                 mode='log'):\n        super(IoULoss, self).__init__()\n        assert mode in ['linear', 'square', 'log']\n        if linear:\n            mode = 'linear'\n            warnings.warn('DeprecationWarning: Setting \"linear=True\" in '\n                          'IOULoss is deprecated, please use \"mode=`linear`\" '\n                          'instead.')\n        self.mode = mode\n        self.linear = linear\n        self.eps = eps\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None. 
Options are \"none\", \"mean\" and \"sum\".\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if (weight is not None) and (not torch.any(weight > 0)) and (\n                reduction != 'none'):\n            if pred.dim() == weight.dim() + 1:\n                weight = weight.unsqueeze(1)\n            return (pred * weight).sum()  # 0\n        if weight is not None and weight.dim() > 1:\n            # TODO: remove this in the future\n            # reduce the weight of shape (n, 4) to (n,) to match the\n            # iou_loss of shape (n,)\n            assert weight.shape == pred.shape\n            weight = weight.mean(-1)\n        loss = self.loss_weight * iou_loss(\n            pred,\n            target,\n            weight,\n            mode=self.mode,\n            eps=self.eps,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            **kwargs)\n        return loss\n\n\n@LOSSES.register_module()\nclass BoundedIoULoss(nn.Module):\n\n    def __init__(self, beta=0.2, eps=1e-3, reduction='mean', loss_weight=1.0):\n        super(BoundedIoULoss, self).__init__()\n        self.beta = beta\n        self.eps = eps\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        if weight is not None and not torch.any(weight > 0):\n            if pred.dim() == weight.dim() + 1:\n                weight = weight.unsqueeze(1)\n            return (pred * weight).sum()  # 0\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss = self.loss_weight * bounded_iou_loss(\n            pred,\n            target,\n            weight,\n            beta=self.beta,\n            eps=self.eps,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            **kwargs)\n        return loss\n\n\n@LOSSES.register_module()\nclass GIoULoss(nn.Module):\n\n    def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):\n        super(GIoULoss, self).__init__()\n        self.eps = eps\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        if weight is not None and not torch.any(weight > 0):\n            if pred.dim() == weight.dim() + 1:\n                weight = weight.unsqueeze(1)\n            return (pred * weight).sum()  # 0\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if weight is not None and weight.dim() > 1:\n            # TODO: remove this in the future\n            # reduce the weight of shape (n, 4) to (n,) to match the\n            # giou_loss of shape (n,)\n            assert weight.shape == pred.shape\n            weight = weight.mean(-1)\n        loss = self.loss_weight * giou_loss(\n            pred,\n            target,\n            weight,\n            eps=self.eps,\n            reduction=reduction,\n            
avg_factor=avg_factor,\n            **kwargs)\n        return loss\n\n\n@LOSSES.register_module()\nclass DIoULoss(nn.Module):\n\n    def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):\n        super(DIoULoss, self).__init__()\n        self.eps = eps\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        if weight is not None and not torch.any(weight > 0):\n            if pred.dim() == weight.dim() + 1:\n                weight = weight.unsqueeze(1)\n            return (pred * weight).sum()  # 0\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if weight is not None and weight.dim() > 1:\n            # TODO: remove this in the future\n            # reduce the weight of shape (n, 4) to (n,) to match the\n            # giou_loss of shape (n,)\n            assert weight.shape == pred.shape\n            weight = weight.mean(-1)\n        loss = self.loss_weight * diou_loss(\n            pred,\n            target,\n            weight,\n            eps=self.eps,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            **kwargs)\n        return loss\n\n\n@LOSSES.register_module()\nclass CIoULoss(nn.Module):\n\n    def __init__(self, eps=1e-6, reduction='mean', loss_weight=1.0):\n        super(CIoULoss, self).__init__()\n        self.eps = eps\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        if weight is not None and not torch.any(weight > 0):\n            if pred.dim() == weight.dim() + 1:\n                weight = weight.unsqueeze(1)\n            return (pred * weight).sum()  # 0\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if weight is not None and weight.dim() > 1:\n            # TODO: remove this in the future\n            # reduce the weight of shape (n, 4) to (n,) to match the\n            # giou_loss of shape (n,)\n            assert weight.shape == pred.shape\n            weight = weight.mean(-1)\n        loss = self.loss_weight * ciou_loss(\n            pred,\n            target,\n            weight,\n            eps=self.eps,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            **kwargs)\n        return loss\n"
  },
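A quick sketch of the IoU-family losses defined above (illustrative, not repo code): all of them expect aligned `(x1, y1, x2, y2)` boxes, one prediction per target row.

```python
import torch
from mmdet.models.losses import IoULoss, GIoULoss, DIoULoss, CIoULoss

pred = torch.tensor([[10., 10., 50., 50.],
                     [20., 20., 60., 80.]])
target = torch.tensor([[12., 8., 48., 55.],
                       [25., 15., 70., 85.]])

print(IoULoss(mode='log')(pred, target).item())   # -log(IoU)
print(GIoULoss()(pred, target).item())            # 1 - GIoU
print(DIoULoss()(pred, target).item())            # 1 - DIoU
print(CIoULoss()(pred, target).item())            # 1 - CIoU
```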
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/kd_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ..builder import LOSSES\nfrom .utils import weighted_loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef knowledge_distillation_kl_div_loss(pred,\n                                       soft_label,\n                                       T,\n                                       detach_target=True):\n    r\"\"\"Loss function for knowledge distilling using KL divergence.\n\n    Args:\n        pred (Tensor): Predicted logits with shape (N, n + 1).\n        soft_label (Tensor): Target logits with shape (N, N + 1).\n        T (int): Temperature for distillation.\n        detach_target (bool): Remove soft_label from automatic differentiation\n\n    Returns:\n        torch.Tensor: Loss tensor with shape (N,).\n    \"\"\"\n    assert pred.size() == soft_label.size()\n    target = F.softmax(soft_label / T, dim=1)\n    if detach_target:\n        target = target.detach()\n\n    kd_loss = F.kl_div(\n        F.log_softmax(pred / T, dim=1), target, reduction='none').mean(1) * (\n            T * T)\n\n    return kd_loss\n\n\n@LOSSES.register_module()\nclass KnowledgeDistillationKLDivLoss(nn.Module):\n    \"\"\"Loss function for knowledge distilling using KL divergence.\n\n    Args:\n        reduction (str): Options are `'none'`, `'mean'` and `'sum'`.\n        loss_weight (float): Loss weight of current loss.\n        T (int): Temperature for distillation.\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0, T=10):\n        super(KnowledgeDistillationKLDivLoss, self).__init__()\n        assert T >= 1\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.T = T\n\n    def forward(self,\n                pred,\n                soft_label,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (Tensor): Predicted logits with shape (N, n + 1).\n            soft_label (Tensor): Target logits with shape (N, N + 1).\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n\n        loss_kd = self.loss_weight * knowledge_distillation_kl_div_loss(\n            pred,\n            soft_label,\n            weight,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            T=self.T)\n\n        return loss_kd\n"
  },
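A sketch of the distillation loss above (illustrative only): student and teacher logits must have the same shape, as enforced by the assert, so the `(N, N + 1)` shape in the `soft_label` docstring should be read as `(N, n + 1)`, matching `pred`.

```python
import torch
from mmdet.models.losses import KnowledgeDistillationKLDivLoss

kd = KnowledgeDistillationKLDivLoss(T=10, loss_weight=1.0)
student_logits = torch.randn(16, 81)
teacher_logits = torch.randn(16, 81)   # same shape as the student's logits
loss = kd(student_logits, teacher_logits)
print(loss.item())
```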
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/mse_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ..builder import LOSSES\nfrom .utils import weighted_loss\n\n\n@weighted_loss\ndef mse_loss(pred, target):\n    \"\"\"Wrapper of mse loss.\"\"\"\n    return F.mse_loss(pred, target, reduction='none')\n\n\n@LOSSES.register_module()\nclass MSELoss(nn.Module):\n    \"\"\"MSELoss.\n\n    Args:\n        reduction (str, optional): The method that reduces the loss to a\n            scalar. Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of the loss. Defaults to 1.0\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super().__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function of loss.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            weight (torch.Tensor, optional): Weight of the loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n\n        Returns:\n            torch.Tensor: The calculated loss\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss = self.loss_weight * mse_loss(\n            pred, target, weight, reduction=reduction, avg_factor=avg_factor)\n        return loss\n"
  },
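A sketch of `MSELoss` (illustrative, not repo code): it is `F.mse_loss` plus the shared per-element weighting and reduction handling used by the other losses.

```python
import torch
from mmdet.models.losses import MSELoss

criterion = MSELoss(reduction='mean', loss_weight=1.0)
pred = torch.randn(8, 4)
target = torch.randn(8, 4)
weight = torch.ones(8, 4)              # optional per-element weights
print(criterion(pred, target, weight).item())
```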
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/pisa_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.core import bbox_overlaps\n\n\n@mmcv.jit(derivate=True, coderize=True)\ndef isr_p(cls_score,\n          bbox_pred,\n          bbox_targets,\n          rois,\n          sampling_results,\n          loss_cls,\n          bbox_coder,\n          k=2,\n          bias=0,\n          num_class=80):\n    \"\"\"Importance-based Sample Reweighting (ISR_P), positive part.\n\n    Args:\n        cls_score (Tensor): Predicted classification scores.\n        bbox_pred (Tensor): Predicted bbox deltas.\n        bbox_targets (tuple[Tensor]): A tuple of bbox targets, the are\n            labels, label_weights, bbox_targets, bbox_weights, respectively.\n        rois (Tensor): Anchors (single_stage) in shape (n, 4) or RoIs\n            (two_stage) in shape (n, 5).\n        sampling_results (obj): Sampling results.\n        loss_cls (func): Classification loss func of the head.\n        bbox_coder (obj): BBox coder of the head.\n        k (float): Power of the non-linear mapping.\n        bias (float): Shift of the non-linear mapping.\n        num_class (int): Number of classes, default: 80.\n\n    Return:\n        tuple([Tensor]): labels, imp_based_label_weights, bbox_targets,\n            bbox_target_weights\n    \"\"\"\n\n    labels, label_weights, bbox_targets, bbox_weights = bbox_targets\n    pos_label_inds = ((labels >= 0) &\n                      (labels < num_class)).nonzero().reshape(-1)\n    pos_labels = labels[pos_label_inds]\n\n    # if no positive samples, return the original targets\n    num_pos = float(pos_label_inds.size(0))\n    if num_pos == 0:\n        return labels, label_weights, bbox_targets, bbox_weights\n\n    # merge pos_assigned_gt_inds of per image to a single tensor\n    gts = list()\n    last_max_gt = 0\n    for i in range(len(sampling_results)):\n        gt_i = sampling_results[i].pos_assigned_gt_inds\n        gts.append(gt_i + last_max_gt)\n        if len(gt_i) != 0:\n            last_max_gt = gt_i.max() + 1\n    gts = torch.cat(gts)\n    assert len(gts) == num_pos\n\n    cls_score = cls_score.detach()\n    bbox_pred = bbox_pred.detach()\n\n    # For single stage detectors, rois here indicate anchors, in shape (N, 4)\n    # For two stage detectors, rois are in shape (N, 5)\n    if rois.size(-1) == 5:\n        pos_rois = rois[pos_label_inds][:, 1:]\n    else:\n        pos_rois = rois[pos_label_inds]\n\n    if bbox_pred.size(-1) > 4:\n        bbox_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)\n        pos_delta_pred = bbox_pred[pos_label_inds, pos_labels].view(-1, 4)\n    else:\n        pos_delta_pred = bbox_pred[pos_label_inds].view(-1, 4)\n\n    # compute iou of the predicted bbox and the corresponding GT\n    pos_delta_target = bbox_targets[pos_label_inds].view(-1, 4)\n    pos_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_pred)\n    target_bbox_pred = bbox_coder.decode(pos_rois, pos_delta_target)\n    ious = bbox_overlaps(pos_bbox_pred, target_bbox_pred, is_aligned=True)\n\n    pos_imp_weights = label_weights[pos_label_inds]\n    # Two steps to compute IoU-HLR. 
Samples are first sorted by IoU locally,\n    # then sorted again within the same-rank group\n    max_l_num = pos_labels.bincount().max()\n    for label in pos_labels.unique():\n        l_inds = (pos_labels == label).nonzero().view(-1)\n        l_gts = gts[l_inds]\n        for t in l_gts.unique():\n            t_inds = l_inds[l_gts == t]\n            t_ious = ious[t_inds]\n            _, t_iou_rank_idx = t_ious.sort(descending=True)\n            _, t_iou_rank = t_iou_rank_idx.sort()\n            ious[t_inds] += max_l_num - t_iou_rank.float()\n        l_ious = ious[l_inds]\n        _, l_iou_rank_idx = l_ious.sort(descending=True)\n        _, l_iou_rank = l_iou_rank_idx.sort()  # IoU-HLR\n        # linearly map HLR to label weights\n        pos_imp_weights[l_inds] *= (max_l_num - l_iou_rank.float()) / max_l_num\n\n    pos_imp_weights = (bias + pos_imp_weights * (1 - bias)).pow(k)\n\n    # normalize to make the new weighted loss value equal to the original loss\n    pos_loss_cls = loss_cls(\n        cls_score[pos_label_inds], pos_labels, reduction_override='none')\n    if pos_loss_cls.dim() > 1:\n        ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds][:,\n                                                                        None]\n        new_pos_loss_cls = pos_loss_cls * pos_imp_weights[:, None]\n    else:\n        ori_pos_loss_cls = pos_loss_cls * label_weights[pos_label_inds]\n        new_pos_loss_cls = pos_loss_cls * pos_imp_weights\n    pos_loss_cls_ratio = ori_pos_loss_cls.sum() / new_pos_loss_cls.sum()\n    pos_imp_weights = pos_imp_weights * pos_loss_cls_ratio\n    label_weights[pos_label_inds] = pos_imp_weights\n\n    bbox_targets = labels, label_weights, bbox_targets, bbox_weights\n    return bbox_targets\n\n\n@mmcv.jit(derivate=True, coderize=True)\ndef carl_loss(cls_score,\n              labels,\n              bbox_pred,\n              bbox_targets,\n              loss_bbox,\n              k=1,\n              bias=0.2,\n              avg_factor=None,\n              sigmoid=False,\n              num_class=80):\n    \"\"\"Classification-Aware Regression Loss (CARL).\n\n    Args:\n        cls_score (Tensor): Predicted classification scores.\n        labels (Tensor): Targets of classification.\n        bbox_pred (Tensor): Predicted bbox deltas.\n        bbox_targets (Tensor): Target of bbox regression.\n        loss_bbox (func): Regression loss func of the head.\n        bbox_coder (obj): BBox coder of the head.\n        k (float): Power of the non-linear mapping.\n        bias (float): Shift of the non-linear mapping.\n        avg_factor (int): Average factor used in regression loss.\n        sigmoid (bool): Activation of the classification score.\n        num_class (int): Number of classes, default: 80.\n\n    Return:\n        dict: CARL loss dict.\n    \"\"\"\n    pos_label_inds = ((labels >= 0) &\n                      (labels < num_class)).nonzero().reshape(-1)\n    if pos_label_inds.numel() == 0:\n        return dict(loss_carl=cls_score.sum()[None] * 0.)\n    pos_labels = labels[pos_label_inds]\n\n    # multiply pos_cls_score with the corresponding bbox weight\n    # and remain gradient\n    if sigmoid:\n        pos_cls_score = cls_score.sigmoid()[pos_label_inds, pos_labels]\n    else:\n        pos_cls_score = cls_score.softmax(-1)[pos_label_inds, pos_labels]\n    carl_loss_weights = (bias + (1 - bias) * pos_cls_score).pow(k)\n\n    # normalize carl_loss_weight to make its sum equal to num positive\n    num_pos = float(pos_cls_score.size(0))\n    weight_ratio = 
num_pos / carl_loss_weights.sum()\n    carl_loss_weights *= weight_ratio\n\n    if avg_factor is None:\n        avg_factor = bbox_targets.size(0)\n    # if class agnostic, bbox_pred is in shape (N, 4)\n    # otherwise, bbox_pred is in shape (N, #classes, 4)\n    if bbox_pred.size(-1) > 4:\n        bbox_pred = bbox_pred.view(bbox_pred.size(0), -1, 4)\n        pos_bbox_preds = bbox_pred[pos_label_inds, pos_labels]\n    else:\n        pos_bbox_preds = bbox_pred[pos_label_inds]\n    ori_loss_reg = loss_bbox(\n        pos_bbox_preds,\n        bbox_targets[pos_label_inds],\n        reduction_override='none') / avg_factor\n    loss_carl = (ori_loss_reg * carl_loss_weights[:, None]).sum()\n    return dict(loss_carl=loss_carl[None])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/seesaw_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ..builder import LOSSES\nfrom .accuracy import accuracy\nfrom .cross_entropy_loss import cross_entropy\nfrom .utils import weight_reduce_loss\n\n\ndef seesaw_ce_loss(cls_score,\n                   labels,\n                   label_weights,\n                   cum_samples,\n                   num_classes,\n                   p,\n                   q,\n                   eps,\n                   reduction='mean',\n                   avg_factor=None):\n    \"\"\"Calculate the Seesaw CrossEntropy loss.\n\n    Args:\n        cls_score (torch.Tensor): The prediction with shape (N, C),\n             C is the number of classes.\n        labels (torch.Tensor): The learning label of the prediction.\n        label_weights (torch.Tensor): Sample-wise loss weight.\n        cum_samples (torch.Tensor): Cumulative samples for each category.\n        num_classes (int): The number of classes.\n        p (float): The ``p`` in the mitigation factor.\n        q (float): The ``q`` in the compenstation factor.\n        eps (float): The minimal value of divisor to smooth\n             the computation of compensation factor\n        reduction (str, optional): The method used to reduce the loss.\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n\n    Returns:\n        torch.Tensor: The calculated loss\n    \"\"\"\n    assert cls_score.size(-1) == num_classes\n    assert len(cum_samples) == num_classes\n\n    onehot_labels = F.one_hot(labels, num_classes)\n    seesaw_weights = cls_score.new_ones(onehot_labels.size())\n\n    # mitigation factor\n    if p > 0:\n        sample_ratio_matrix = cum_samples[None, :].clamp(\n            min=1) / cum_samples[:, None].clamp(min=1)\n        index = (sample_ratio_matrix < 1.0).float()\n        sample_weights = sample_ratio_matrix.pow(p) * index + (1 - index)\n        mitigation_factor = sample_weights[labels.long(), :]\n        seesaw_weights = seesaw_weights * mitigation_factor\n\n    # compensation factor\n    if q > 0:\n        scores = F.softmax(cls_score.detach(), dim=1)\n        self_scores = scores[\n            torch.arange(0, len(scores)).to(scores.device).long(),\n            labels.long()]\n        score_matrix = scores / self_scores[:, None].clamp(min=eps)\n        index = (score_matrix > 1.0).float()\n        compensation_factor = score_matrix.pow(q) * index + (1 - index)\n        seesaw_weights = seesaw_weights * compensation_factor\n\n    cls_score = cls_score + (seesaw_weights.log() * (1 - onehot_labels))\n\n    loss = F.cross_entropy(cls_score, labels, weight=None, reduction='none')\n\n    if label_weights is not None:\n        label_weights = label_weights.float()\n    loss = weight_reduce_loss(\n        loss, weight=label_weights, reduction=reduction, avg_factor=avg_factor)\n    return loss\n\n\n@LOSSES.register_module()\nclass SeesawLoss(nn.Module):\n    \"\"\"\n    Seesaw Loss for Long-Tailed Instance Segmentation (CVPR 2021)\n    arXiv: https://arxiv.org/abs/2008.10032\n\n    Args:\n        use_sigmoid (bool, optional): Whether the prediction uses sigmoid\n             of softmax. 
Only False is supported.\n        p (float, optional): The ``p`` in the mitigation factor.\n             Defaults to 0.8.\n        q (float, optional): The ``q`` in the compenstation factor.\n             Defaults to 2.0.\n        num_classes (int, optional): The number of classes.\n             Default to 1203 for LVIS v1 dataset.\n        eps (float, optional): The minimal value of divisor to smooth\n             the computation of compensation factor\n        reduction (str, optional): The method that reduces the loss to a\n             scalar. Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of the loss. Defaults to 1.0\n        return_dict (bool, optional): Whether return the losses as a dict.\n             Default to True.\n    \"\"\"\n\n    def __init__(self,\n                 use_sigmoid=False,\n                 p=0.8,\n                 q=2.0,\n                 num_classes=1203,\n                 eps=1e-2,\n                 reduction='mean',\n                 loss_weight=1.0,\n                 return_dict=True):\n        super(SeesawLoss, self).__init__()\n        assert not use_sigmoid\n        self.use_sigmoid = False\n        self.p = p\n        self.q = q\n        self.num_classes = num_classes\n        self.eps = eps\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n        self.return_dict = return_dict\n\n        # 0 for pos, 1 for neg\n        self.cls_criterion = seesaw_ce_loss\n\n        # cumulative samples for each category\n        self.register_buffer(\n            'cum_samples',\n            torch.zeros(self.num_classes + 1, dtype=torch.float))\n\n        # custom output channels of the classifier\n        self.custom_cls_channels = True\n        # custom activation of cls_score\n        self.custom_activation = True\n        # custom accuracy of the classsifier\n        self.custom_accuracy = True\n\n    def _split_cls_score(self, cls_score):\n        # split cls_score to cls_score_classes and cls_score_objectness\n        assert cls_score.size(-1) == self.num_classes + 2\n        cls_score_classes = cls_score[..., :-2]\n        cls_score_objectness = cls_score[..., -2:]\n        return cls_score_classes, cls_score_objectness\n\n    def get_cls_channels(self, num_classes):\n        \"\"\"Get custom classification channels.\n\n        Args:\n            num_classes (int): The number of classes.\n\n        Returns:\n            int: The custom classification channels.\n        \"\"\"\n        assert num_classes == self.num_classes\n        return num_classes + 2\n\n    def get_activation(self, cls_score):\n        \"\"\"Get custom activation of cls_score.\n\n        Args:\n            cls_score (torch.Tensor): The prediction with shape (N, C + 2).\n\n        Returns:\n            torch.Tensor: The custom activation of cls_score with shape\n                 (N, C + 1).\n        \"\"\"\n        cls_score_classes, cls_score_objectness = self._split_cls_score(\n            cls_score)\n        score_classes = F.softmax(cls_score_classes, dim=-1)\n        score_objectness = F.softmax(cls_score_objectness, dim=-1)\n        score_pos = score_objectness[..., [0]]\n        score_neg = score_objectness[..., [1]]\n        score_classes = score_classes * score_pos\n        scores = torch.cat([score_classes, score_neg], dim=-1)\n        return scores\n\n    def get_accuracy(self, cls_score, labels):\n        \"\"\"Get custom accuracy w.r.t. 
cls_score and labels.\n\n        Args:\n            cls_score (torch.Tensor): The prediction with shape (N, C + 2).\n            labels (torch.Tensor): The learning label of the prediction.\n\n        Returns:\n            Dict [str, torch.Tensor]: The accuracy for objectness and classes,\n                 respectively.\n        \"\"\"\n        pos_inds = labels < self.num_classes\n        obj_labels = (labels == self.num_classes).long()\n        cls_score_classes, cls_score_objectness = self._split_cls_score(\n            cls_score)\n        acc_objectness = accuracy(cls_score_objectness, obj_labels)\n        acc_classes = accuracy(cls_score_classes[pos_inds], labels[pos_inds])\n        acc = dict()\n        acc['acc_objectness'] = acc_objectness\n        acc['acc_classes'] = acc_classes\n        return acc\n\n    def forward(self,\n                cls_score,\n                labels,\n                label_weights=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            cls_score (torch.Tensor): The prediction with shape (N, C + 2).\n            labels (torch.Tensor): The learning label of the prediction.\n            label_weights (torch.Tensor, optional): Sample-wise loss weight.\n            avg_factor (int, optional): Average factor that is used to average\n                 the loss. Defaults to None.\n            reduction (str, optional): The method used to reduce the loss.\n                 Options are \"none\", \"mean\" and \"sum\".\n        Returns:\n            torch.Tensor | Dict [str, torch.Tensor]:\n                 if return_dict == False: The calculated loss |\n                 if return_dict == True: The dict of calculated losses\n                 for objectness and classes, respectively.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        assert cls_score.size(-1) == self.num_classes + 2\n        pos_inds = labels < self.num_classes\n        # 0 for pos, 1 for neg\n        obj_labels = (labels == self.num_classes).long()\n\n        # accumulate the samples for each category\n        unique_labels = labels.unique()\n        for u_l in unique_labels:\n            inds_ = labels == u_l.item()\n            self.cum_samples[u_l] += inds_.sum()\n\n        if label_weights is not None:\n            label_weights = label_weights.float()\n        else:\n            label_weights = labels.new_ones(labels.size(), dtype=torch.float)\n\n        cls_score_classes, cls_score_objectness = self._split_cls_score(\n            cls_score)\n        # calculate loss_cls_classes (only need pos samples)\n        if pos_inds.sum() > 0:\n            loss_cls_classes = self.loss_weight * self.cls_criterion(\n                cls_score_classes[pos_inds], labels[pos_inds],\n                label_weights[pos_inds], self.cum_samples[:self.num_classes],\n                self.num_classes, self.p, self.q, self.eps, reduction,\n                avg_factor)\n        else:\n            loss_cls_classes = cls_score_classes[pos_inds].sum()\n        # calculate loss_cls_objectness\n        loss_cls_objectness = self.loss_weight * cross_entropy(\n            cls_score_objectness, obj_labels, label_weights, reduction,\n            avg_factor)\n\n        if self.return_dict:\n            loss_cls = dict()\n            loss_cls['loss_cls_objectness'] = loss_cls_objectness\n            
loss_cls['loss_cls_classes'] = loss_cls_classes\n        else:\n            loss_cls = loss_cls_classes + loss_cls_objectness\n        return loss_cls\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/smooth_l1_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\nimport torch.nn as nn\n\nfrom ..builder import LOSSES\nfrom .utils import weighted_loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef smooth_l1_loss(pred, target, beta=1.0):\n    \"\"\"Smooth L1 loss.\n\n    Args:\n        pred (torch.Tensor): The prediction.\n        target (torch.Tensor): The learning target of the prediction.\n        beta (float, optional): The threshold in the piecewise function.\n            Defaults to 1.0.\n\n    Returns:\n        torch.Tensor: Calculated loss\n    \"\"\"\n    assert beta > 0\n    if target.numel() == 0:\n        return pred.sum() * 0\n\n    assert pred.size() == target.size()\n    diff = torch.abs(pred - target)\n    loss = torch.where(diff < beta, 0.5 * diff * diff / beta,\n                       diff - 0.5 * beta)\n    return loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\n@weighted_loss\ndef l1_loss(pred, target):\n    \"\"\"L1 loss.\n\n    Args:\n        pred (torch.Tensor): The prediction.\n        target (torch.Tensor): The learning target of the prediction.\n\n    Returns:\n        torch.Tensor: Calculated loss\n    \"\"\"\n    if target.numel() == 0:\n        return pred.sum() * 0\n\n    assert pred.size() == target.size()\n    loss = torch.abs(pred - target)\n    return loss\n\n\n@LOSSES.register_module()\nclass SmoothL1Loss(nn.Module):\n    \"\"\"Smooth L1 loss.\n\n    Args:\n        beta (float, optional): The threshold in the piecewise function.\n            Defaults to 1.0.\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\". Defaults to \"mean\".\n        loss_weight (float, optional): The weight of loss.\n    \"\"\"\n\n    def __init__(self, beta=1.0, reduction='mean', loss_weight=1.0):\n        super(SmoothL1Loss, self).__init__()\n        self.beta = beta\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None,\n                **kwargs):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. 
Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss_bbox = self.loss_weight * smooth_l1_loss(\n            pred,\n            target,\n            weight,\n            beta=self.beta,\n            reduction=reduction,\n            avg_factor=avg_factor,\n            **kwargs)\n        return loss_bbox\n\n\n@LOSSES.register_module()\nclass L1Loss(nn.Module):\n    \"\"\"L1 loss.\n\n    Args:\n        reduction (str, optional): The method to reduce the loss.\n            Options are \"none\", \"mean\" and \"sum\".\n        loss_weight (float, optional): The weight of loss.\n    \"\"\"\n\n    def __init__(self, reduction='mean', loss_weight=1.0):\n        super(L1Loss, self).__init__()\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Defaults to None.\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        loss_bbox = self.loss_weight * l1_loss(\n            pred, target, weight, reduction=reduction, avg_factor=avg_factor)\n        return loss_bbox\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport functools\n\nimport mmcv\nimport torch\nimport torch.nn.functional as F\n\n\ndef reduce_loss(loss, reduction):\n    \"\"\"Reduce loss as specified.\n\n    Args:\n        loss (Tensor): Elementwise loss tensor.\n        reduction (str): Options are \"none\", \"mean\" and \"sum\".\n\n    Return:\n        Tensor: Reduced loss tensor.\n    \"\"\"\n    reduction_enum = F._Reduction.get_enum(reduction)\n    # none: 0, elementwise_mean:1, sum: 2\n    if reduction_enum == 0:\n        return loss\n    elif reduction_enum == 1:\n        return loss.mean()\n    elif reduction_enum == 2:\n        return loss.sum()\n\n\n@mmcv.jit(derivate=True, coderize=True)\ndef weight_reduce_loss(loss, weight=None, reduction='mean', avg_factor=None):\n    \"\"\"Apply element-wise weight and reduce loss.\n\n    Args:\n        loss (Tensor): Element-wise loss.\n        weight (Tensor): Element-wise weights.\n        reduction (str): Same as built-in losses of PyTorch.\n        avg_factor (float): Average factor when computing the mean of losses.\n\n    Returns:\n        Tensor: Processed loss values.\n    \"\"\"\n    # if weight is specified, apply element-wise weight\n    if weight is not None:\n        loss = loss * weight\n\n    # if avg_factor is not specified, just reduce the loss\n    if avg_factor is None:\n        loss = reduce_loss(loss, reduction)\n    else:\n        # if reduction is mean, then average the loss by avg_factor\n        if reduction == 'mean':\n            # Avoid causing ZeroDivisionError when avg_factor is 0.0,\n            # i.e., all labels of an image belong to ignore index.\n            eps = torch.finfo(torch.float32).eps\n            loss = loss.sum() / (avg_factor + eps)\n        # if reduction is 'none', then do nothing, otherwise raise an error\n        elif reduction != 'none':\n            raise ValueError('avg_factor can not be used with reduction=\"sum\"')\n    return loss\n\n\ndef weighted_loss(loss_func):\n    \"\"\"Create a weighted version of a given loss function.\n\n    To use this decorator, the loss function must have the signature like\n    `loss_func(pred, target, **kwargs)`. The function only needs to compute\n    element-wise loss without any reduction. This decorator will add weight\n    and reduction arguments to the function. The decorated function will have\n    the signature like `loss_func(pred, target, weight=None, reduction='mean',\n    avg_factor=None, **kwargs)`.\n\n    :Example:\n\n    >>> import torch\n    >>> @weighted_loss\n    >>> def l1_loss(pred, target):\n    >>>     return (pred - target).abs()\n\n    >>> pred = torch.Tensor([0, 2, 3])\n    >>> target = torch.Tensor([1, 1, 1])\n    >>> weight = torch.Tensor([1, 0, 1])\n\n    >>> l1_loss(pred, target)\n    tensor(1.3333)\n    >>> l1_loss(pred, target, weight)\n    tensor(1.)\n    >>> l1_loss(pred, target, reduction='none')\n    tensor([1., 1., 2.])\n    >>> l1_loss(pred, target, weight, avg_factor=2)\n    tensor(1.5000)\n    \"\"\"\n\n    @functools.wraps(loss_func)\n    def wrapper(pred,\n                target,\n                weight=None,\n                reduction='mean',\n                avg_factor=None,\n                **kwargs):\n        # get element-wise loss\n        loss = loss_func(pred, target, **kwargs)\n        loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n        return loss\n\n    return wrapper\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/losses/varifocal_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom ..builder import LOSSES\nfrom .utils import weight_reduce_loss\n\n\n@mmcv.jit(derivate=True, coderize=True)\ndef varifocal_loss(pred,\n                   target,\n                   weight=None,\n                   alpha=0.75,\n                   gamma=2.0,\n                   iou_weighted=True,\n                   reduction='mean',\n                   avg_factor=None):\n    \"\"\"`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_\n\n    Args:\n        pred (torch.Tensor): The prediction with shape (N, C), C is the\n            number of classes\n        target (torch.Tensor): The learning target of the iou-aware\n            classification score with shape (N, C), C is the number of classes.\n        weight (torch.Tensor, optional): The weight of loss for each\n            prediction. Defaults to None.\n        alpha (float, optional): A balance factor for the negative part of\n            Varifocal Loss, which is different from the alpha of Focal Loss.\n            Defaults to 0.75.\n        gamma (float, optional): The gamma for calculating the modulating\n            factor. Defaults to 2.0.\n        iou_weighted (bool, optional): Whether to weight the loss of the\n            positive example with the iou target. Defaults to True.\n        reduction (str, optional): The method used to reduce the loss into\n            a scalar. Defaults to 'mean'. Options are \"none\", \"mean\" and\n            \"sum\".\n        avg_factor (int, optional): Average factor that is used to average\n            the loss. Defaults to None.\n    \"\"\"\n    # pred and target should be of the same size\n    assert pred.size() == target.size()\n    pred_sigmoid = pred.sigmoid()\n    target = target.type_as(pred)\n    if iou_weighted:\n        focal_weight = target * (target > 0.0).float() + \\\n            alpha * (pred_sigmoid - target).abs().pow(gamma) * \\\n            (target <= 0.0).float()\n    else:\n        focal_weight = (target > 0.0).float() + \\\n            alpha * (pred_sigmoid - target).abs().pow(gamma) * \\\n            (target <= 0.0).float()\n    loss = F.binary_cross_entropy_with_logits(\n        pred, target, reduction='none') * focal_weight\n    loss = weight_reduce_loss(loss, weight, reduction, avg_factor)\n    return loss\n\n\n@LOSSES.register_module()\nclass VarifocalLoss(nn.Module):\n\n    def __init__(self,\n                 use_sigmoid=True,\n                 alpha=0.75,\n                 gamma=2.0,\n                 iou_weighted=True,\n                 reduction='mean',\n                 loss_weight=1.0):\n        \"\"\"`Varifocal Loss <https://arxiv.org/abs/2008.13367>`_\n\n        Args:\n            use_sigmoid (bool, optional): Whether the prediction is\n                used for sigmoid or softmax. Defaults to True.\n            alpha (float, optional): A balance factor for the negative part of\n                Varifocal Loss, which is different from the alpha of Focal\n                Loss. Defaults to 0.75.\n            gamma (float, optional): The gamma for calculating the modulating\n                factor. Defaults to 2.0.\n            iou_weighted (bool, optional): Whether to weight the loss of the\n                positive examples with the iou target. Defaults to True.\n            reduction (str, optional): The method used to reduce the loss into\n                a scalar. Defaults to 'mean'. 
Options are \"none\", \"mean\" and\n                \"sum\".\n            loss_weight (float, optional): Weight of loss. Defaults to 1.0.\n        \"\"\"\n        super(VarifocalLoss, self).__init__()\n        assert use_sigmoid is True, \\\n            'Only sigmoid varifocal loss supported now.'\n        assert alpha >= 0.0\n        self.use_sigmoid = use_sigmoid\n        self.alpha = alpha\n        self.gamma = gamma\n        self.iou_weighted = iou_weighted\n        self.reduction = reduction\n        self.loss_weight = loss_weight\n\n    def forward(self,\n                pred,\n                target,\n                weight=None,\n                avg_factor=None,\n                reduction_override=None):\n        \"\"\"Forward function.\n\n        Args:\n            pred (torch.Tensor): The prediction.\n            target (torch.Tensor): The learning target of the prediction.\n            weight (torch.Tensor, optional): The weight of loss for each\n                prediction. Defaults to None.\n            avg_factor (int, optional): Average factor that is used to average\n                the loss. Defaults to None.\n            reduction_override (str, optional): The reduction method used to\n                override the original reduction method of the loss.\n                Options are \"none\", \"mean\" and \"sum\".\n\n        Returns:\n            torch.Tensor: The calculated loss\n        \"\"\"\n        assert reduction_override in (None, 'none', 'mean', 'sum')\n        reduction = (\n            reduction_override if reduction_override else self.reduction)\n        if self.use_sigmoid:\n            loss_cls = self.loss_weight * varifocal_loss(\n                pred,\n                target,\n                weight,\n                alpha=self.alpha,\n                gamma=self.gamma,\n                iou_weighted=self.iou_weighted,\n                reduction=reduction,\n                avg_factor=avg_factor)\n        else:\n            raise NotImplementedError\n        return loss_cls\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .bfp import BFP\nfrom .channel_mapper import ChannelMapper\nfrom .ct_resnet_neck import CTResNetNeck\nfrom .dilated_encoder import DilatedEncoder\nfrom .dyhead import DyHead\nfrom .fpg import FPG\nfrom .fpn import FPN\nfrom .fpn_carafe import FPN_CARAFE\nfrom .hrfpn import HRFPN\nfrom .nas_fpn import NASFPN\nfrom .nasfcos_fpn import NASFCOS_FPN\nfrom .pafpn import PAFPN\nfrom .rfp import RFP\nfrom .ssd_neck import SSDNeck\nfrom .yolo_neck import YOLOV3Neck\nfrom .yolox_pafpn import YOLOXPAFPN\n\n__all__ = [\n    'FPN', 'BFP', 'ChannelMapper', 'HRFPN', 'NASFPN', 'FPN_CARAFE', 'PAFPN',\n    'NASFCOS_FPN', 'RFP', 'YOLOV3Neck', 'FPG', 'DilatedEncoder',\n    'CTResNetNeck', 'SSDNeck', 'YOLOXPAFPN', 'DyHead'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/bfp.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.cnn.bricks import NonLocal2d\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass BFP(BaseModule):\n    \"\"\"BFP (Balanced Feature Pyramids)\n\n    BFP takes multi-level features as inputs and gather them into a single one,\n    then refine the gathered feature and scatter the refined results to\n    multi-level features. This module is used in Libra R-CNN (CVPR 2019), see\n    the paper `Libra R-CNN: Towards Balanced Learning for Object Detection\n    <https://arxiv.org/abs/1904.02701>`_ for details.\n\n    Args:\n        in_channels (int): Number of input channels (feature maps of all levels\n            should have the same channels).\n        num_levels (int): Number of input feature levels.\n        conv_cfg (dict): The config dict for convolution layers.\n        norm_cfg (dict): The config dict for normalization layers.\n        refine_level (int): Index of integration and refine level of BSF in\n            multi-level features from bottom to top.\n        refine_type (str): Type of the refine op, currently support\n            [None, 'conv', 'non_local'].\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 num_levels,\n                 refine_level=2,\n                 refine_type=None,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 init_cfg=dict(\n                     type='Xavier', layer='Conv2d', distribution='uniform')):\n        super(BFP, self).__init__(init_cfg)\n        assert refine_type in [None, 'conv', 'non_local']\n\n        self.in_channels = in_channels\n        self.num_levels = num_levels\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        self.refine_level = refine_level\n        self.refine_type = refine_type\n        assert 0 <= self.refine_level < self.num_levels\n\n        if self.refine_type == 'conv':\n            self.refine = ConvModule(\n                self.in_channels,\n                self.in_channels,\n                3,\n                padding=1,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg)\n        elif self.refine_type == 'non_local':\n            self.refine = NonLocal2d(\n                self.in_channels,\n                reduction=1,\n                use_scale=False,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg)\n\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        assert len(inputs) == self.num_levels\n\n        # step 1: gather multi-level features by resize and average\n        feats = []\n        gather_size = inputs[self.refine_level].size()[2:]\n        for i in range(self.num_levels):\n            if i < self.refine_level:\n                gathered = F.adaptive_max_pool2d(\n                    inputs[i], output_size=gather_size)\n            else:\n                gathered = F.interpolate(\n                    inputs[i], size=gather_size, mode='nearest')\n            feats.append(gathered)\n\n        bsf = sum(feats) / len(feats)\n\n        # step 2: refine gathered features\n        if self.refine_type is not None:\n            bsf = self.refine(bsf)\n\n        # step 3: scatter refined features to multi-levels by a residual path\n        outs = []\n        for i in 
range(self.num_levels):\n            out_size = inputs[i].size()[2:]\n            if i < self.refine_level:\n                residual = F.interpolate(bsf, size=out_size, mode='nearest')\n            else:\n                residual = F.adaptive_max_pool2d(bsf, output_size=out_size)\n            outs.append(residual + inputs[i])\n\n        return tuple(outs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/channel_mapper.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass ChannelMapper(BaseModule):\n    r\"\"\"Channel Mapper to reduce/increase channels of backbone features.\n\n    This is used to reduce/increase channels of backbone features.\n\n    Args:\n        in_channels (List[int]): Number of input channels per scale.\n        out_channels (int): Number of output channels (used at each scale).\n        kernel_size (int, optional): kernel_size for reducing channels (used\n            at each scale). Default: 3.\n        conv_cfg (dict, optional): Config dict for convolution layer.\n            Default: None.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n            Default: None.\n        act_cfg (dict, optional): Config dict for activation layer in\n            ConvModule. Default: dict(type='ReLU').\n        num_outs (int, optional): Number of output feature maps. There\n            would be extra_convs when num_outs larger than the length\n            of in_channels.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    Example:\n        >>> import torch\n        >>> in_channels = [2, 3, 5, 7]\n        >>> scales = [340, 170, 84, 43]\n        >>> inputs = [torch.rand(1, c, s, s)\n        ...           for c, s in zip(in_channels, scales)]\n        >>> self = ChannelMapper(in_channels, 11, 3).eval()\n        >>> outputs = self.forward(inputs)\n        >>> for i in range(len(outputs)):\n        ...     print(f'outputs[{i}].shape = {outputs[i].shape}')\n        outputs[0].shape = torch.Size([1, 11, 340, 340])\n        outputs[1].shape = torch.Size([1, 11, 170, 170])\n        outputs[2].shape = torch.Size([1, 11, 84, 84])\n        outputs[3].shape = torch.Size([1, 11, 43, 43])\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size=3,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 act_cfg=dict(type='ReLU'),\n                 num_outs=None,\n                 init_cfg=dict(\n                     type='Xavier', layer='Conv2d', distribution='uniform')):\n        super(ChannelMapper, self).__init__(init_cfg)\n        assert isinstance(in_channels, list)\n        self.extra_convs = None\n        if num_outs is None:\n            num_outs = len(in_channels)\n        self.convs = nn.ModuleList()\n        for in_channel in in_channels:\n            self.convs.append(\n                ConvModule(\n                    in_channel,\n                    out_channels,\n                    kernel_size,\n                    padding=(kernel_size - 1) // 2,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n        if num_outs > len(in_channels):\n            self.extra_convs = nn.ModuleList()\n            for i in range(len(in_channels), num_outs):\n                if i == len(in_channels):\n                    in_channel = in_channels[-1]\n                else:\n                    in_channel = out_channels\n                self.extra_convs.append(\n                    ConvModule(\n                        in_channel,\n                        out_channels,\n                        3,\n                        stride=2,\n                        padding=1,\n                        conv_cfg=conv_cfg,\n                     
   norm_cfg=norm_cfg,\n                        act_cfg=act_cfg))\n\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        assert len(inputs) == len(self.convs)\n        outs = [self.convs[i](inputs[i]) for i in range(len(inputs))]\n        if self.extra_convs:\n            for i in range(len(self.extra_convs)):\n                if i == 0:\n                    outs.append(self.extra_convs[0](inputs[-1]))\n                else:\n                    outs.append(self.extra_convs[i](outs[-1]))\n        return tuple(outs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/ct_resnet_neck.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, auto_fp16\n\nfrom mmdet.models.builder import NECKS\n\n\n@NECKS.register_module()\nclass CTResNetNeck(BaseModule):\n    \"\"\"The neck used in `CenterNet <https://arxiv.org/abs/1904.07850>`_ for\n    object classification and box regression.\n\n    Args:\n         in_channel (int): Number of input channels.\n         num_deconv_filters (tuple[int]): Number of filters per stage.\n         num_deconv_kernels (tuple[int]): Number of kernels per stage.\n         use_dcn (bool): If True, use DCNv2. Default: True.\n         init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channel,\n                 num_deconv_filters,\n                 num_deconv_kernels,\n                 use_dcn=True,\n                 init_cfg=None):\n        super(CTResNetNeck, self).__init__(init_cfg)\n        assert len(num_deconv_filters) == len(num_deconv_kernels)\n        self.fp16_enabled = False\n        self.use_dcn = use_dcn\n        self.in_channel = in_channel\n        self.deconv_layers = self._make_deconv_layer(num_deconv_filters,\n                                                     num_deconv_kernels)\n\n    def _make_deconv_layer(self, num_deconv_filters, num_deconv_kernels):\n        \"\"\"use deconv layers to upsample backbone's output.\"\"\"\n        layers = []\n        for i in range(len(num_deconv_filters)):\n            feat_channel = num_deconv_filters[i]\n            conv_module = ConvModule(\n                self.in_channel,\n                feat_channel,\n                3,\n                padding=1,\n                conv_cfg=dict(type='DCNv2') if self.use_dcn else None,\n                norm_cfg=dict(type='BN'))\n            layers.append(conv_module)\n            upsample_module = ConvModule(\n                feat_channel,\n                feat_channel,\n                num_deconv_kernels[i],\n                stride=2,\n                padding=1,\n                conv_cfg=dict(type='deconv'),\n                norm_cfg=dict(type='BN'))\n            layers.append(upsample_module)\n            self.in_channel = feat_channel\n\n        return nn.Sequential(*layers)\n\n    def init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.ConvTranspose2d):\n                # In order to be consistent with the source code,\n                # reset the ConvTranspose2d initialization parameters\n                m.reset_parameters()\n                # Simulated bilinear upsampling kernel\n                w = m.weight.data\n                f = math.ceil(w.size(2) / 2)\n                c = (2 * f - 1 - f % 2) / (2. 
* f)\n                for i in range(w.size(2)):\n                    for j in range(w.size(3)):\n                        w[0, 0, i, j] = \\\n                            (1 - math.fabs(i / f - c)) * (\n                                    1 - math.fabs(j / f - c))\n                for c in range(1, w.size(0)):\n                    w[c, 0, :, :] = w[0, 0, :, :]\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            # self.use_dcn is False\n            elif not self.use_dcn and isinstance(m, nn.Conv2d):\n                # In order to be consistent with the source code,\n                # reset the Conv2d initialization parameters\n                m.reset_parameters()\n\n    @auto_fp16()\n    def forward(self, inputs):\n        assert isinstance(inputs, (list, tuple))\n        outs = self.deconv_layers(inputs[-1])\n        return outs,\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/dilated_encoder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import (ConvModule, caffe2_xavier_init, constant_init, is_norm,\n                      normal_init)\nfrom torch.nn import BatchNorm2d\n\nfrom ..builder import NECKS\n\n\nclass Bottleneck(nn.Module):\n    \"\"\"Bottleneck block for DilatedEncoder used in `YOLOF.\n\n    <https://arxiv.org/abs/2103.09460>`.\n\n    The Bottleneck contains three ConvLayers and one residual connection.\n\n    Args:\n        in_channels (int): The number of input channels.\n        mid_channels (int): The number of middle output channels.\n        dilation (int): Dilation rate.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 mid_channels,\n                 dilation,\n                 norm_cfg=dict(type='BN', requires_grad=True)):\n        super(Bottleneck, self).__init__()\n        self.conv1 = ConvModule(\n            in_channels, mid_channels, 1, norm_cfg=norm_cfg)\n        self.conv2 = ConvModule(\n            mid_channels,\n            mid_channels,\n            3,\n            padding=dilation,\n            dilation=dilation,\n            norm_cfg=norm_cfg)\n        self.conv3 = ConvModule(\n            mid_channels, in_channels, 1, norm_cfg=norm_cfg)\n\n    def forward(self, x):\n        identity = x\n        out = self.conv1(x)\n        out = self.conv2(out)\n        out = self.conv3(out)\n        out = out + identity\n        return out\n\n\n@NECKS.register_module()\nclass DilatedEncoder(nn.Module):\n    \"\"\"Dilated Encoder for YOLOF <https://arxiv.org/abs/2103.09460>`.\n\n    This module contains two types of components:\n        - the original FPN lateral convolution layer and fpn convolution layer,\n              which are 1x1 conv + 3x3 conv\n        - the dilated residual block\n\n    Args:\n        in_channels (int): The number of input channels.\n        out_channels (int): The number of output channels.\n        block_mid_channels (int): The number of middle block output channels\n        num_residual_blocks (int): The number of residual blocks.\n        block_dilations (list): The list of residual blocks dilation.\n    \"\"\"\n\n    def __init__(self, in_channels, out_channels, block_mid_channels,\n                 num_residual_blocks, block_dilations):\n        super(DilatedEncoder, self).__init__()\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.block_mid_channels = block_mid_channels\n        self.num_residual_blocks = num_residual_blocks\n        self.block_dilations = block_dilations\n        self._init_layers()\n\n    def _init_layers(self):\n        self.lateral_conv = nn.Conv2d(\n            self.in_channels, self.out_channels, kernel_size=1)\n        self.lateral_norm = BatchNorm2d(self.out_channels)\n        self.fpn_conv = nn.Conv2d(\n            self.out_channels, self.out_channels, kernel_size=3, padding=1)\n        self.fpn_norm = BatchNorm2d(self.out_channels)\n        encoder_blocks = []\n        for i in range(self.num_residual_blocks):\n            dilation = self.block_dilations[i]\n            encoder_blocks.append(\n                Bottleneck(\n                    self.out_channels,\n                    self.block_mid_channels,\n                    dilation=dilation))\n        self.dilated_encoder_blocks = nn.Sequential(*encoder_blocks)\n\n    def init_weights(self):\n        caffe2_xavier_init(self.lateral_conv)\n   
     caffe2_xavier_init(self.fpn_conv)\n        for m in [self.lateral_norm, self.fpn_norm]:\n            constant_init(m, 1)\n        for m in self.dilated_encoder_blocks.modules():\n            if isinstance(m, nn.Conv2d):\n                normal_init(m, mean=0, std=0.01)\n            if is_norm(m):\n                constant_init(m, 1)\n\n    def forward(self, feature):\n        out = self.lateral_norm(self.lateral_conv(feature[-1]))\n        out = self.fpn_norm(self.fpn_conv(out))\n        return self.dilated_encoder_blocks(out),\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/dyhead.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (build_activation_layer, build_norm_layer, constant_init,\n                      normal_init)\nfrom mmcv.ops.modulated_deform_conv import ModulatedDeformConv2d\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import NECKS\nfrom ..utils import DyReLU\n\n# Reference:\n# https://github.com/microsoft/DynamicHead\n# https://github.com/jshilong/SEPC\n\n\nclass DyDCNv2(nn.Module):\n    \"\"\"ModulatedDeformConv2d with normalization layer used in DyHead.\n\n    This module cannot be configured with `conv_cfg=dict(type='DCNv2')`\n    because DyHead calculates offset and mask from middle-level feature.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        stride (int | tuple[int], optional): Stride of the convolution.\n            Default: 1.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n            Default: dict(type='GN', num_groups=16, requires_grad=True).\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 stride=1,\n                 norm_cfg=dict(type='GN', num_groups=16, requires_grad=True)):\n        super().__init__()\n        self.with_norm = norm_cfg is not None\n        bias = not self.with_norm\n        self.conv = ModulatedDeformConv2d(\n            in_channels, out_channels, 3, stride=stride, padding=1, bias=bias)\n        if self.with_norm:\n            self.norm = build_norm_layer(norm_cfg, out_channels)[1]\n\n    def forward(self, x, offset, mask):\n        \"\"\"Forward function.\"\"\"\n        x = self.conv(x.contiguous(), offset.contiguous(), mask)\n        if self.with_norm:\n            x = self.norm(x)\n        return x\n\n\nclass DyHeadBlock(nn.Module):\n    \"\"\"DyHead Block with three types of attention.\n\n    HSigmoid arguments in default act_cfg follow official code, not paper.\n    https://github.com/microsoft/DynamicHead/blob/master/dyhead/dyrelu.py\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        zero_init_offset (bool, optional): Whether to use zero init for\n            `spatial_conv_offset`. Default: True.\n        act_cfg (dict, optional): Config dict for the last activation layer of\n            scale-aware attention. 
Default: dict(type='HSigmoid', bias=3.0,\n            divisor=6.0).\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 zero_init_offset=True,\n                 act_cfg=dict(type='HSigmoid', bias=3.0, divisor=6.0)):\n        super().__init__()\n        self.zero_init_offset = zero_init_offset\n        # (offset_x, offset_y, mask) * kernel_size_y * kernel_size_x\n        self.offset_and_mask_dim = 3 * 3 * 3\n        self.offset_dim = 2 * 3 * 3\n\n        self.spatial_conv_high = DyDCNv2(in_channels, out_channels)\n        self.spatial_conv_mid = DyDCNv2(in_channels, out_channels)\n        self.spatial_conv_low = DyDCNv2(in_channels, out_channels, stride=2)\n        self.spatial_conv_offset = nn.Conv2d(\n            in_channels, self.offset_and_mask_dim, 3, padding=1)\n        self.scale_attn_module = nn.Sequential(\n            nn.AdaptiveAvgPool2d(1), nn.Conv2d(out_channels, 1, 1),\n            nn.ReLU(inplace=True), build_activation_layer(act_cfg))\n        self.task_attn_module = DyReLU(out_channels)\n        self._init_weights()\n\n    def _init_weights(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                normal_init(m, 0, 0.01)\n        if self.zero_init_offset:\n            constant_init(self.spatial_conv_offset, 0)\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        outs = []\n        for level in range(len(x)):\n            # calculate offset and mask of DCNv2 from middle-level feature\n            offset_and_mask = self.spatial_conv_offset(x[level])\n            offset = offset_and_mask[:, :self.offset_dim, :, :]\n            mask = offset_and_mask[:, self.offset_dim:, :, :].sigmoid()\n\n            mid_feat = self.spatial_conv_mid(x[level], offset, mask)\n            sum_feat = mid_feat * self.scale_attn_module(mid_feat)\n            summed_levels = 1\n            if level > 0:\n                low_feat = self.spatial_conv_low(x[level - 1], offset, mask)\n                sum_feat = sum_feat + \\\n                    low_feat * self.scale_attn_module(low_feat)\n                summed_levels += 1\n            if level < len(x) - 1:\n                # this upsample order is weird, but faster than natural order\n                # https://github.com/microsoft/DynamicHead/issues/25\n                high_feat = F.interpolate(\n                    self.spatial_conv_high(x[level + 1], offset, mask),\n                    size=x[level].shape[-2:],\n                    mode='bilinear',\n                    align_corners=True)\n                sum_feat = sum_feat + high_feat * \\\n                    self.scale_attn_module(high_feat)\n                summed_levels += 1\n            outs.append(self.task_attn_module(sum_feat / summed_levels))\n\n        return outs\n\n\n@NECKS.register_module()\nclass DyHead(BaseModule):\n    \"\"\"DyHead neck consisting of multiple DyHead Blocks.\n\n    See `Dynamic Head: Unifying Object Detection Heads with Attentions\n    <https://arxiv.org/abs/2106.08322>`_ for details.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        num_blocks (int, optional): Number of DyHead Blocks. Default: 6.\n        zero_init_offset (bool, optional): Whether to use zero init for\n            `spatial_conv_offset`. 
Default: True.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_blocks=6,\n                 zero_init_offset=True,\n                 init_cfg=None):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super().__init__(init_cfg=init_cfg)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_blocks = num_blocks\n        self.zero_init_offset = zero_init_offset\n\n        dyhead_blocks = []\n        for i in range(num_blocks):\n            in_channels = self.in_channels if i == 0 else self.out_channels\n            dyhead_blocks.append(\n                DyHeadBlock(\n                    in_channels,\n                    self.out_channels,\n                    zero_init_offset=zero_init_offset))\n        self.dyhead_blocks = nn.Sequential(*dyhead_blocks)\n\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        assert isinstance(inputs, (tuple, list))\n        outs = self.dyhead_blocks(inputs)\n        return tuple(outs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/fpg.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import NECKS\n\n\nclass Transition(BaseModule):\n    \"\"\"Base class for transition.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n    \"\"\"\n\n    def __init__(self, in_channels, out_channels, init_cfg=None):\n        super().__init__(init_cfg)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n\n    def forward(x):\n        pass\n\n\nclass UpInterpolationConv(Transition):\n    \"\"\"A transition used for up-sampling.\n\n    Up-sample the input by interpolation then refines the feature by\n    a convolution layer.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        scale_factor (int): Up-sampling factor. Default: 2.\n        mode (int): Interpolation mode. Default: nearest.\n        align_corners (bool): Whether align corners when interpolation.\n            Default: None.\n        kernel_size (int): Kernel size for the conv. Default: 3.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 scale_factor=2,\n                 mode='nearest',\n                 align_corners=None,\n                 kernel_size=3,\n                 init_cfg=None,\n                 **kwargs):\n        super().__init__(in_channels, out_channels, init_cfg)\n        self.mode = mode\n        self.scale_factor = scale_factor\n        self.align_corners = align_corners\n        self.conv = ConvModule(\n            in_channels,\n            out_channels,\n            kernel_size,\n            padding=(kernel_size - 1) // 2,\n            **kwargs)\n\n    def forward(self, x):\n        x = F.interpolate(\n            x,\n            scale_factor=self.scale_factor,\n            mode=self.mode,\n            align_corners=self.align_corners)\n        x = self.conv(x)\n        return x\n\n\nclass LastConv(Transition):\n    \"\"\"A transition used for refining the output of the last stage.\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of output channels.\n        num_inputs (int): Number of inputs of the FPN features.\n        kernel_size (int): Kernel size for the conv. 
Default: 3.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_inputs,\n                 kernel_size=3,\n                 init_cfg=None,\n                 **kwargs):\n        super().__init__(in_channels, out_channels, init_cfg)\n        self.num_inputs = num_inputs\n        self.conv_out = ConvModule(\n            in_channels,\n            out_channels,\n            kernel_size,\n            padding=(kernel_size - 1) // 2,\n            **kwargs)\n\n    def forward(self, inputs):\n        assert len(inputs) == self.num_inputs\n        return self.conv_out(inputs[-1])\n\n\n@NECKS.register_module()\nclass FPG(BaseModule):\n    \"\"\"FPG.\n\n    Implementation of `Feature Pyramid Grids (FPG)\n    <https://arxiv.org/abs/2004.03580>`_.\n    This implementation only gives the basic structure stated in the paper.\n    But users can implement different type of transitions to fully explore the\n    the potential power of the structure of FPG.\n\n    Args:\n        in_channels (int): Number of input channels (feature maps of all levels\n            should have the same channels).\n        out_channels (int): Number of output channels (used at each scale)\n        num_outs (int): Number of output scales.\n        stack_times (int): The number of times the pyramid architecture will\n            be stacked.\n        paths (list[str]): Specify the path order of each stack level.\n            Each element in the list should be either 'bu' (bottom-up) or\n            'td' (top-down).\n        inter_channels (int): Number of inter channels.\n        same_up_trans (dict): Transition that goes down at the same stage.\n        same_down_trans (dict): Transition that goes up at the same stage.\n        across_lateral_trans (dict): Across-pathway same-stage\n        across_down_trans (dict): Across-pathway bottom-up connection.\n        across_up_trans (dict): Across-pathway top-down connection.\n        across_skip_trans (dict): Across-pathway skip connection.\n        output_trans (dict): Transition that trans the output of the\n            last stage.\n        start_level (int): Index of the start input backbone level used to\n            build the feature pyramid. Default: 0.\n        end_level (int): Index of the end input backbone level (exclusive) to\n            build the feature pyramid. Default: -1, which means the last level.\n        add_extra_convs (bool): It decides whether to add conv\n            layers on top of the original feature maps. Default to False.\n            If True, its actual mode is specified by `extra_convs_on_inputs`.\n        norm_cfg (dict): Config dict for normalization layer. 
Default: None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    transition_types = {\n        'conv': ConvModule,\n        'interpolation_conv': UpInterpolationConv,\n        'last_conv': LastConv,\n    }\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs,\n                 stack_times,\n                 paths,\n                 inter_channels=None,\n                 same_down_trans=None,\n                 same_up_trans=dict(\n                     type='conv', kernel_size=3, stride=2, padding=1),\n                 across_lateral_trans=dict(type='conv', kernel_size=1),\n                 across_down_trans=dict(type='conv', kernel_size=3),\n                 across_up_trans=None,\n                 across_skip_trans=dict(type='identity'),\n                 output_trans=dict(type='last_conv', kernel_size=3),\n                 start_level=0,\n                 end_level=-1,\n                 add_extra_convs=False,\n                 norm_cfg=None,\n                 skip_inds=None,\n                 init_cfg=[\n                     dict(type='Caffe2Xavier', layer='Conv2d'),\n                     dict(\n                         type='Constant',\n                         layer=[\n                             '_BatchNorm', '_InstanceNorm', 'GroupNorm',\n                             'LayerNorm'\n                         ],\n                         val=1.0)\n                 ]):\n        super(FPG, self).__init__(init_cfg)\n        assert isinstance(in_channels, list)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_ins = len(in_channels)\n        self.num_outs = num_outs\n        if inter_channels is None:\n            self.inter_channels = [out_channels for _ in range(num_outs)]\n        elif isinstance(inter_channels, int):\n            self.inter_channels = [inter_channels for _ in range(num_outs)]\n        else:\n            assert isinstance(inter_channels, list)\n            assert len(inter_channels) == num_outs\n            self.inter_channels = inter_channels\n        self.stack_times = stack_times\n        self.paths = paths\n        assert isinstance(paths, list) and len(paths) == stack_times\n        for d in paths:\n            assert d in ('bu', 'td')\n\n        self.same_down_trans = same_down_trans\n        self.same_up_trans = same_up_trans\n        self.across_lateral_trans = across_lateral_trans\n        self.across_down_trans = across_down_trans\n        self.across_up_trans = across_up_trans\n        self.output_trans = output_trans\n        self.across_skip_trans = across_skip_trans\n\n        self.with_bias = norm_cfg is None\n        # skip inds must be specified if across skip trans is not None\n        if self.across_skip_trans is not None:\n            assert skip_inds is not None\n        self.skip_inds = skip_inds\n        assert len(self.skip_inds[0]) <= self.stack_times\n\n        if end_level == -1 or end_level == self.num_ins - 1:\n            self.backbone_end_level = self.num_ins\n            assert num_outs >= self.num_ins - start_level\n        else:\n            # if end_level is not the last level, no extra level is allowed\n            self.backbone_end_level = end_level + 1\n            assert end_level < self.num_ins\n            assert num_outs == end_level - start_level + 1\n        self.start_level = start_level\n        self.end_level = end_level\n        self.add_extra_convs = add_extra_convs\n\n       
 # build lateral 1x1 convs to reduce channels\n        self.lateral_convs = nn.ModuleList()\n        for i in range(self.start_level, self.backbone_end_level):\n            l_conv = nn.Conv2d(self.in_channels[i],\n                               self.inter_channels[i - self.start_level], 1)\n            self.lateral_convs.append(l_conv)\n\n        extra_levels = num_outs - self.backbone_end_level + self.start_level\n        self.extra_downsamples = nn.ModuleList()\n        for i in range(extra_levels):\n            if self.add_extra_convs:\n                fpn_idx = self.backbone_end_level - self.start_level + i\n                extra_conv = nn.Conv2d(\n                    self.inter_channels[fpn_idx - 1],\n                    self.inter_channels[fpn_idx],\n                    3,\n                    stride=2,\n                    padding=1)\n                self.extra_downsamples.append(extra_conv)\n            else:\n                self.extra_downsamples.append(nn.MaxPool2d(1, stride=2))\n\n        self.fpn_transitions = nn.ModuleList()  # stack times\n        for s in range(self.stack_times):\n            stage_trans = nn.ModuleList()  # num of feature levels\n            for i in range(self.num_outs):\n                # same, across_lateral, across_down, across_up\n                trans = nn.ModuleDict()\n                if s in self.skip_inds[i]:\n                    stage_trans.append(trans)\n                    continue\n                # build same-stage down trans (used in bottom-up paths)\n                if i == 0 or self.same_up_trans is None:\n                    same_up_trans = None\n                else:\n                    same_up_trans = self.build_trans(\n                        self.same_up_trans, self.inter_channels[i - 1],\n                        self.inter_channels[i])\n                trans['same_up'] = same_up_trans\n                # build same-stage up trans (used in top-down paths)\n                if i == self.num_outs - 1 or self.same_down_trans is None:\n                    same_down_trans = None\n                else:\n                    same_down_trans = self.build_trans(\n                        self.same_down_trans, self.inter_channels[i + 1],\n                        self.inter_channels[i])\n                trans['same_down'] = same_down_trans\n                # build across lateral trans\n                across_lateral_trans = self.build_trans(\n                    self.across_lateral_trans, self.inter_channels[i],\n                    self.inter_channels[i])\n                trans['across_lateral'] = across_lateral_trans\n                # build across down trans\n                if i == self.num_outs - 1 or self.across_down_trans is None:\n                    across_down_trans = None\n                else:\n                    across_down_trans = self.build_trans(\n                        self.across_down_trans, self.inter_channels[i + 1],\n                        self.inter_channels[i])\n                trans['across_down'] = across_down_trans\n                # build across up trans\n                if i == 0 or self.across_up_trans is None:\n                    across_up_trans = None\n                else:\n                    across_up_trans = self.build_trans(\n                        self.across_up_trans, self.inter_channels[i - 1],\n                        self.inter_channels[i])\n                trans['across_up'] = across_up_trans\n                if self.across_skip_trans is None:\n                    across_skip_trans = None\n             
   else:\n                    across_skip_trans = self.build_trans(\n                        self.across_skip_trans, self.inter_channels[i - 1],\n                        self.inter_channels[i])\n                trans['across_skip'] = across_skip_trans\n                # build across_skip trans\n                stage_trans.append(trans)\n            self.fpn_transitions.append(stage_trans)\n\n        self.output_transition = nn.ModuleList()  # output levels\n        for i in range(self.num_outs):\n            trans = self.build_trans(\n                self.output_trans,\n                self.inter_channels[i],\n                self.out_channels,\n                num_inputs=self.stack_times + 1)\n            self.output_transition.append(trans)\n\n        self.relu = nn.ReLU(inplace=True)\n\n    def build_trans(self, cfg, in_channels, out_channels, **extra_args):\n        cfg_ = cfg.copy()\n        trans_type = cfg_.pop('type')\n        trans_cls = self.transition_types[trans_type]\n        return trans_cls(in_channels, out_channels, **cfg_, **extra_args)\n\n    def fuse(self, fuse_dict):\n        out = None\n        for item in fuse_dict.values():\n            if item is not None:\n                if out is None:\n                    out = item\n                else:\n                    out = out + item\n        return out\n\n    def forward(self, inputs):\n        assert len(inputs) == len(self.in_channels)\n\n        # build all levels from original feature maps\n        feats = [\n            lateral_conv(inputs[i + self.start_level])\n            for i, lateral_conv in enumerate(self.lateral_convs)\n        ]\n        for downsample in self.extra_downsamples:\n            feats.append(downsample(feats[-1]))\n\n        outs = [feats]\n\n        for i in range(self.stack_times):\n            current_outs = outs[-1]\n            next_outs = []\n            direction = self.paths[i]\n            for j in range(self.num_outs):\n                if i in self.skip_inds[j]:\n                    next_outs.append(outs[-1][j])\n                    continue\n                # feature level\n                if direction == 'td':\n                    lvl = self.num_outs - j - 1\n                else:\n                    lvl = j\n                # get transitions\n                if direction == 'td':\n                    same_trans = self.fpn_transitions[i][lvl]['same_down']\n                else:\n                    same_trans = self.fpn_transitions[i][lvl]['same_up']\n                across_lateral_trans = self.fpn_transitions[i][lvl][\n                    'across_lateral']\n                across_down_trans = self.fpn_transitions[i][lvl]['across_down']\n                across_up_trans = self.fpn_transitions[i][lvl]['across_up']\n                across_skip_trans = self.fpn_transitions[i][lvl]['across_skip']\n                # init output\n                to_fuse = dict(\n                    same=None, lateral=None, across_up=None, across_down=None)\n                # same downsample/upsample\n                if same_trans is not None:\n                    to_fuse['same'] = same_trans(next_outs[-1])\n                # across lateral\n                if across_lateral_trans is not None:\n                    to_fuse['lateral'] = across_lateral_trans(\n                        current_outs[lvl])\n                # across downsample\n                if lvl > 0 and across_up_trans is not None:\n                    to_fuse['across_up'] = across_up_trans(current_outs[lvl -\n                               
                                         1])\n                # across upsample\n                if (lvl < self.num_outs - 1 and across_down_trans is not None):\n                    to_fuse['across_down'] = across_down_trans(\n                        current_outs[lvl + 1])\n                if across_skip_trans is not None:\n                    to_fuse['across_skip'] = across_skip_trans(outs[0][lvl])\n                x = self.fuse(to_fuse)\n                next_outs.append(x)\n\n            if direction == 'td':\n                outs.append(next_outs[::-1])\n            else:\n                outs.append(next_outs)\n\n        # output trans\n        final_outs = []\n        for i in range(self.num_outs):\n            lvl_out_list = []\n            for s in range(len(outs)):\n                lvl_out_list.append(outs[s][i])\n            lvl_out = self.output_transition[i](lvl_out_list)\n            final_outs.append(lvl_out)\n\n        return final_outs\n"
  },
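FPG resolves every connection from a small transition config: `build_trans` pops the `type` key, looks the class up in `transition_types`, and forwards the remaining keys as constructor kwargs. A minimal, illustrative neck config is sketched below; the channel counts, path list and `skip_inds` are placeholders rather than the official FPG settings. Note that `skip_inds` must always be supplied (one entry per output level), and that the default `across_skip_trans=dict(type='identity')` has no matching entry in `transition_types`, so in practice it is overridden or set to None.

# Illustrative FPG neck config (hypothetical values, not the official one).
# across_skip_trans is overridden because 'identity' is not registered in
# FPG.transition_types; across_down_trans uses 'interpolation_conv' so the
# coarser map is upsampled before being fused with the finer one.
fpg_neck = dict(
    type='FPG',
    in_channels=[256, 512, 1024, 2048],
    out_channels=256,
    inter_channels=256,
    num_outs=5,
    stack_times=9,
    paths=['bu'] * 9,
    same_down_trans=None,
    same_up_trans=dict(type='conv', kernel_size=3, stride=2, padding=1),
    across_lateral_trans=dict(type='conv', kernel_size=1),
    across_down_trans=dict(type='interpolation_conv', kernel_size=3, padding=1),
    across_up_trans=None,
    across_skip_trans=dict(type='conv', kernel_size=1),
    output_trans=dict(type='last_conv', kernel_size=3),
    norm_cfg=None,
    skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0,), ()])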
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/fpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, auto_fp16\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass FPN(BaseModule):\n    r\"\"\"Feature Pyramid Network.\n\n    This is an implementation of paper `Feature Pyramid Networks for Object\n    Detection <https://arxiv.org/abs/1612.03144>`_.\n\n    Args:\n        in_channels (list[int]): Number of input channels per scale.\n        out_channels (int): Number of output channels (used at each scale).\n        num_outs (int): Number of output scales.\n        start_level (int): Index of the start input backbone level used to\n            build the feature pyramid. Default: 0.\n        end_level (int): Index of the end input backbone level (exclusive) to\n            build the feature pyramid. Default: -1, which means the last level.\n        add_extra_convs (bool | str): If bool, it decides whether to add conv\n            layers on top of the original feature maps. Default to False.\n            If True, it is equivalent to `add_extra_convs='on_input'`.\n            If str, it specifies the source feature map of the extra convs.\n            Only the following options are allowed\n\n            - 'on_input': Last feat map of neck inputs (i.e. backbone feature).\n            - 'on_lateral': Last feature map after lateral convs.\n            - 'on_output': The last output feature map after fpn convs.\n        relu_before_extra_convs (bool): Whether to apply relu before the extra\n            conv. Default: False.\n        no_norm_on_lateral (bool): Whether to apply norm on lateral.\n            Default: False.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Config dict for normalization layer. Default: None.\n        act_cfg (dict): Config dict for activation layer in ConvModule.\n            Default: None.\n        upsample_cfg (dict): Config dict for interpolate layer.\n            Default: dict(mode='nearest').\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n\n    Example:\n        >>> import torch\n        >>> in_channels = [2, 3, 5, 7]\n        >>> scales = [340, 170, 84, 43]\n        >>> inputs = [torch.rand(1, c, s, s)\n        ...           for c, s in zip(in_channels, scales)]\n        >>> self = FPN(in_channels, 11, len(in_channels)).eval()\n        >>> outputs = self.forward(inputs)\n        >>> for i in range(len(outputs)):\n        ...     
print(f'outputs[{i}].shape = {outputs[i].shape}')\n        outputs[0].shape = torch.Size([1, 11, 340, 340])\n        outputs[1].shape = torch.Size([1, 11, 170, 170])\n        outputs[2].shape = torch.Size([1, 11, 84, 84])\n        outputs[3].shape = torch.Size([1, 11, 43, 43])\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs,\n                 start_level=0,\n                 end_level=-1,\n                 add_extra_convs=False,\n                 relu_before_extra_convs=False,\n                 no_norm_on_lateral=False,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 act_cfg=None,\n                 upsample_cfg=dict(mode='nearest'),\n                 init_cfg=dict(\n                     type='Xavier', layer='Conv2d', distribution='uniform')):\n        super(FPN, self).__init__(init_cfg)\n        assert isinstance(in_channels, list)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_ins = len(in_channels)\n        self.num_outs = num_outs\n        self.relu_before_extra_convs = relu_before_extra_convs\n        self.no_norm_on_lateral = no_norm_on_lateral\n        self.fp16_enabled = False\n        self.upsample_cfg = upsample_cfg.copy()\n\n        if end_level == -1 or end_level == self.num_ins - 1:\n            self.backbone_end_level = self.num_ins\n            assert num_outs >= self.num_ins - start_level\n        else:\n            # if end_level is not the last level, no extra level is allowed\n            self.backbone_end_level = end_level + 1\n            assert end_level < self.num_ins\n            assert num_outs == end_level - start_level + 1\n        self.start_level = start_level\n        self.end_level = end_level\n        self.add_extra_convs = add_extra_convs\n        assert isinstance(add_extra_convs, (str, bool))\n        if isinstance(add_extra_convs, str):\n            # Extra_convs_source choices: 'on_input', 'on_lateral', 'on_output'\n            assert add_extra_convs in ('on_input', 'on_lateral', 'on_output')\n        elif add_extra_convs:  # True\n            self.add_extra_convs = 'on_input'\n\n        self.lateral_convs = nn.ModuleList()\n        self.fpn_convs = nn.ModuleList()\n\n        for i in range(self.start_level, self.backbone_end_level):\n            l_conv = ConvModule(\n                in_channels[i],\n                out_channels,\n                1,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg if not self.no_norm_on_lateral else None,\n                act_cfg=act_cfg,\n                inplace=False)\n            fpn_conv = ConvModule(\n                out_channels,\n                out_channels,\n                3,\n                padding=1,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg,\n                inplace=False)\n\n            self.lateral_convs.append(l_conv)\n            self.fpn_convs.append(fpn_conv)\n\n        # add extra conv layers (e.g., RetinaNet)\n        extra_levels = num_outs - self.backbone_end_level + self.start_level\n        if self.add_extra_convs and extra_levels >= 1:\n            for i in range(extra_levels):\n                if i == 0 and self.add_extra_convs == 'on_input':\n                    in_channels = self.in_channels[self.backbone_end_level - 1]\n                else:\n                    in_channels = out_channels\n                extra_fpn_conv = ConvModule(\n       
             in_channels,\n                    out_channels,\n                    3,\n                    stride=2,\n                    padding=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg,\n                    inplace=False)\n                self.fpn_convs.append(extra_fpn_conv)\n\n    @auto_fp16()\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        assert len(inputs) == len(self.in_channels)\n\n        # build laterals\n        laterals = [\n            lateral_conv(inputs[i + self.start_level])\n            for i, lateral_conv in enumerate(self.lateral_convs)\n        ]\n\n        # build top-down path\n        used_backbone_levels = len(laterals)\n        for i in range(used_backbone_levels - 1, 0, -1):\n            # In some cases, fixing `scale factor` (e.g. 2) is preferred, but\n            #  it cannot co-exist with `size` in `F.interpolate`.\n            if 'scale_factor' in self.upsample_cfg:\n                # fix runtime error of \"+=\" inplace operation in PyTorch 1.10\n                laterals[i - 1] = laterals[i - 1] + F.interpolate(\n                    laterals[i], **self.upsample_cfg)\n            else:\n                prev_shape = laterals[i - 1].shape[2:]\n                laterals[i - 1] = laterals[i - 1] + F.interpolate(\n                    laterals[i], size=prev_shape, **self.upsample_cfg)\n\n        # build outputs\n        # part 1: from original levels\n        outs = [\n            self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)\n        ]\n        # part 2: add extra levels\n        if self.num_outs > len(outs):\n            # use max pool to get more levels on top of outputs\n            # (e.g., Faster R-CNN, Mask R-CNN)\n            if not self.add_extra_convs:\n                for i in range(self.num_outs - used_backbone_levels):\n                    outs.append(F.max_pool2d(outs[-1], 1, stride=2))\n            # add conv layers on top of original feature maps (RetinaNet)\n            else:\n                if self.add_extra_convs == 'on_input':\n                    extra_source = inputs[self.backbone_end_level - 1]\n                elif self.add_extra_convs == 'on_lateral':\n                    extra_source = laterals[-1]\n                elif self.add_extra_convs == 'on_output':\n                    extra_source = outs[-1]\n                else:\n                    raise NotImplementedError\n                outs.append(self.fpn_convs[used_backbone_levels](extra_source))\n                for i in range(used_backbone_levels + 1, self.num_outs):\n                    if self.relu_before_extra_convs:\n                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))\n                    else:\n                        outs.append(self.fpn_convs[i](outs[-1]))\n        return tuple(outs)\n"
  },
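The docstring example keeps every backbone level; the snippet below (a quick sketch with made-up channel and shape values, assuming mmdet/mmcv are installed) additionally exercises `add_extra_convs='on_input'`, where extra stride-2 convs on top of the last backbone feature produce the additional output levels, as in RetinaNet-style configs.

import torch
from mmdet.models.necks import FPN

in_channels = [256, 512, 1024, 2048]          # illustrative C2-C5 widths
inputs = [torch.rand(1, c, 64 // 2**i, 64 // 2**i)
          for i, c in enumerate(in_channels)]
# start_level=1 drops C2; the two extra levels come from a stride-2 conv on
# the raw C5 input ('on_input') and another on the previous output.
neck = FPN(in_channels, out_channels=256, num_outs=5, start_level=1,
           add_extra_convs='on_input').eval()
outs = neck(inputs)
print([o.shape[-1] for o in outs])            # expected: [32, 16, 8, 4, 2]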
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/fpn_carafe.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, build_upsample_layer, xavier_init\nfrom mmcv.ops.carafe import CARAFEPack\nfrom mmcv.runner import BaseModule, ModuleList\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass FPN_CARAFE(BaseModule):\n    \"\"\"FPN_CARAFE is a more flexible implementation of FPN. It allows more\n    choice for upsample methods during the top-down pathway.\n\n    It can reproduce the performance of ICCV 2019 paper\n    CARAFE: Content-Aware ReAssembly of FEatures\n    Please refer to https://arxiv.org/abs/1905.02188 for more details.\n\n    Args:\n        in_channels (list[int]): Number of channels for each input feature map.\n        out_channels (int): Output channels of feature pyramids.\n        num_outs (int): Number of output stages.\n        start_level (int): Start level of feature pyramids.\n            (Default: 0)\n        end_level (int): End level of feature pyramids.\n            (Default: -1 indicates the last level).\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n        activate (str): Type of activation function in ConvModule\n            (Default: None indicates w/o activation).\n        order (dict): Order of components in ConvModule.\n        upsample (str): Type of upsample layer.\n        upsample_cfg (dict): Dictionary to construct and config upsample layer.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs,\n                 start_level=0,\n                 end_level=-1,\n                 norm_cfg=None,\n                 act_cfg=None,\n                 order=('conv', 'norm', 'act'),\n                 upsample_cfg=dict(\n                     type='carafe',\n                     up_kernel=5,\n                     up_group=1,\n                     encoder_kernel=3,\n                     encoder_dilation=1),\n                 init_cfg=None):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super(FPN_CARAFE, self).__init__(init_cfg)\n        assert isinstance(in_channels, list)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_ins = len(in_channels)\n        self.num_outs = num_outs\n        self.norm_cfg = norm_cfg\n        self.act_cfg = act_cfg\n        self.with_bias = norm_cfg is None\n        self.upsample_cfg = upsample_cfg.copy()\n        self.upsample = self.upsample_cfg.get('type')\n        self.relu = nn.ReLU(inplace=False)\n\n        self.order = order\n        assert order in [('conv', 'norm', 'act'), ('act', 'conv', 'norm')]\n\n        assert self.upsample in [\n            'nearest', 'bilinear', 'deconv', 'pixel_shuffle', 'carafe', None\n        ]\n        if self.upsample in ['deconv', 'pixel_shuffle']:\n            assert hasattr(\n                self.upsample_cfg,\n                'upsample_kernel') and self.upsample_cfg.upsample_kernel > 0\n            self.upsample_kernel = self.upsample_cfg.pop('upsample_kernel')\n\n        if end_level == -1 or end_level == self.num_ins - 1:\n            self.backbone_end_level = self.num_ins\n            assert num_outs >= self.num_ins - start_level\n        else:\n            # if end_level is not the last level, no extra level 
is allowed\n            self.backbone_end_level = end_level + 1\n            assert end_level < self.num_ins\n            assert num_outs == end_level - start_level + 1\n        self.start_level = start_level\n        self.end_level = end_level\n\n        self.lateral_convs = ModuleList()\n        self.fpn_convs = ModuleList()\n        self.upsample_modules = ModuleList()\n\n        for i in range(self.start_level, self.backbone_end_level):\n            l_conv = ConvModule(\n                in_channels[i],\n                out_channels,\n                1,\n                norm_cfg=norm_cfg,\n                bias=self.with_bias,\n                act_cfg=act_cfg,\n                inplace=False,\n                order=self.order)\n            fpn_conv = ConvModule(\n                out_channels,\n                out_channels,\n                3,\n                padding=1,\n                norm_cfg=self.norm_cfg,\n                bias=self.with_bias,\n                act_cfg=act_cfg,\n                inplace=False,\n                order=self.order)\n            if i != self.backbone_end_level - 1:\n                upsample_cfg_ = self.upsample_cfg.copy()\n                if self.upsample == 'deconv':\n                    upsample_cfg_.update(\n                        in_channels=out_channels,\n                        out_channels=out_channels,\n                        kernel_size=self.upsample_kernel,\n                        stride=2,\n                        padding=(self.upsample_kernel - 1) // 2,\n                        output_padding=(self.upsample_kernel - 1) // 2)\n                elif self.upsample == 'pixel_shuffle':\n                    upsample_cfg_.update(\n                        in_channels=out_channels,\n                        out_channels=out_channels,\n                        scale_factor=2,\n                        upsample_kernel=self.upsample_kernel)\n                elif self.upsample == 'carafe':\n                    upsample_cfg_.update(channels=out_channels, scale_factor=2)\n                else:\n                    # suppress warnings\n                    align_corners = (None\n                                     if self.upsample == 'nearest' else False)\n                    upsample_cfg_.update(\n                        scale_factor=2,\n                        mode=self.upsample,\n                        align_corners=align_corners)\n                upsample_module = build_upsample_layer(upsample_cfg_)\n                self.upsample_modules.append(upsample_module)\n            self.lateral_convs.append(l_conv)\n            self.fpn_convs.append(fpn_conv)\n\n        # add extra conv layers (e.g., RetinaNet)\n        extra_out_levels = (\n            num_outs - self.backbone_end_level + self.start_level)\n        if extra_out_levels >= 1:\n            for i in range(extra_out_levels):\n                in_channels = (\n                    self.in_channels[self.backbone_end_level -\n                                     1] if i == 0 else out_channels)\n                extra_l_conv = ConvModule(\n                    in_channels,\n                    out_channels,\n                    3,\n                    stride=2,\n                    padding=1,\n                    norm_cfg=norm_cfg,\n                    bias=self.with_bias,\n                    act_cfg=act_cfg,\n                    inplace=False,\n                    order=self.order)\n                if self.upsample == 'deconv':\n                    upsampler_cfg_ = dict(\n                        
in_channels=out_channels,\n                        out_channels=out_channels,\n                        kernel_size=self.upsample_kernel,\n                        stride=2,\n                        padding=(self.upsample_kernel - 1) // 2,\n                        output_padding=(self.upsample_kernel - 1) // 2)\n                elif self.upsample == 'pixel_shuffle':\n                    upsampler_cfg_ = dict(\n                        in_channels=out_channels,\n                        out_channels=out_channels,\n                        scale_factor=2,\n                        upsample_kernel=self.upsample_kernel)\n                elif self.upsample == 'carafe':\n                    upsampler_cfg_ = dict(\n                        channels=out_channels,\n                        scale_factor=2,\n                        **self.upsample_cfg)\n                else:\n                    # suppress warnings\n                    align_corners = (None\n                                     if self.upsample == 'nearest' else False)\n                    upsampler_cfg_ = dict(\n                        scale_factor=2,\n                        mode=self.upsample,\n                        align_corners=align_corners)\n                upsampler_cfg_['type'] = self.upsample\n                upsample_module = build_upsample_layer(upsampler_cfg_)\n                extra_fpn_conv = ConvModule(\n                    out_channels,\n                    out_channels,\n                    3,\n                    padding=1,\n                    norm_cfg=self.norm_cfg,\n                    bias=self.with_bias,\n                    act_cfg=act_cfg,\n                    inplace=False,\n                    order=self.order)\n                self.upsample_modules.append(upsample_module)\n                self.fpn_convs.append(extra_fpn_conv)\n                self.lateral_convs.append(extra_l_conv)\n\n    # default init_weights for conv(msra) and norm in ConvModule\n    def init_weights(self):\n        \"\"\"Initialize the weights of module.\"\"\"\n        super(FPN_CARAFE, self).init_weights()\n        for m in self.modules():\n            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):\n                xavier_init(m, distribution='uniform')\n        for m in self.modules():\n            if isinstance(m, CARAFEPack):\n                m.init_weights()\n\n    def slice_as(self, src, dst):\n        \"\"\"Slice ``src`` as ``dst``\n\n        Note:\n            ``src`` should have the same or larger size than ``dst``.\n\n        Args:\n            src (torch.Tensor): Tensors to be sliced.\n            dst (torch.Tensor): ``src`` will be sliced to have the same\n                size as ``dst``.\n\n        Returns:\n            torch.Tensor: Sliced tensor.\n        \"\"\"\n        assert (src.size(2) >= dst.size(2)) and (src.size(3) >= dst.size(3))\n        if src.size(2) == dst.size(2) and src.size(3) == dst.size(3):\n            return src\n        else:\n            return src[:, :, :dst.size(2), :dst.size(3)]\n\n    def tensor_add(self, a, b):\n        \"\"\"Add tensors ``a`` and ``b`` that might have different sizes.\"\"\"\n        if a.size() == b.size():\n            c = a + b\n        else:\n            c = a + self.slice_as(b, a)\n        return c\n\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        assert len(inputs) == len(self.in_channels)\n\n        # build laterals\n        laterals = []\n        for i, lateral_conv in enumerate(self.lateral_convs):\n            if i <= 
self.backbone_end_level - self.start_level:\n                input = inputs[min(i + self.start_level, len(inputs) - 1)]\n            else:\n                input = laterals[-1]\n            lateral = lateral_conv(input)\n            laterals.append(lateral)\n\n        # build top-down path\n        for i in range(len(laterals) - 1, 0, -1):\n            if self.upsample is not None:\n                upsample_feat = self.upsample_modules[i - 1](laterals[i])\n            else:\n                upsample_feat = laterals[i]\n            laterals[i - 1] = self.tensor_add(laterals[i - 1], upsample_feat)\n\n        # build outputs\n        num_conv_outs = len(self.fpn_convs)\n        outs = []\n        for i in range(num_conv_outs):\n            out = self.fpn_convs[i](laterals[i])\n            outs.append(out)\n        return tuple(outs)\n"
  },
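FPN_CARAFE differs from FPN only in how the top-down upsampling is done, so the interesting choice is concentrated in `upsample_cfg`. A hedged construction sketch follows; the values mirror the constructor defaults shown above and are illustrative, and the 'carafe' upsampler needs the compiled mmcv CARAFE ops.

# Illustrative FPN_CARAFE neck config; upsample_cfg mirrors the defaults in
# the constructor above. Setting type to 'nearest', 'bilinear', 'deconv' or
# 'pixel_shuffle' swaps the upsampler without touching anything else.
carafe_neck = dict(
    type='FPN_CARAFE',
    in_channels=[256, 512, 1024, 2048],
    out_channels=256,
    num_outs=5,
    start_level=0,
    end_level=-1,
    norm_cfg=None,
    act_cfg=None,
    order=('conv', 'norm', 'act'),
    upsample_cfg=dict(
        type='carafe',
        up_kernel=5,
        up_group=1,
        encoder_kernel=3,
        encoder_dilation=1))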
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/hrfpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\nfrom torch.utils.checkpoint import checkpoint\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass HRFPN(BaseModule):\n    \"\"\"HRFPN (High Resolution Feature Pyramids)\n\n    paper: `High-Resolution Representations for Labeling Pixels and Regions\n    <https://arxiv.org/abs/1904.04514>`_.\n\n    Args:\n        in_channels (list): number of channels for each branch.\n        out_channels (int): output channels of feature pyramids.\n        num_outs (int): number of output stages.\n        pooling_type (str): pooling for generating feature pyramids\n            from {MAX, AVG}.\n        conv_cfg (dict): dictionary to construct and config conv layer.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n        with_cp  (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed.\n        stride (int): stride of 3x3 convolutional layers\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs=5,\n                 pooling_type='AVG',\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 with_cp=False,\n                 stride=1,\n                 init_cfg=dict(type='Caffe2Xavier', layer='Conv2d')):\n        super(HRFPN, self).__init__(init_cfg)\n        assert isinstance(in_channels, list)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_ins = len(in_channels)\n        self.num_outs = num_outs\n        self.with_cp = with_cp\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        self.reduction_conv = ConvModule(\n            sum(in_channels),\n            out_channels,\n            kernel_size=1,\n            conv_cfg=self.conv_cfg,\n            act_cfg=None)\n\n        self.fpn_convs = nn.ModuleList()\n        for i in range(self.num_outs):\n            self.fpn_convs.append(\n                ConvModule(\n                    out_channels,\n                    out_channels,\n                    kernel_size=3,\n                    padding=1,\n                    stride=stride,\n                    conv_cfg=self.conv_cfg,\n                    act_cfg=None))\n\n        if pooling_type == 'MAX':\n            self.pooling = F.max_pool2d\n        else:\n            self.pooling = F.avg_pool2d\n\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        assert len(inputs) == self.num_ins\n        outs = [inputs[0]]\n        for i in range(1, self.num_ins):\n            outs.append(\n                F.interpolate(inputs[i], scale_factor=2**i, mode='bilinear'))\n        out = torch.cat(outs, dim=1)\n        if out.requires_grad and self.with_cp:\n            out = checkpoint(self.reduction_conv, out)\n        else:\n            out = self.reduction_conv(out)\n        outs = [out]\n        for i in range(1, self.num_outs):\n            outs.append(self.pooling(out, kernel_size=2**i, stride=2**i))\n        outputs = []\n\n        for i in range(self.num_outs):\n            if outs[i].requires_grad and self.with_cp:\n                tmp_out = checkpoint(self.fpn_convs[i], outs[i])\n            else:\n                tmp_out = 
self.fpn_convs[i](outs[i])\n            outputs.append(tmp_out)\n        return tuple(outputs)\n"
  },
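HRFPN expects one input per HRNet branch, each at half the resolution of the previous one; all branches are upsampled to the first branch's resolution, concatenated, reduced with a 1x1 conv, and re-pooled into `num_outs` levels. A quick sketch with illustrative HRNetV2-W18-like widths (assuming mmdet/mmcv are installed):

import torch
from mmdet.models.necks import HRFPN

in_channels = [18, 36, 72, 144]               # illustrative branch widths
inputs = [torch.rand(1, c, 64 // 2**i, 64 // 2**i)
          for i, c in enumerate(in_channels)]
neck = HRFPN(in_channels, out_channels=256, num_outs=5).eval()
outs = neck(inputs)
print([o.shape[-1] for o in outs])            # expected: [64, 32, 16, 8, 4]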
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/nas_fpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops.merge_cells import GlobalPoolingCell, SumCell\nfrom mmcv.runner import BaseModule, ModuleList\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass NASFPN(BaseModule):\n    \"\"\"NAS-FPN.\n\n    Implementation of `NAS-FPN: Learning Scalable Feature Pyramid Architecture\n    for Object Detection <https://arxiv.org/abs/1904.07392>`_\n\n    Args:\n        in_channels (List[int]): Number of input channels per scale.\n        out_channels (int): Number of output channels (used at each scale)\n        num_outs (int): Number of output scales.\n        stack_times (int): The number of times the pyramid architecture will\n            be stacked.\n        start_level (int): Index of the start input backbone level used to\n            build the feature pyramid. Default: 0.\n        end_level (int): Index of the end input backbone level (exclusive) to\n            build the feature pyramid. Default: -1, which means the last level.\n        add_extra_convs (bool): It decides whether to add conv\n            layers on top of the original feature maps. Default to False.\n            If True, its actual mode is specified by `extra_convs_on_inputs`.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs,\n                 stack_times,\n                 start_level=0,\n                 end_level=-1,\n                 add_extra_convs=False,\n                 norm_cfg=None,\n                 init_cfg=dict(type='Caffe2Xavier', layer='Conv2d')):\n        super(NASFPN, self).__init__(init_cfg)\n        assert isinstance(in_channels, list)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_ins = len(in_channels)  # num of input feature levels\n        self.num_outs = num_outs  # num of output feature levels\n        self.stack_times = stack_times\n        self.norm_cfg = norm_cfg\n\n        if end_level == -1 or end_level == self.num_ins - 1:\n            self.backbone_end_level = self.num_ins\n            assert num_outs >= self.num_ins - start_level\n        else:\n            # if end_level is not the last level, no extra level is allowed\n            self.backbone_end_level = end_level + 1\n            assert end_level < self.num_ins\n            assert num_outs == end_level - start_level + 1\n        self.start_level = start_level\n        self.end_level = end_level\n        self.add_extra_convs = add_extra_convs\n\n        # add lateral connections\n        self.lateral_convs = nn.ModuleList()\n        for i in range(self.start_level, self.backbone_end_level):\n            l_conv = ConvModule(\n                in_channels[i],\n                out_channels,\n                1,\n                norm_cfg=norm_cfg,\n                act_cfg=None)\n            self.lateral_convs.append(l_conv)\n\n        # add extra downsample layers (stride-2 pooling or conv)\n        extra_levels = num_outs - self.backbone_end_level + self.start_level\n        self.extra_downsamples = nn.ModuleList()\n        for i in range(extra_levels):\n            extra_conv = ConvModule(\n                out_channels, out_channels, 1, norm_cfg=norm_cfg, act_cfg=None)\n            self.extra_downsamples.append(\n                nn.Sequential(extra_conv, nn.MaxPool2d(2, 2)))\n\n        # add NAS FPN 
connections\n        self.fpn_stages = ModuleList()\n        for _ in range(self.stack_times):\n            stage = nn.ModuleDict()\n            # gp(p6, p4) -> p4_1\n            stage['gp_64_4'] = GlobalPoolingCell(\n                in_channels=out_channels,\n                out_channels=out_channels,\n                out_norm_cfg=norm_cfg)\n            # sum(p4_1, p4) -> p4_2\n            stage['sum_44_4'] = SumCell(\n                in_channels=out_channels,\n                out_channels=out_channels,\n                out_norm_cfg=norm_cfg)\n            # sum(p4_2, p3) -> p3_out\n            stage['sum_43_3'] = SumCell(\n                in_channels=out_channels,\n                out_channels=out_channels,\n                out_norm_cfg=norm_cfg)\n            # sum(p3_out, p4_2) -> p4_out\n            stage['sum_34_4'] = SumCell(\n                in_channels=out_channels,\n                out_channels=out_channels,\n                out_norm_cfg=norm_cfg)\n            # sum(p5, gp(p4_out, p3_out)) -> p5_out\n            stage['gp_43_5'] = GlobalPoolingCell(with_out_conv=False)\n            stage['sum_55_5'] = SumCell(\n                in_channels=out_channels,\n                out_channels=out_channels,\n                out_norm_cfg=norm_cfg)\n            # sum(p7, gp(p5_out, p4_2)) -> p7_out\n            stage['gp_54_7'] = GlobalPoolingCell(with_out_conv=False)\n            stage['sum_77_7'] = SumCell(\n                in_channels=out_channels,\n                out_channels=out_channels,\n                out_norm_cfg=norm_cfg)\n            # gp(p7_out, p5_out) -> p6_out\n            stage['gp_75_6'] = GlobalPoolingCell(\n                in_channels=out_channels,\n                out_channels=out_channels,\n                out_norm_cfg=norm_cfg)\n            self.fpn_stages.append(stage)\n\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        # build P3-P5\n        feats = [\n            lateral_conv(inputs[i + self.start_level])\n            for i, lateral_conv in enumerate(self.lateral_convs)\n        ]\n        # build P6-P7 on top of P5\n        for downsample in self.extra_downsamples:\n            feats.append(downsample(feats[-1]))\n\n        p3, p4, p5, p6, p7 = feats\n\n        for stage in self.fpn_stages:\n            # gp(p6, p4) -> p4_1\n            p4_1 = stage['gp_64_4'](p6, p4, out_size=p4.shape[-2:])\n            # sum(p4_1, p4) -> p4_2\n            p4_2 = stage['sum_44_4'](p4_1, p4, out_size=p4.shape[-2:])\n            # sum(p4_2, p3) -> p3_out\n            p3 = stage['sum_43_3'](p4_2, p3, out_size=p3.shape[-2:])\n            # sum(p3_out, p4_2) -> p4_out\n            p4 = stage['sum_34_4'](p3, p4_2, out_size=p4.shape[-2:])\n            # sum(p5, gp(p4_out, p3_out)) -> p5_out\n            p5_tmp = stage['gp_43_5'](p4, p3, out_size=p5.shape[-2:])\n            p5 = stage['sum_55_5'](p5, p5_tmp, out_size=p5.shape[-2:])\n            # sum(p7, gp(p5_out, p4_2)) -> p7_out\n            p7_tmp = stage['gp_54_7'](p5, p4_2, out_size=p7.shape[-2:])\n            p7 = stage['sum_77_7'](p7, p7_tmp, out_size=p7.shape[-2:])\n            # gp(p7_out, p5_out) -> p6_out\n            p6 = stage['gp_75_6'](p7, p5, out_size=p6.shape[-2:])\n\n        return p3, p4, p5, p6, p7\n"
  },
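The stage wiring above is hard-coded for exactly five pyramid levels (P3-P7), so `num_outs` is effectively fixed at 5 and any missing levels are created by the extra 1x1-conv plus max-pool downsamples. A quick sketch with illustrative values (assuming mmdet/mmcv are installed):

import torch
from mmdet.models.necks import NASFPN

in_channels = [512, 1024, 2048]               # illustrative C3-C5 widths
inputs = [torch.rand(1, c, 64 // 2**i, 64 // 2**i)
          for i, c in enumerate(in_channels)]
# Two extra downsamples turn the three laterals into the five levels P3-P7.
neck = NASFPN(in_channels, out_channels=256, num_outs=5, stack_times=7).eval()
p3, p4, p5, p6, p7 = neck(inputs)
print(p3.shape[-1], p7.shape[-1])             # expected: 64 4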
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/nasfcos_fpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule, caffe2_xavier_init\nfrom mmcv.ops.merge_cells import ConcatCell\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass NASFCOS_FPN(BaseModule):\n    \"\"\"FPN structure in NASFPN.\n\n    Implementation of paper `NAS-FCOS: Fast Neural Architecture Search for\n    Object Detection <https://arxiv.org/abs/1906.04423>`_\n\n    Args:\n        in_channels (List[int]): Number of input channels per scale.\n        out_channels (int): Number of output channels (used at each scale)\n        num_outs (int): Number of output scales.\n        start_level (int): Index of the start input backbone level used to\n            build the feature pyramid. Default: 0.\n        end_level (int): Index of the end input backbone level (exclusive) to\n            build the feature pyramid. Default: -1, which means the last level.\n        add_extra_convs (bool): It decides whether to add conv\n            layers on top of the original feature maps. Default to False.\n            If True, its actual mode is specified by `extra_convs_on_inputs`.\n        conv_cfg (dict): dictionary to construct and config conv layer.\n        norm_cfg (dict): dictionary to construct and config norm layer.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs,\n                 start_level=1,\n                 end_level=-1,\n                 add_extra_convs=False,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 init_cfg=None):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super(NASFCOS_FPN, self).__init__(init_cfg)\n        assert isinstance(in_channels, list)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        self.num_ins = len(in_channels)\n        self.num_outs = num_outs\n        self.norm_cfg = norm_cfg\n        self.conv_cfg = conv_cfg\n\n        if end_level == -1 or end_level == self.num_ins - 1:\n            self.backbone_end_level = self.num_ins\n            assert num_outs >= self.num_ins - start_level\n        else:\n            # if end_level is not the last level, no extra level is allowed\n            self.backbone_end_level = end_level + 1\n            assert end_level < self.num_ins\n            assert num_outs == end_level - start_level + 1\n        self.start_level = start_level\n        self.end_level = end_level\n        self.add_extra_convs = add_extra_convs\n\n        self.adapt_convs = nn.ModuleList()\n        for i in range(self.start_level, self.backbone_end_level):\n            adapt_conv = ConvModule(\n                in_channels[i],\n                out_channels,\n                1,\n                stride=1,\n                padding=0,\n                bias=False,\n                norm_cfg=dict(type='BN'),\n                act_cfg=dict(type='ReLU', inplace=False))\n            self.adapt_convs.append(adapt_conv)\n\n        # C2 is omitted according to the paper\n        extra_levels = num_outs - self.backbone_end_level + self.start_level\n\n        def build_concat_cell(with_input1_conv, with_input2_conv):\n            cell_conv_cfg = dict(\n 
                kernel_size=1, padding=0, bias=False, groups=out_channels)\n            return ConcatCell(\n                in_channels=out_channels,\n                out_channels=out_channels,\n                with_out_conv=True,\n                out_conv_cfg=cell_conv_cfg,\n                out_norm_cfg=dict(type='BN'),\n                out_conv_order=('norm', 'act', 'conv'),\n                with_input1_conv=with_input1_conv,\n                with_input2_conv=with_input2_conv,\n                input_conv_cfg=conv_cfg,\n                input_norm_cfg=norm_cfg,\n                upsample_mode='nearest')\n\n        # Denote c3=f0, c4=f1, c5=f2 for convenience\n        self.fpn = nn.ModuleDict()\n        self.fpn['c22_1'] = build_concat_cell(True, True)\n        self.fpn['c22_2'] = build_concat_cell(True, True)\n        self.fpn['c32'] = build_concat_cell(True, False)\n        self.fpn['c02'] = build_concat_cell(True, False)\n        self.fpn['c42'] = build_concat_cell(True, True)\n        self.fpn['c36'] = build_concat_cell(True, True)\n        self.fpn['c61'] = build_concat_cell(True, True)  # f9\n        self.extra_downsamples = nn.ModuleList()\n        for i in range(extra_levels):\n            extra_act_cfg = None if i == 0 \\\n                else dict(type='ReLU', inplace=False)\n            self.extra_downsamples.append(\n                ConvModule(\n                    out_channels,\n                    out_channels,\n                    3,\n                    stride=2,\n                    padding=1,\n                    act_cfg=extra_act_cfg,\n                    order=('act', 'norm', 'conv')))\n\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        feats = [\n            adapt_conv(inputs[i + self.start_level])\n            for i, adapt_conv in enumerate(self.adapt_convs)\n        ]\n\n        for (i, module_name) in enumerate(self.fpn):\n            idx_1, idx_2 = int(module_name[1]), int(module_name[2])\n            res = self.fpn[module_name](feats[idx_1], feats[idx_2])\n            feats.append(res)\n\n        ret = []\n        for (idx, input_idx) in zip([9, 8, 7], [1, 2, 3]):  # add P3, P4, P5\n            feats1, feats2 = feats[idx], feats[5]\n            feats2_resize = F.interpolate(\n                feats2,\n                size=feats1.size()[2:],\n                mode='bilinear',\n                align_corners=False)\n\n            feats_sum = feats1 + feats2_resize\n            ret.append(\n                F.interpolate(\n                    feats_sum,\n                    size=inputs[input_idx].size()[2:],\n                    mode='bilinear',\n                    align_corners=False))\n\n        for submodule in self.extra_downsamples:\n            ret.append(submodule(ret[-1]))\n\n        return tuple(ret)\n\n    def init_weights(self):\n        \"\"\"Initialize the weights of module.\"\"\"\n        super(NASFCOS_FPN, self).init_weights()\n        for module in self.fpn.values():\n            if hasattr(module, 'out_conv'):\n                caffe2_xavier_init(module.out_conv.conv)\n\n        for modules in [\n                self.adapt_convs.modules(),\n                self.extra_downsamples.modules()\n        ]:\n            for module in modules:\n                if isinstance(module, nn.Conv2d):\n                    caffe2_xavier_init(module)\n"
  },
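NASFCOS_FPN assumes four backbone levels with `start_level=1` (C2 is dropped, as the comment notes); the searched cell graph produces the P3-P5 candidates, which are fused with feature f5 and then downsampled into the remaining levels. A quick sketch with illustrative values (assuming mmdet/mmcv are installed):

import torch
from mmdet.models.necks import NASFCOS_FPN

in_channels = [256, 512, 1024, 2048]          # illustrative C2-C5 widths
inputs = [torch.rand(1, c, 64 // 2**i, 64 // 2**i)
          for i, c in enumerate(in_channels)]
neck = NASFCOS_FPN(in_channels, out_channels=256, num_outs=5).eval()
outs = neck(inputs)
print([o.shape[-1] for o in outs])            # expected: [32, 16, 8, 4, 2]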
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/pafpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import auto_fp16\n\nfrom ..builder import NECKS\nfrom .fpn import FPN\n\n\n@NECKS.register_module()\nclass PAFPN(FPN):\n    \"\"\"Path Aggregation Network for Instance Segmentation.\n\n    This is an implementation of the `PAFPN in Path Aggregation Network\n    <https://arxiv.org/abs/1803.01534>`_.\n\n    Args:\n        in_channels (List[int]): Number of input channels per scale.\n        out_channels (int): Number of output channels (used at each scale)\n        num_outs (int): Number of output scales.\n        start_level (int): Index of the start input backbone level used to\n            build the feature pyramid. Default: 0.\n        end_level (int): Index of the end input backbone level (exclusive) to\n            build the feature pyramid. Default: -1, which means the last level.\n        add_extra_convs (bool | str): If bool, it decides whether to add conv\n            layers on top of the original feature maps. Default to False.\n            If True, it is equivalent to `add_extra_convs='on_input'`.\n            If str, it specifies the source feature map of the extra convs.\n            Only the following options are allowed\n\n            - 'on_input': Last feat map of neck inputs (i.e. backbone feature).\n            - 'on_lateral':  Last feature map after lateral convs.\n            - 'on_output': The last output feature map after fpn convs.\n        relu_before_extra_convs (bool): Whether to apply relu before the extra\n            conv. Default: False.\n        no_norm_on_lateral (bool): Whether to apply norm on lateral.\n            Default: False.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Config dict for normalization layer. 
Default: None.\n        act_cfg (str): Config dict for activation layer in ConvModule.\n            Default: None.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_outs,\n                 start_level=0,\n                 end_level=-1,\n                 add_extra_convs=False,\n                 relu_before_extra_convs=False,\n                 no_norm_on_lateral=False,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 act_cfg=None,\n                 init_cfg=dict(\n                     type='Xavier', layer='Conv2d', distribution='uniform')):\n        super(PAFPN, self).__init__(\n            in_channels,\n            out_channels,\n            num_outs,\n            start_level,\n            end_level,\n            add_extra_convs,\n            relu_before_extra_convs,\n            no_norm_on_lateral,\n            conv_cfg,\n            norm_cfg,\n            act_cfg,\n            init_cfg=init_cfg)\n        # add extra bottom up pathway\n        self.downsample_convs = nn.ModuleList()\n        self.pafpn_convs = nn.ModuleList()\n        for i in range(self.start_level + 1, self.backbone_end_level):\n            d_conv = ConvModule(\n                out_channels,\n                out_channels,\n                3,\n                stride=2,\n                padding=1,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg,\n                inplace=False)\n            pafpn_conv = ConvModule(\n                out_channels,\n                out_channels,\n                3,\n                padding=1,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg,\n                inplace=False)\n            self.downsample_convs.append(d_conv)\n            self.pafpn_convs.append(pafpn_conv)\n\n    @auto_fp16()\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        assert len(inputs) == len(self.in_channels)\n\n        # build laterals\n        laterals = [\n            lateral_conv(inputs[i + self.start_level])\n            for i, lateral_conv in enumerate(self.lateral_convs)\n        ]\n\n        # build top-down path\n        used_backbone_levels = len(laterals)\n        for i in range(used_backbone_levels - 1, 0, -1):\n            prev_shape = laterals[i - 1].shape[2:]\n            # fix runtime error of \"+=\" inplace operation in PyTorch 1.10\n            laterals[i - 1] = laterals[i - 1] + F.interpolate(\n                laterals[i], size=prev_shape, mode='nearest')\n\n        # build outputs\n        # part 1: from original levels\n        inter_outs = [\n            self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels)\n        ]\n\n        # part 2: add bottom-up path\n        for i in range(0, used_backbone_levels - 1):\n            inter_outs[i + 1] += self.downsample_convs[i](inter_outs[i])\n\n        outs = []\n        outs.append(inter_outs[0])\n        outs.extend([\n            self.pafpn_convs[i - 1](inter_outs[i])\n            for i in range(1, used_backbone_levels)\n        ])\n\n        # part 3: add extra levels\n        if self.num_outs > len(outs):\n            # use max pool to get more levels on top of outputs\n            # (e.g., Faster R-CNN, Mask R-CNN)\n            if not self.add_extra_convs:\n                for i in range(self.num_outs - 
used_backbone_levels):\n                    outs.append(F.max_pool2d(outs[-1], 1, stride=2))\n            # add conv layers on top of original feature maps (RetinaNet)\n            else:\n                if self.add_extra_convs == 'on_input':\n                    orig = inputs[self.backbone_end_level - 1]\n                    outs.append(self.fpn_convs[used_backbone_levels](orig))\n                elif self.add_extra_convs == 'on_lateral':\n                    outs.append(self.fpn_convs[used_backbone_levels](\n                        laterals[-1]))\n                elif self.add_extra_convs == 'on_output':\n                    outs.append(self.fpn_convs[used_backbone_levels](outs[-1]))\n                else:\n                    raise NotImplementedError\n                for i in range(used_backbone_levels + 1, self.num_outs):\n                    if self.relu_before_extra_convs:\n                        outs.append(self.fpn_convs[i](F.relu(outs[-1])))\n                    else:\n                        outs.append(self.fpn_convs[i](outs[-1]))\n        return tuple(outs)\n"
  },
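PAFPN keeps the FPN interface unchanged and only adds the bottom-up augmentation path, so it can be dropped in wherever FPN is used. A quick sketch with illustrative values (assuming mmdet/mmcv are installed):

import torch
from mmdet.models.necks import PAFPN

in_channels = [256, 512, 1024, 2048]          # illustrative C2-C5 widths
inputs = [torch.rand(1, c, 64 // 2**i, 64 // 2**i)
          for i, c in enumerate(in_channels)]
# With add_extra_convs=False the fifth level is a stride-2 max-pool of the
# last PAFPN output.
neck = PAFPN(in_channels, out_channels=256, num_outs=5).eval()
outs = neck(inputs)
print([o.shape[-1] for o in outs])            # expected: [64, 32, 16, 8, 4]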
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/rfp.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import constant_init, xavier_init\nfrom mmcv.runner import BaseModule, ModuleList\n\nfrom ..builder import NECKS, build_backbone\nfrom .fpn import FPN\n\n\nclass ASPP(BaseModule):\n    \"\"\"ASPP (Atrous Spatial Pyramid Pooling)\n\n    This is an implementation of the ASPP module used in DetectoRS\n    (https://arxiv.org/pdf/2006.02334.pdf)\n\n    Args:\n        in_channels (int): Number of input channels.\n        out_channels (int): Number of channels produced by this module\n        dilations (tuple[int]): Dilations of the four branches.\n            Default: (1, 3, 6, 1)\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 dilations=(1, 3, 6, 1),\n                 init_cfg=dict(type='Kaiming', layer='Conv2d')):\n        super().__init__(init_cfg)\n        assert dilations[-1] == 1\n        self.aspp = nn.ModuleList()\n        for dilation in dilations:\n            kernel_size = 3 if dilation > 1 else 1\n            padding = dilation if dilation > 1 else 0\n            conv = nn.Conv2d(\n                in_channels,\n                out_channels,\n                kernel_size=kernel_size,\n                stride=1,\n                dilation=dilation,\n                padding=padding,\n                bias=True)\n            self.aspp.append(conv)\n        self.gap = nn.AdaptiveAvgPool2d(1)\n\n    def forward(self, x):\n        avg_x = self.gap(x)\n        out = []\n        for aspp_idx in range(len(self.aspp)):\n            inp = avg_x if (aspp_idx == len(self.aspp) - 1) else x\n            out.append(F.relu_(self.aspp[aspp_idx](inp)))\n        out[-1] = out[-1].expand_as(out[-2])\n        out = torch.cat(out, dim=1)\n        return out\n\n\n@NECKS.register_module()\nclass RFP(FPN):\n    \"\"\"RFP (Recursive Feature Pyramid)\n\n    This is an implementation of RFP in `DetectoRS\n    <https://arxiv.org/pdf/2006.02334.pdf>`_. Different from standard FPN, the\n    input of RFP should be multi level features along with origin input image\n    of backbone.\n\n    Args:\n        rfp_steps (int): Number of unrolled steps of RFP.\n        rfp_backbone (dict): Configuration of the backbone for RFP.\n        aspp_out_channels (int): Number of output channels of ASPP module.\n        aspp_dilations (tuple[int]): Dilation rates of four branches.\n            Default: (1, 3, 6, 1)\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 rfp_steps,\n                 rfp_backbone,\n                 aspp_out_channels,\n                 aspp_dilations=(1, 3, 6, 1),\n                 init_cfg=None,\n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super().__init__(init_cfg=init_cfg, **kwargs)\n        self.rfp_steps = rfp_steps\n        # Be careful! 
Pretrained weights cannot be loaded when use\n        # nn.ModuleList\n        self.rfp_modules = ModuleList()\n        for rfp_idx in range(1, rfp_steps):\n            rfp_module = build_backbone(rfp_backbone)\n            self.rfp_modules.append(rfp_module)\n        self.rfp_aspp = ASPP(self.out_channels, aspp_out_channels,\n                             aspp_dilations)\n        self.rfp_weight = nn.Conv2d(\n            self.out_channels,\n            1,\n            kernel_size=1,\n            stride=1,\n            padding=0,\n            bias=True)\n\n    def init_weights(self):\n        # Avoid using super().init_weights(), which may alter the default\n        # initialization of the modules in self.rfp_modules that have missing\n        # keys in the pretrained checkpoint.\n        for convs in [self.lateral_convs, self.fpn_convs]:\n            for m in convs.modules():\n                if isinstance(m, nn.Conv2d):\n                    xavier_init(m, distribution='uniform')\n        for rfp_idx in range(self.rfp_steps - 1):\n            self.rfp_modules[rfp_idx].init_weights()\n        constant_init(self.rfp_weight, 0)\n\n    def forward(self, inputs):\n        inputs = list(inputs)\n        assert len(inputs) == len(self.in_channels) + 1  # +1 for input image\n        img = inputs.pop(0)\n        # FPN forward\n        x = super().forward(tuple(inputs))\n        for rfp_idx in range(self.rfp_steps - 1):\n            rfp_feats = [x[0]] + list(\n                self.rfp_aspp(x[i]) for i in range(1, len(x)))\n            x_idx = self.rfp_modules[rfp_idx].rfp_forward(img, rfp_feats)\n            # FPN forward\n            x_idx = super().forward(x_idx)\n            x_new = []\n            for ft_idx in range(len(x_idx)):\n                add_weight = torch.sigmoid(self.rfp_weight(x_idx[ft_idx]))\n                x_new.append(add_weight * x_idx[ft_idx] +\n                             (1 - add_weight) * x[ft_idx])\n            x = x_new\n        return x\n"
  },
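Note: a minimal plain-PyTorch sketch (no mmcv, channel sizes are illustrative) of the ASPP fusion implemented above with the default dilations (1, 3, 6, 1): a 1x1 branch and two dilated 3x3 branches applied to the full map, plus a 1x1 branch applied to the global-average-pooled map whose output is broadcast back to the spatial size before all four branches are concatenated.

import torch
import torch.nn as nn
import torch.nn.functional as F

x = torch.randn(2, 256, 32, 32)
gap = F.adaptive_avg_pool2d(x, 1)                              # (2, 256, 1, 1)
branches = [nn.Conv2d(256, 64, 1)(x),                          # dilation 1 -> 1x1 conv
            nn.Conv2d(256, 64, 3, padding=3, dilation=3)(x),
            nn.Conv2d(256, 64, 3, padding=6, dilation=6)(x)]
# last branch runs on the pooled map and is expanded to the full spatial size
branches.append(nn.Conv2d(256, 64, 1)(gap).expand_as(branches[-1]))
out = torch.cat([F.relu(b) for b in branches], dim=1)          # (2, 256, 32, 32)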
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/ssd_neck.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, DepthwiseSeparableConvModule\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import NECKS\n\n\n@NECKS.register_module()\nclass SSDNeck(BaseModule):\n    \"\"\"Extra layers of SSD backbone to generate multi-scale feature maps.\n\n    Args:\n        in_channels (Sequence[int]): Number of input channels per scale.\n        out_channels (Sequence[int]): Number of output channels per scale.\n        level_strides (Sequence[int]): Stride of 3x3 conv per level.\n        level_paddings (Sequence[int]): Padding size of 3x3 conv per level.\n        l2_norm_scale (float|None): L2 normalization layer init scale.\n            If None, not use L2 normalization on the first input feature.\n        last_kernel_size (int): Kernel size of the last conv layer.\n            Default: 3.\n        use_depthwise (bool): Whether to use DepthwiseSeparableConv.\n            Default: False.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Default: None.\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='ReLU').\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 level_strides,\n                 level_paddings,\n                 l2_norm_scale=20.,\n                 last_kernel_size=3,\n                 use_depthwise=False,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 act_cfg=dict(type='ReLU'),\n                 init_cfg=[\n                     dict(\n                         type='Xavier', distribution='uniform',\n                         layer='Conv2d'),\n                     dict(type='Constant', val=1, layer='BatchNorm2d'),\n                 ]):\n        super(SSDNeck, self).__init__(init_cfg)\n        assert len(out_channels) > len(in_channels)\n        assert len(out_channels) - len(in_channels) == len(level_strides)\n        assert len(level_strides) == len(level_paddings)\n        assert in_channels == out_channels[:len(in_channels)]\n\n        if l2_norm_scale:\n            self.l2_norm = L2Norm(in_channels[0], l2_norm_scale)\n            self.init_cfg += [\n                dict(\n                    type='Constant',\n                    val=self.l2_norm.scale,\n                    override=dict(name='l2_norm'))\n            ]\n\n        self.extra_layers = nn.ModuleList()\n        extra_layer_channels = out_channels[len(in_channels):]\n        second_conv = DepthwiseSeparableConvModule if \\\n            use_depthwise else ConvModule\n\n        for i, (out_channel, stride, padding) in enumerate(\n                zip(extra_layer_channels, level_strides, level_paddings)):\n            kernel_size = last_kernel_size \\\n                if i == len(extra_layer_channels) - 1 else 3\n            per_lvl_convs = nn.Sequential(\n                ConvModule(\n                    out_channels[len(in_channels) - 1 + i],\n                    out_channel // 2,\n                    1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg),\n                second_conv(\n                    out_channel // 2,\n                    out_channel,\n                    kernel_size,\n                  
  stride=stride,\n                    padding=padding,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n            self.extra_layers.append(per_lvl_convs)\n\n    def forward(self, inputs):\n        \"\"\"Forward function.\"\"\"\n        outs = [feat for feat in inputs]\n        if hasattr(self, 'l2_norm'):\n            outs[0] = self.l2_norm(outs[0])\n\n        feat = outs[-1]\n        for layer in self.extra_layers:\n            feat = layer(feat)\n            outs.append(feat)\n        return tuple(outs)\n\n\nclass L2Norm(nn.Module):\n\n    def __init__(self, n_dims, scale=20., eps=1e-10):\n        \"\"\"L2 normalization layer.\n\n        Args:\n            n_dims (int): Number of dimensions to be normalized\n            scale (float, optional): Defaults to 20..\n            eps (float, optional): Used to avoid division by zero.\n                Defaults to 1e-10.\n        \"\"\"\n        super(L2Norm, self).__init__()\n        self.n_dims = n_dims\n        self.weight = nn.Parameter(torch.Tensor(self.n_dims))\n        self.eps = eps\n        self.scale = scale\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        # normalization layer convert to FP32 in FP16 training\n        x_float = x.float()\n        norm = x_float.pow(2).sum(1, keepdim=True).sqrt() + self.eps\n        return (self.weight[None, :, None, None].float().expand_as(x_float) *\n                x_float / norm).type_as(x)\n"
  },
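Note: a minimal sketch of the L2Norm behaviour defined above, with a shape typical of SSD's first feature map (an assumption for illustration): each spatial position is divided by the L2 norm of its channel vector, then rescaled by a learnable per-channel weight that is initialised to l2_norm_scale (20 by default).

import torch

x = torch.randn(1, 512, 38, 38)
weight = torch.full((512,), 20.0)                              # stands in for the learnable scale
norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + 1e-10
out = weight[None, :, None, None] * x / norm                   # same shape as x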
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/yolo_neck.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Copyright (c) 2019 Western Digital Corporation or its affiliates.\n\nimport torch\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import NECKS\n\n\nclass DetectionBlock(BaseModule):\n    \"\"\"Detection block in YOLO neck.\n\n    Let out_channels = n, the DetectionBlock contains:\n    Six ConvLayers, 1 Conv2D Layer and 1 YoloLayer.\n    The first 6 ConvLayers are formed the following way:\n        1x1xn, 3x3x2n, 1x1xn, 3x3x2n, 1x1xn, 3x3x2n.\n    The Conv2D layer is 1x1x255.\n    Some block will have branch after the fifth ConvLayer.\n    The input channel is arbitrary (in_channels)\n\n    Args:\n        in_channels (int): The number of input channels.\n        out_channels (int): The number of output channels.\n        conv_cfg (dict): Config dict for convolution layer. Default: None.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Default: dict(type='BN', requires_grad=True)\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='LeakyReLU', negative_slope=0.1).\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),\n                 init_cfg=None):\n        super(DetectionBlock, self).__init__(init_cfg)\n        double_out_channels = out_channels * 2\n\n        # shortcut\n        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)\n        self.conv1 = ConvModule(in_channels, out_channels, 1, **cfg)\n        self.conv2 = ConvModule(\n            out_channels, double_out_channels, 3, padding=1, **cfg)\n        self.conv3 = ConvModule(double_out_channels, out_channels, 1, **cfg)\n        self.conv4 = ConvModule(\n            out_channels, double_out_channels, 3, padding=1, **cfg)\n        self.conv5 = ConvModule(double_out_channels, out_channels, 1, **cfg)\n\n    def forward(self, x):\n        tmp = self.conv1(x)\n        tmp = self.conv2(tmp)\n        tmp = self.conv3(tmp)\n        tmp = self.conv4(tmp)\n        out = self.conv5(tmp)\n        return out\n\n\n@NECKS.register_module()\nclass YOLOV3Neck(BaseModule):\n    \"\"\"The neck of YOLOV3.\n\n    It can be treated as a simplified version of FPN. It\n    will take the result from Darknet backbone and do some upsampling and\n    concatenation. It will finally output the detection result.\n\n    Note:\n        The input feats should be from top to bottom.\n            i.e., from high-lvl to low-lvl\n        But YOLOV3Neck will process them in reversed order.\n            i.e., from bottom (high-lvl) to top (low-lvl)\n\n    Args:\n        num_scales (int): The number of scales / stages.\n        in_channels (List[int]): The number of input channels per scale.\n        out_channels (List[int]): The number of output channels  per scale.\n        conv_cfg (dict, optional): Config dict for convolution layer.\n            Default: None.\n        norm_cfg (dict, optional): Dictionary to construct and config norm\n            layer. 
Default: dict(type='BN', requires_grad=True)\n        act_cfg (dict, optional): Config dict for activation layer.\n            Default: dict(type='LeakyReLU', negative_slope=0.1).\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 num_scales,\n                 in_channels,\n                 out_channels,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),\n                 init_cfg=None):\n        super(YOLOV3Neck, self).__init__(init_cfg)\n        assert (num_scales == len(in_channels) == len(out_channels))\n        self.num_scales = num_scales\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n\n        # shortcut\n        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)\n\n        # To support arbitrary scales, the code looks awful, but it works.\n        # Better solution is welcomed.\n        self.detect1 = DetectionBlock(in_channels[0], out_channels[0], **cfg)\n        for i in range(1, self.num_scales):\n            in_c, out_c = self.in_channels[i], self.out_channels[i]\n            inter_c = out_channels[i - 1]\n            self.add_module(f'conv{i}', ConvModule(inter_c, out_c, 1, **cfg))\n            # in_c + out_c : High-lvl feats will be cat with low-lvl feats\n            self.add_module(f'detect{i+1}',\n                            DetectionBlock(in_c + out_c, out_c, **cfg))\n\n    def forward(self, feats):\n        assert len(feats) == self.num_scales\n\n        # processed from bottom (high-lvl) to top (low-lvl)\n        outs = []\n        out = self.detect1(feats[-1])\n        outs.append(out)\n\n        for i, x in enumerate(reversed(feats[:-1])):\n            conv = getattr(self, f'conv{i+1}')\n            tmp = conv(out)\n\n            # Cat with low-lvl feats\n            tmp = F.interpolate(tmp, scale_factor=2)\n            tmp = torch.cat((tmp, x), 1)\n\n            detect = getattr(self, f'detect{i+2}')\n            out = detect(tmp)\n            outs.append(out)\n\n        return tuple(outs)\n"
  },
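Note: a shape-only sketch of the concatenation step in YOLOV3Neck.forward, using the usual YOLOv3/Darknet-53 channel and stride configuration (an assumption): the reduced high-level map is upsampled by 2 and concatenated with the next, shallower backbone feature before the following DetectionBlock.

import torch
import torch.nn.functional as F

high = torch.randn(1, 256, 13, 13)   # output of conv1 applied to the deepest detect output
low = torch.randn(1, 512, 26, 26)    # next backbone feature (one level shallower)
merged = torch.cat((F.interpolate(high, scale_factor=2), low), dim=1)
print(merged.shape)                  # torch.Size([1, 768, 26, 26]) -> feeds detect2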
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/necks/yolox_pafpn.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, DepthwiseSeparableConvModule\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import NECKS\nfrom ..utils import CSPLayer\n\n\n@NECKS.register_module()\nclass YOLOXPAFPN(BaseModule):\n    \"\"\"Path Aggregation Network used in YOLOX.\n\n    Args:\n        in_channels (List[int]): Number of input channels per scale.\n        out_channels (int): Number of output channels (used at each scale)\n        num_csp_blocks (int): Number of bottlenecks in CSPLayer. Default: 3\n        use_depthwise (bool): Whether to depthwise separable convolution in\n            blocks. Default: False\n        upsample_cfg (dict): Config dict for interpolate layer.\n            Default: `dict(scale_factor=2, mode='nearest')`\n        conv_cfg (dict, optional): Config dict for convolution layer.\n            Default: None, which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='BN')\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='Swish')\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 num_csp_blocks=3,\n                 use_depthwise=False,\n                 upsample_cfg=dict(scale_factor=2, mode='nearest'),\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),\n                 act_cfg=dict(type='Swish'),\n                 init_cfg=dict(\n                     type='Kaiming',\n                     layer='Conv2d',\n                     a=math.sqrt(5),\n                     distribution='uniform',\n                     mode='fan_in',\n                     nonlinearity='leaky_relu')):\n        super(YOLOXPAFPN, self).__init__(init_cfg)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n\n        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule\n\n        # build top-down blocks\n        self.upsample = nn.Upsample(**upsample_cfg)\n        self.reduce_layers = nn.ModuleList()\n        self.top_down_blocks = nn.ModuleList()\n        for idx in range(len(in_channels) - 1, 0, -1):\n            self.reduce_layers.append(\n                ConvModule(\n                    in_channels[idx],\n                    in_channels[idx - 1],\n                    1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n            self.top_down_blocks.append(\n                CSPLayer(\n                    in_channels[idx - 1] * 2,\n                    in_channels[idx - 1],\n                    num_blocks=num_csp_blocks,\n                    add_identity=False,\n                    use_depthwise=use_depthwise,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n\n        # build bottom-up blocks\n        self.downsamples = nn.ModuleList()\n        self.bottom_up_blocks = nn.ModuleList()\n        for idx in range(len(in_channels) - 1):\n            self.downsamples.append(\n                conv(\n                    in_channels[idx],\n                    in_channels[idx],\n                    3,\n                    stride=2,\n                    padding=1,\n                 
   conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n            self.bottom_up_blocks.append(\n                CSPLayer(\n                    in_channels[idx] * 2,\n                    in_channels[idx + 1],\n                    num_blocks=num_csp_blocks,\n                    add_identity=False,\n                    use_depthwise=use_depthwise,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n\n        self.out_convs = nn.ModuleList()\n        for i in range(len(in_channels)):\n            self.out_convs.append(\n                ConvModule(\n                    in_channels[i],\n                    out_channels,\n                    1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    act_cfg=act_cfg))\n\n    def forward(self, inputs):\n        \"\"\"\n        Args:\n            inputs (tuple[Tensor]): input features.\n\n        Returns:\n            tuple[Tensor]: YOLOXPAFPN features.\n        \"\"\"\n        assert len(inputs) == len(self.in_channels)\n\n        # top-down path\n        inner_outs = [inputs[-1]]\n        for idx in range(len(self.in_channels) - 1, 0, -1):\n            feat_heigh = inner_outs[0]\n            feat_low = inputs[idx - 1]\n            feat_heigh = self.reduce_layers[len(self.in_channels) - 1 - idx](\n                feat_heigh)\n            inner_outs[0] = feat_heigh\n\n            upsample_feat = self.upsample(feat_heigh)\n\n            inner_out = self.top_down_blocks[len(self.in_channels) - 1 - idx](\n                torch.cat([upsample_feat, feat_low], 1))\n            inner_outs.insert(0, inner_out)\n\n        # bottom-up path\n        outs = [inner_outs[0]]\n        for idx in range(len(self.in_channels) - 1):\n            feat_low = outs[-1]\n            feat_height = inner_outs[idx + 1]\n            downsample_feat = self.downsamples[idx](feat_low)\n            out = self.bottom_up_blocks[idx](\n                torch.cat([downsample_feat, feat_height], 1))\n            outs.append(out)\n\n        # out convs\n        for idx, conv in enumerate(self.out_convs):\n            outs[idx] = conv(outs[idx])\n\n        return tuple(outs)\n"
  },
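Note: a minimal forward pass through the neck defined above, assuming mmcv/mmdet are importable; input shapes correspond to strides 8/16/32 for a 256x256 image and are purely illustrative. Each output scale is projected to out_channels by the final 1x1 out_convs.

import torch
from mmdet.models.necks import YOLOXPAFPN

neck = YOLOXPAFPN(in_channels=[128, 256, 512], out_channels=128, num_csp_blocks=1)
feats = (torch.randn(1, 128, 32, 32),
         torch.randn(1, 256, 16, 16),
         torch.randn(1, 512, 8, 8))
outs = neck(feats)
print([o.shape for o in outs])   # three maps at strides 8/16/32, each with 128 channels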
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/plugins/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .dropblock import DropBlock\nfrom .msdeformattn_pixel_decoder import MSDeformAttnPixelDecoder\nfrom .pixel_decoder import PixelDecoder, TransformerEncoderPixelDecoder\n\n__all__ = [\n    'DropBlock', 'PixelDecoder', 'TransformerEncoderPixelDecoder',\n    'MSDeformAttnPixelDecoder'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/plugins/dropblock.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import PLUGIN_LAYERS\n\neps = 1e-6\n\n\n@PLUGIN_LAYERS.register_module()\nclass DropBlock(nn.Module):\n    \"\"\"Randomly drop some regions of feature maps.\n\n     Please refer to the method proposed in `DropBlock\n     <https://arxiv.org/abs/1810.12890>`_ for details.\n\n    Args:\n        drop_prob (float): The probability of dropping each block.\n        block_size (int): The size of dropped blocks.\n        warmup_iters (int): The drop probability will linearly increase\n            from `0` to `drop_prob` during the first `warmup_iters` iterations.\n            Default: 2000.\n    \"\"\"\n\n    def __init__(self, drop_prob, block_size, warmup_iters=2000, **kwargs):\n        super(DropBlock, self).__init__()\n        assert block_size % 2 == 1\n        assert 0 < drop_prob <= 1\n        assert warmup_iters >= 0\n        self.drop_prob = drop_prob\n        self.block_size = block_size\n        self.warmup_iters = warmup_iters\n        self.iter_cnt = 0\n\n    def forward(self, x):\n        \"\"\"\n        Args:\n            x (Tensor): Input feature map on which some areas will be randomly\n                dropped.\n\n        Returns:\n            Tensor: The tensor after DropBlock layer.\n        \"\"\"\n        if not self.training:\n            return x\n        self.iter_cnt += 1\n        N, C, H, W = list(x.shape)\n        gamma = self._compute_gamma((H, W))\n        mask_shape = (N, C, H - self.block_size + 1, W - self.block_size + 1)\n        mask = torch.bernoulli(torch.full(mask_shape, gamma, device=x.device))\n\n        mask = F.pad(mask, [self.block_size // 2] * 4, value=0)\n        mask = F.max_pool2d(\n            input=mask,\n            stride=(1, 1),\n            kernel_size=(self.block_size, self.block_size),\n            padding=self.block_size // 2)\n        mask = 1 - mask\n        x = x * mask * mask.numel() / (eps + mask.sum())\n        return x\n\n    def _compute_gamma(self, feat_size):\n        \"\"\"Compute the value of gamma according to paper. gamma is the\n        parameter of bernoulli distribution, which controls the number of\n        features to drop.\n\n        gamma = (drop_prob * fm_area) / (drop_area * keep_area)\n\n        Args:\n            feat_size (tuple[int, int]): The height and width of feature map.\n\n        Returns:\n            float: The value of gamma.\n        \"\"\"\n        gamma = (self.drop_prob * feat_size[0] * feat_size[1])\n        gamma /= ((feat_size[0] - self.block_size + 1) *\n                  (feat_size[1] - self.block_size + 1))\n        gamma /= (self.block_size**2)\n        factor = (1.0 if self.iter_cnt > self.warmup_iters else self.iter_cnt /\n                  self.warmup_iters)\n        return gamma * factor\n\n    def extra_repr(self):\n        return (f'drop_prob={self.drop_prob}, block_size={self.block_size}, '\n                f'warmup_iters={self.warmup_iters}')\n"
  },
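Note: a worked example of the gamma computation documented in _compute_gamma above, after warm-up (factor = 1), with drop_prob = 0.1, block_size = 3 and a 20x20 feature map.

drop_prob, block_size, H, W = 0.1, 3, 20, 20
gamma = (drop_prob * H * W) / ((H - block_size + 1) * (W - block_size + 1)) / block_size ** 2
print(round(gamma, 5))   # ~0.01372: per-position Bernoulli probability of seeding a dropped block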
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/plugins/msdeformattn_pixel_decoder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (PLUGIN_LAYERS, Conv2d, ConvModule, caffe2_xavier_init,\n                      normal_init, xavier_init)\nfrom mmcv.cnn.bricks.transformer import (build_positional_encoding,\n                                         build_transformer_layer_sequence)\nfrom mmcv.runner import BaseModule, ModuleList\n\nfrom mmdet.core.anchor import MlvlPointGenerator\nfrom mmdet.models.utils.transformer import MultiScaleDeformableAttention\n\n\n@PLUGIN_LAYERS.register_module()\nclass MSDeformAttnPixelDecoder(BaseModule):\n    \"\"\"Pixel decoder with multi-scale deformable attention.\n\n    Args:\n        in_channels (list[int] | tuple[int]): Number of channels in the\n            input feature maps.\n        strides (list[int] | tuple[int]): Output strides of feature from\n            backbone.\n        feat_channels (int): Number of channels for feature.\n        out_channels (int): Number of channels for output.\n        num_outs (int): Number of output scales.\n        norm_cfg (:obj:`mmcv.ConfigDict` | dict): Config for normalization.\n            Defaults to dict(type='GN', num_groups=32).\n        act_cfg (:obj:`mmcv.ConfigDict` | dict): Config for activation.\n            Defaults to dict(type='ReLU').\n        encoder (:obj:`mmcv.ConfigDict` | dict): Config for transformer\n            encoder. Defaults to `DetrTransformerEncoder`.\n        positional_encoding (:obj:`mmcv.ConfigDict` | dict): Config for\n            transformer encoder position encoding. Defaults to\n            dict(type='SinePositionalEncoding', num_feats=128,\n            normalize=True).\n        init_cfg (:obj:`mmcv.ConfigDict` | dict): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=[256, 512, 1024, 2048],\n                 strides=[4, 8, 16, 32],\n                 feat_channels=256,\n                 out_channels=256,\n                 num_outs=3,\n                 norm_cfg=dict(type='GN', num_groups=32),\n                 act_cfg=dict(type='ReLU'),\n                 encoder=dict(\n                     type='DetrTransformerEncoder',\n                     num_layers=6,\n                     transformerlayers=dict(\n                         type='BaseTransformerLayer',\n                         attn_cfgs=dict(\n                             type='MultiScaleDeformableAttention',\n                             embed_dims=256,\n                             num_heads=8,\n                             num_levels=3,\n                             num_points=4,\n                             im2col_step=64,\n                             dropout=0.0,\n                             batch_first=False,\n                             norm_cfg=None,\n                             init_cfg=None),\n                         feedforward_channels=1024,\n                         ffn_dropout=0.0,\n                         operation_order=('self_attn', 'norm', 'ffn', 'norm')),\n                     init_cfg=None),\n                 positional_encoding=dict(\n                     type='SinePositionalEncoding',\n                     num_feats=128,\n                     normalize=True),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.strides = strides\n        self.num_input_levels = len(in_channels)\n        self.num_encoder_levels = \\\n            encoder.transformerlayers.attn_cfgs.num_levels\n        assert 
self.num_encoder_levels >= 1, \\\n            'num_levels in attn_cfgs must be at least one'\n        input_conv_list = []\n        # from top to down (low to high resolution)\n        for i in range(self.num_input_levels - 1,\n                       self.num_input_levels - self.num_encoder_levels - 1,\n                       -1):\n            input_conv = ConvModule(\n                in_channels[i],\n                feat_channels,\n                kernel_size=1,\n                norm_cfg=norm_cfg,\n                act_cfg=None,\n                bias=True)\n            input_conv_list.append(input_conv)\n        self.input_convs = ModuleList(input_conv_list)\n\n        self.encoder = build_transformer_layer_sequence(encoder)\n        self.postional_encoding = build_positional_encoding(\n            positional_encoding)\n        # high resolution to low resolution\n        self.level_encoding = nn.Embedding(self.num_encoder_levels,\n                                           feat_channels)\n\n        # fpn-like structure\n        self.lateral_convs = ModuleList()\n        self.output_convs = ModuleList()\n        self.use_bias = norm_cfg is None\n        # from top to down (low to high resolution)\n        # fpn for the rest features that didn't pass in encoder\n        for i in range(self.num_input_levels - self.num_encoder_levels - 1, -1,\n                       -1):\n            lateral_conv = ConvModule(\n                in_channels[i],\n                feat_channels,\n                kernel_size=1,\n                bias=self.use_bias,\n                norm_cfg=norm_cfg,\n                act_cfg=None)\n            output_conv = ConvModule(\n                feat_channels,\n                feat_channels,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=self.use_bias,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg)\n            self.lateral_convs.append(lateral_conv)\n            self.output_convs.append(output_conv)\n\n        self.mask_feature = Conv2d(\n            feat_channels, out_channels, kernel_size=1, stride=1, padding=0)\n\n        self.num_outs = num_outs\n        self.point_generator = MlvlPointGenerator(strides)\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        for i in range(0, self.num_encoder_levels):\n            xavier_init(\n                self.input_convs[i].conv,\n                gain=1,\n                bias=0,\n                distribution='uniform')\n\n        for i in range(0, self.num_input_levels - self.num_encoder_levels):\n            caffe2_xavier_init(self.lateral_convs[i].conv, bias=0)\n            caffe2_xavier_init(self.output_convs[i].conv, bias=0)\n\n        caffe2_xavier_init(self.mask_feature, bias=0)\n\n        normal_init(self.level_encoding, mean=0, std=1)\n        for p in self.encoder.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_normal_(p)\n\n        # init_weights defined in MultiScaleDeformableAttention\n        for layer in self.encoder.layers:\n            for attn in layer.attentions:\n                if isinstance(attn, MultiScaleDeformableAttention):\n                    attn.init_weights()\n\n    def forward(self, feats):\n        \"\"\"\n        Args:\n            feats (list[Tensor]): Feature maps of each level. 
Each has\n                shape of (batch_size, c, h, w).\n\n        Returns:\n            tuple: A tuple containing the following:\n\n            - mask_feature (Tensor): shape (batch_size, c, h, w).\n            - multi_scale_features (list[Tensor]): Multi scale \\\n                    features, each in shape (batch_size, c, h, w).\n        \"\"\"\n        # generate padding mask for each level, for each image\n        batch_size = feats[0].shape[0]\n        encoder_input_list = []\n        padding_mask_list = []\n        level_positional_encoding_list = []\n        spatial_shapes = []\n        reference_points_list = []\n        for i in range(self.num_encoder_levels):\n            level_idx = self.num_input_levels - i - 1\n            feat = feats[level_idx]\n            feat_projected = self.input_convs[i](feat)\n            h, w = feat.shape[-2:]\n\n            # no padding\n            padding_mask_resized = feat.new_zeros(\n                (batch_size, ) + feat.shape[-2:], dtype=torch.bool)\n            pos_embed = self.postional_encoding(padding_mask_resized)\n            level_embed = self.level_encoding.weight[i]\n            level_pos_embed = level_embed.view(1, -1, 1, 1) + pos_embed\n            # (h_i * w_i, 2)\n            reference_points = self.point_generator.single_level_grid_priors(\n                feat.shape[-2:], level_idx, device=feat.device)\n            # normalize\n            factor = feat.new_tensor([[w, h]]) * self.strides[level_idx]\n            reference_points = reference_points / factor\n\n            # shape (batch_size, c, h_i, w_i) -> (h_i * w_i, batch_size, c)\n            feat_projected = feat_projected.flatten(2).permute(2, 0, 1)\n            level_pos_embed = level_pos_embed.flatten(2).permute(2, 0, 1)\n            padding_mask_resized = padding_mask_resized.flatten(1)\n\n            encoder_input_list.append(feat_projected)\n            padding_mask_list.append(padding_mask_resized)\n            level_positional_encoding_list.append(level_pos_embed)\n            spatial_shapes.append(feat.shape[-2:])\n            reference_points_list.append(reference_points)\n        # shape (batch_size, total_num_query),\n        # total_num_query=sum([., h_i * w_i,.])\n        padding_masks = torch.cat(padding_mask_list, dim=1)\n        # shape (total_num_query, batch_size, c)\n        encoder_inputs = torch.cat(encoder_input_list, dim=0)\n        level_positional_encodings = torch.cat(\n            level_positional_encoding_list, dim=0)\n        device = encoder_inputs.device\n        # shape (num_encoder_levels, 2), from low\n        # resolution to high resolution\n        spatial_shapes = torch.as_tensor(\n            spatial_shapes, dtype=torch.long, device=device)\n        # shape (0, h_0*w_0, h_0*w_0+h_1*w_1, ...)\n        level_start_index = torch.cat((spatial_shapes.new_zeros(\n            (1, )), spatial_shapes.prod(1).cumsum(0)[:-1]))\n        reference_points = torch.cat(reference_points_list, dim=0)\n        reference_points = reference_points[None, :, None].repeat(\n            batch_size, 1, self.num_encoder_levels, 1)\n        valid_radios = reference_points.new_ones(\n            (batch_size, self.num_encoder_levels, 2))\n        # shape (num_total_query, batch_size, c)\n        memory = self.encoder(\n            query=encoder_inputs,\n            key=None,\n            value=None,\n            query_pos=level_positional_encodings,\n            key_pos=None,\n            attn_masks=None,\n            key_padding_mask=None,\n            
query_key_padding_mask=padding_masks,\n            spatial_shapes=spatial_shapes,\n            reference_points=reference_points,\n            level_start_index=level_start_index,\n            valid_radios=valid_radios)\n        # (num_total_query, batch_size, c) -> (batch_size, c, num_total_query)\n        memory = memory.permute(1, 2, 0)\n\n        # from low resolution to high resolution\n        num_query_per_level = [e[0] * e[1] for e in spatial_shapes]\n        outs = torch.split(memory, num_query_per_level, dim=-1)\n        outs = [\n            x.reshape(batch_size, -1, spatial_shapes[i][0],\n                      spatial_shapes[i][1]) for i, x in enumerate(outs)\n        ]\n\n        for i in range(self.num_input_levels - self.num_encoder_levels - 1, -1,\n                       -1):\n            x = feats[i]\n            cur_feat = self.lateral_convs[i](x)\n            y = cur_feat + F.interpolate(\n                outs[-1],\n                size=cur_feat.shape[-2:],\n                mode='bilinear',\n                align_corners=False)\n            y = self.output_convs[i](y)\n            outs.append(y)\n        multi_scale_features = outs[:self.num_outs]\n\n        mask_feature = self.mask_feature(outs[-1])\n        return mask_feature, multi_scale_features\n"
  },
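Note: a small sketch of how level_start_index is derived from spatial_shapes in the decoder above: the flattened queries of all encoder levels are concatenated, and each level starts where the cumulative h*w of the previous levels ends. The level sizes are illustrative.

import torch

spatial_shapes = torch.tensor([[8, 8], [16, 16], [32, 32]])
level_start_index = torch.cat((spatial_shapes.new_zeros((1,)),
                               spatial_shapes.prod(1).cumsum(0)[:-1]))
print(level_start_index)   # tensor([  0,  64, 320])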
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/plugins/pixel_decoder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import PLUGIN_LAYERS, Conv2d, ConvModule, caffe2_xavier_init\nfrom mmcv.cnn.bricks.transformer import (build_positional_encoding,\n                                         build_transformer_layer_sequence)\nfrom mmcv.runner import BaseModule, ModuleList\n\n\n@PLUGIN_LAYERS.register_module()\nclass PixelDecoder(BaseModule):\n    \"\"\"Pixel decoder with a structure like fpn.\n\n    Args:\n        in_channels (list[int] | tuple[int]): Number of channels in the\n            input feature maps.\n        feat_channels (int): Number channels for feature.\n        out_channels (int): Number channels for output.\n        norm_cfg (:obj:`mmcv.ConfigDict` | dict): Config for normalization.\n            Defaults to dict(type='GN', num_groups=32).\n        act_cfg (:obj:`mmcv.ConfigDict` | dict): Config for activation.\n            Defaults to dict(type='ReLU').\n        encoder (:obj:`mmcv.ConfigDict` | dict): Config for transorformer\n            encoder.Defaults to None.\n        positional_encoding (:obj:`mmcv.ConfigDict` | dict): Config for\n            transformer encoder position encoding. Defaults to\n            dict(type='SinePositionalEncoding', num_feats=128,\n            normalize=True).\n        init_cfg (:obj:`mmcv.ConfigDict` | dict):  Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 feat_channels,\n                 out_channels,\n                 norm_cfg=dict(type='GN', num_groups=32),\n                 act_cfg=dict(type='ReLU'),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.in_channels = in_channels\n        self.num_inputs = len(in_channels)\n        self.lateral_convs = ModuleList()\n        self.output_convs = ModuleList()\n        self.use_bias = norm_cfg is None\n        for i in range(0, self.num_inputs - 1):\n            lateral_conv = ConvModule(\n                in_channels[i],\n                feat_channels,\n                kernel_size=1,\n                bias=self.use_bias,\n                norm_cfg=norm_cfg,\n                act_cfg=None)\n            output_conv = ConvModule(\n                feat_channels,\n                feat_channels,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=self.use_bias,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg)\n            self.lateral_convs.append(lateral_conv)\n            self.output_convs.append(output_conv)\n\n        self.last_feat_conv = ConvModule(\n            in_channels[-1],\n            feat_channels,\n            kernel_size=3,\n            padding=1,\n            stride=1,\n            bias=self.use_bias,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n        self.mask_feature = Conv2d(\n            feat_channels, out_channels, kernel_size=3, stride=1, padding=1)\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        for i in range(0, self.num_inputs - 2):\n            caffe2_xavier_init(self.lateral_convs[i].conv, bias=0)\n            caffe2_xavier_init(self.output_convs[i].conv, bias=0)\n\n        caffe2_xavier_init(self.mask_feature, bias=0)\n        caffe2_xavier_init(self.last_feat_conv, bias=0)\n\n    def forward(self, feats, img_metas):\n        \"\"\"\n        Args:\n            feats (list[Tensor]): 
Feature maps of each level. Each has\n                shape of (batch_size, c, h, w).\n            img_metas (list[dict]): List of image information. Pass in\n                for creating more accurate padding mask. Not used here.\n\n        Returns:\n            tuple: a tuple containing the following:\n                - mask_feature (Tensor): Shape (batch_size, c, h, w).\n                - memory (Tensor): Output of last stage of backbone.\\\n                        Shape (batch_size, c, h, w).\n        \"\"\"\n        y = self.last_feat_conv(feats[-1])\n        for i in range(self.num_inputs - 2, -1, -1):\n            x = feats[i]\n            cur_feat = self.lateral_convs[i](x)\n            y = cur_feat + \\\n                F.interpolate(y, size=cur_feat.shape[-2:], mode='nearest')\n            y = self.output_convs[i](y)\n\n        mask_feature = self.mask_feature(y)\n        memory = feats[-1]\n        return mask_feature, memory\n\n\n@PLUGIN_LAYERS.register_module()\nclass TransformerEncoderPixelDecoder(PixelDecoder):\n    \"\"\"Pixel decoder with transormer encoder inside.\n\n    Args:\n        in_channels (list[int] | tuple[int]): Number of channels in the\n            input feature maps.\n        feat_channels (int): Number channels for feature.\n        out_channels (int): Number channels for output.\n        norm_cfg (:obj:`mmcv.ConfigDict` | dict): Config for normalization.\n            Defaults to dict(type='GN', num_groups=32).\n        act_cfg (:obj:`mmcv.ConfigDict` | dict): Config for activation.\n            Defaults to dict(type='ReLU').\n        encoder (:obj:`mmcv.ConfigDict` | dict): Config for transorformer\n            encoder.Defaults to None.\n        positional_encoding (:obj:`mmcv.ConfigDict` | dict): Config for\n            transformer encoder position encoding. 
Defaults to\n            dict(type='SinePositionalEncoding', num_feats=128,\n            normalize=True).\n        init_cfg (:obj:`mmcv.ConfigDict` | dict):  Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 feat_channels,\n                 out_channels,\n                 norm_cfg=dict(type='GN', num_groups=32),\n                 act_cfg=dict(type='ReLU'),\n                 encoder=None,\n                 positional_encoding=dict(\n                     type='SinePositionalEncoding',\n                     num_feats=128,\n                     normalize=True),\n                 init_cfg=None):\n        super(TransformerEncoderPixelDecoder, self).__init__(\n            in_channels,\n            feat_channels,\n            out_channels,\n            norm_cfg,\n            act_cfg,\n            init_cfg=init_cfg)\n        self.last_feat_conv = None\n\n        self.encoder = build_transformer_layer_sequence(encoder)\n        self.encoder_embed_dims = self.encoder.embed_dims\n        assert self.encoder_embed_dims == feat_channels, 'embed_dims({}) of ' \\\n            'tranformer encoder must equal to feat_channels({})'.format(\n                feat_channels, self.encoder_embed_dims)\n        self.positional_encoding = build_positional_encoding(\n            positional_encoding)\n        self.encoder_in_proj = Conv2d(\n            in_channels[-1], feat_channels, kernel_size=1)\n        self.encoder_out_proj = ConvModule(\n            feat_channels,\n            feat_channels,\n            kernel_size=3,\n            stride=1,\n            padding=1,\n            bias=self.use_bias,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n\n    def init_weights(self):\n        \"\"\"Initialize weights.\"\"\"\n        for i in range(0, self.num_inputs - 2):\n            caffe2_xavier_init(self.lateral_convs[i].conv, bias=0)\n            caffe2_xavier_init(self.output_convs[i].conv, bias=0)\n\n        caffe2_xavier_init(self.mask_feature, bias=0)\n        caffe2_xavier_init(self.encoder_in_proj, bias=0)\n        caffe2_xavier_init(self.encoder_out_proj.conv, bias=0)\n\n        for p in self.encoder.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n\n    def forward(self, feats, img_metas):\n        \"\"\"\n        Args:\n            feats (list[Tensor]): Feature maps of each level. Each has\n                shape of (batch_size, c, h, w).\n            img_metas (list[dict]): List of image information. 
Pass in\n                for creating more accurate padding mask.\n\n        Returns:\n            tuple: a tuple containing the following:\n                - mask_feature (Tensor): shape (batch_size, c, h, w).\n                - memory (Tensor): shape (batch_size, c, h, w).\n        \"\"\"\n        feat_last = feats[-1]\n        bs, c, h, w = feat_last.shape\n        input_img_h, input_img_w = img_metas[0]['batch_input_shape']\n        padding_mask = feat_last.new_ones((bs, input_img_h, input_img_w),\n                                          dtype=torch.float32)\n        for i in range(bs):\n            img_h, img_w, _ = img_metas[i]['img_shape']\n            padding_mask[i, :img_h, :img_w] = 0\n        padding_mask = F.interpolate(\n            padding_mask.unsqueeze(1),\n            size=feat_last.shape[-2:],\n            mode='nearest').to(torch.bool).squeeze(1)\n\n        pos_embed = self.positional_encoding(padding_mask)\n        feat_last = self.encoder_in_proj(feat_last)\n        # (batch_size, c, h, w) -> (num_queries, batch_size, c)\n        feat_last = feat_last.flatten(2).permute(2, 0, 1)\n        pos_embed = pos_embed.flatten(2).permute(2, 0, 1)\n        # (batch_size, h, w) -> (batch_size, h*w)\n        padding_mask = padding_mask.flatten(1)\n        memory = self.encoder(\n            query=feat_last,\n            key=None,\n            value=None,\n            query_pos=pos_embed,\n            query_key_padding_mask=padding_mask)\n        # (num_queries, batch_size, c) -> (batch_size, c, h, w)\n        memory = memory.permute(1, 2, 0).view(bs, self.encoder_embed_dims, h,\n                                              w)\n        y = self.encoder_out_proj(memory)\n        for i in range(self.num_inputs - 2, -1, -1):\n            x = feats[i]\n            cur_feat = self.lateral_convs[i](x)\n            y = cur_feat + \\\n                F.interpolate(y, size=cur_feat.shape[-2:], mode='nearest')\n            y = self.output_convs[i](y)\n\n        mask_feature = self.mask_feature(y)\n        return mask_feature, memory\n"
  },
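Note: a minimal sketch of the padding-mask logic in TransformerEncoderPixelDecoder.forward above (batch and image shapes are hypothetical): positions outside each image's valid, un-padded region are marked True, and the mask is then downsampled to the resolution of the last feature map before being flattened for the encoder.

import torch
import torch.nn.functional as F

batch_input_shape = (64, 96)           # padded size of the whole batch
img_shapes = [(64, 80), (48, 96)]      # valid region per image
mask = torch.ones((2, *batch_input_shape))
for i, (h, w) in enumerate(img_shapes):
    mask[i, :h, :w] = 0                # 0 = valid pixel, 1 = padding
mask = F.interpolate(mask[:, None], size=(8, 12), mode='nearest').to(torch.bool)[:, 0]
print(mask.shape)                      # torch.Size([2, 8, 12])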
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_roi_head import BaseRoIHead\nfrom .bbox_heads import (BBoxHead, ConvFCBBoxHead, DIIHead,\n                         DoubleConvFCBBoxHead, SABLHead, SCNetBBoxHead,\n                         Shared2FCBBoxHead, Shared4Conv1FCBBoxHead)\nfrom .cascade_roi_head import CascadeRoIHead\nfrom .double_roi_head import DoubleHeadRoIHead\nfrom .dynamic_roi_head import DynamicRoIHead\nfrom .grid_roi_head import GridRoIHead\nfrom .htc_roi_head import HybridTaskCascadeRoIHead\nfrom .mask_heads import (CoarseMaskHead, FCNMaskHead, FeatureRelayHead,\n                         FusedSemanticHead, GlobalContextHead, GridHead,\n                         HTCMaskHead, MaskIoUHead, MaskPointHead,\n                         SCNetMaskHead, SCNetSemanticHead)\nfrom .mask_scoring_roi_head import MaskScoringRoIHead\nfrom .pisa_roi_head import PISARoIHead\nfrom .point_rend_roi_head import PointRendRoIHead\nfrom .roi_extractors import (BaseRoIExtractor, GenericRoIExtractor,\n                             SingleRoIExtractor)\nfrom .scnet_roi_head import SCNetRoIHead\nfrom .shared_heads import ResLayer\nfrom .sparse_roi_head import SparseRoIHead\nfrom .standard_roi_head import StandardRoIHead\nfrom .trident_roi_head import TridentRoIHead\n\n__all__ = [\n    'BaseRoIHead', 'CascadeRoIHead', 'DoubleHeadRoIHead', 'MaskScoringRoIHead',\n    'HybridTaskCascadeRoIHead', 'GridRoIHead', 'ResLayer', 'BBoxHead',\n    'ConvFCBBoxHead', 'DIIHead', 'SABLHead', 'Shared2FCBBoxHead',\n    'StandardRoIHead', 'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead',\n    'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',\n    'MaskIoUHead', 'BaseRoIExtractor', 'GenericRoIExtractor',\n    'SingleRoIExtractor', 'PISARoIHead', 'PointRendRoIHead', 'MaskPointHead',\n    'CoarseMaskHead', 'DynamicRoIHead', 'SparseRoIHead', 'TridentRoIHead',\n    'SCNetRoIHead', 'SCNetMaskHead', 'SCNetSemanticHead', 'SCNetBBoxHead',\n    'FeatureRelayHead', 'GlobalContextHead'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/base_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nfrom mmcv.runner import BaseModule\n\nfrom ..builder import build_shared_head\n\n\nclass BaseRoIHead(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for RoIHeads.\"\"\"\n\n    def __init__(self,\n                 bbox_roi_extractor=None,\n                 bbox_head=None,\n                 mask_roi_extractor=None,\n                 mask_head=None,\n                 shared_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(BaseRoIHead, self).__init__(init_cfg)\n        self.train_cfg = train_cfg\n        self.test_cfg = test_cfg\n        if shared_head is not None:\n            shared_head.pretrained = pretrained\n            self.shared_head = build_shared_head(shared_head)\n\n        if bbox_head is not None:\n            self.init_bbox_head(bbox_roi_extractor, bbox_head)\n\n        if mask_head is not None:\n            self.init_mask_head(mask_roi_extractor, mask_head)\n\n        self.init_assigner_sampler()\n\n    @property\n    def with_bbox(self):\n        \"\"\"bool: whether the RoI head contains a `bbox_head`\"\"\"\n        return hasattr(self, 'bbox_head') and self.bbox_head is not None\n\n    @property\n    def with_mask(self):\n        \"\"\"bool: whether the RoI head contains a `mask_head`\"\"\"\n        return hasattr(self, 'mask_head') and self.mask_head is not None\n\n    @property\n    def with_shared_head(self):\n        \"\"\"bool: whether the RoI head contains a `shared_head`\"\"\"\n        return hasattr(self, 'shared_head') and self.shared_head is not None\n\n    @abstractmethod\n    def init_bbox_head(self):\n        \"\"\"Initialize ``bbox_head``\"\"\"\n        pass\n\n    @abstractmethod\n    def init_mask_head(self):\n        \"\"\"Initialize ``mask_head``\"\"\"\n        pass\n\n    @abstractmethod\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        pass\n\n    @abstractmethod\n    def forward_train(self,\n                      x,\n                      img_meta,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      **kwargs):\n        \"\"\"Forward function during training.\"\"\"\n\n    async def async_simple_test(self,\n                                x,\n                                proposal_list,\n                                img_metas,\n                                proposals=None,\n                                rescale=False,\n                                **kwargs):\n        \"\"\"Asynchronized test function.\"\"\"\n        raise NotImplementedError\n\n    def simple_test(self,\n                    x,\n                    proposal_list,\n                    img_meta,\n                    proposals=None,\n                    rescale=False,\n                    **kwargs):\n        \"\"\"Test without augmentation.\"\"\"\n\n    def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/bbox_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .bbox_head import BBoxHead\nfrom .convfc_bbox_head import (ConvFCBBoxHead, Shared2FCBBoxHead,\n                               Shared4Conv1FCBBoxHead)\nfrom .dii_head import DIIHead\nfrom .double_bbox_head import DoubleConvFCBBoxHead\nfrom .sabl_head import SABLHead\nfrom .scnet_bbox_head import SCNetBBoxHead\n\n__all__ = [\n    'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',\n    'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'SABLHead', 'DIIHead',\n    'SCNetBBoxHead'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/bbox_heads/bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.runner import BaseModule, auto_fp16, force_fp32\nfrom torch.nn.modules.utils import _pair\n\nfrom mmdet.core import build_bbox_coder, multi_apply, multiclass_nms\nfrom mmdet.models.builder import HEADS, build_loss\nfrom mmdet.models.losses import accuracy\nfrom mmdet.models.utils import build_linear_layer\n\n\n@HEADS.register_module()\nclass BBoxHead(BaseModule):\n    \"\"\"Simplest RoI head, with only two fc layers for classification and\n    regression respectively.\"\"\"\n\n    def __init__(self,\n                 with_avg_pool=False,\n                 with_cls=True,\n                 with_reg=True,\n                 roi_feat_size=7,\n                 in_channels=256,\n                 num_classes=80,\n                 bbox_coder=dict(\n                     type='DeltaXYWHBBoxCoder',\n                     clip_border=True,\n                     target_means=[0., 0., 0., 0.],\n                     target_stds=[0.1, 0.1, 0.2, 0.2]),\n                 reg_class_agnostic=False,\n                 reg_decoded_bbox=False,\n                 reg_predictor_cfg=dict(type='Linear'),\n                 cls_predictor_cfg=dict(type='Linear'),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=False,\n                     loss_weight=1.0),\n                 loss_bbox=dict(\n                     type='SmoothL1Loss', beta=1.0, loss_weight=1.0),\n                 init_cfg=None):\n        super(BBoxHead, self).__init__(init_cfg)\n        assert with_cls or with_reg\n        self.with_avg_pool = with_avg_pool\n        self.with_cls = with_cls\n        self.with_reg = with_reg\n        self.roi_feat_size = _pair(roi_feat_size)\n        self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]\n        self.in_channels = in_channels\n        self.num_classes = num_classes\n        self.reg_class_agnostic = reg_class_agnostic\n        self.reg_decoded_bbox = reg_decoded_bbox\n        self.reg_predictor_cfg = reg_predictor_cfg\n        self.cls_predictor_cfg = cls_predictor_cfg\n        self.fp16_enabled = False\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox = build_loss(loss_bbox)\n\n        in_channels = self.in_channels\n        if self.with_avg_pool:\n            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)\n        else:\n            in_channels *= self.roi_feat_area\n        if self.with_cls:\n            # need to add background class\n            if self.custom_cls_channels:\n                cls_channels = self.loss_cls.get_cls_channels(self.num_classes)\n            else:\n                cls_channels = num_classes + 1\n            self.fc_cls = build_linear_layer(\n                self.cls_predictor_cfg,\n                in_features=in_channels,\n                out_features=cls_channels)\n        if self.with_reg:\n            out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes\n            self.fc_reg = build_linear_layer(\n                self.reg_predictor_cfg,\n                in_features=in_channels,\n                out_features=out_dim_reg)\n        self.debug_imgs = None\n        if init_cfg is None:\n            self.init_cfg = []\n            if self.with_cls:\n                self.init_cfg += [\n                    dict(\n                        type='Normal', std=0.01, 
override=dict(name='fc_cls'))\n                ]\n            if self.with_reg:\n                self.init_cfg += [\n                    dict(\n                        type='Normal', std=0.001, override=dict(name='fc_reg'))\n                ]\n\n    @property\n    def custom_cls_channels(self):\n        return getattr(self.loss_cls, 'custom_cls_channels', False)\n\n    @property\n    def custom_activation(self):\n        return getattr(self.loss_cls, 'custom_activation', False)\n\n    @property\n    def custom_accuracy(self):\n        return getattr(self.loss_cls, 'custom_accuracy', False)\n\n    @auto_fp16()\n    def forward(self, x):\n        if self.with_avg_pool:\n            if x.numel() > 0:\n                x = self.avg_pool(x)\n                x = x.view(x.size(0), -1)\n            else:\n                # avg_pool does not support empty tensor,\n                # so use torch.mean instead it\n                x = torch.mean(x, dim=(-1, -2))\n        cls_score = self.fc_cls(x) if self.with_cls else None\n        bbox_pred = self.fc_reg(x) if self.with_reg else None\n        return cls_score, bbox_pred\n\n    def _get_target_single(self, pos_bboxes, neg_bboxes, pos_gt_bboxes,\n                           pos_gt_labels, cfg):\n        \"\"\"Calculate the ground truth for proposals in the single image\n        according to the sampling results.\n\n        Args:\n            pos_bboxes (Tensor): Contains all the positive boxes,\n                has shape (num_pos, 4), the last dimension 4\n                represents [tl_x, tl_y, br_x, br_y].\n            neg_bboxes (Tensor): Contains all the negative boxes,\n                has shape (num_neg, 4), the last dimension 4\n                represents [tl_x, tl_y, br_x, br_y].\n            pos_gt_bboxes (Tensor): Contains gt_boxes for\n                all positive samples, has shape (num_pos, 4),\n                the last dimension 4\n                represents [tl_x, tl_y, br_x, br_y].\n            pos_gt_labels (Tensor): Contains gt_labels for\n                all positive samples, has shape (num_pos, ).\n            cfg (obj:`ConfigDict`): `train_cfg` of R-CNN.\n\n        Returns:\n            Tuple[Tensor]: Ground truth for proposals\n            in a single image. 
Containing the following Tensors:\n\n                - labels(Tensor): Gt_labels for all proposals, has\n                  shape (num_proposals,).\n                - label_weights(Tensor): Labels_weights for all\n                  proposals, has shape (num_proposals,).\n                - bbox_targets(Tensor):Regression target for all\n                  proposals, has shape (num_proposals, 4), the\n                  last dimension 4 represents [tl_x, tl_y, br_x, br_y].\n                - bbox_weights(Tensor):Regression weights for all\n                  proposals, has shape (num_proposals, 4).\n        \"\"\"\n        num_pos = pos_bboxes.size(0)\n        num_neg = neg_bboxes.size(0)\n        num_samples = num_pos + num_neg\n\n        # original implementation uses new_zeros since BG are set to be 0\n        # now use empty & fill because BG cat_id = num_classes,\n        # FG cat_id = [0, num_classes-1]\n        labels = pos_bboxes.new_full((num_samples, ),\n                                     self.num_classes,\n                                     dtype=torch.long)\n        label_weights = pos_bboxes.new_zeros(num_samples)\n        bbox_targets = pos_bboxes.new_zeros(num_samples, 4)\n        bbox_weights = pos_bboxes.new_zeros(num_samples, 4)\n        if num_pos > 0:\n            labels[:num_pos] = pos_gt_labels\n            pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight\n            label_weights[:num_pos] = pos_weight\n            if not self.reg_decoded_bbox:\n                pos_bbox_targets = self.bbox_coder.encode(\n                    pos_bboxes, pos_gt_bboxes)\n            else:\n                # When the regression loss (e.g. `IouLoss`, `GIouLoss`)\n                # is applied directly on the decoded bounding boxes, both\n                # the predicted boxes and regression targets should be with\n                # absolute coordinate format.\n                pos_bbox_targets = pos_gt_bboxes\n            bbox_targets[:num_pos, :] = pos_bbox_targets\n            bbox_weights[:num_pos, :] = 1\n        if num_neg > 0:\n            label_weights[-num_neg:] = 1.0\n\n        return labels, label_weights, bbox_targets, bbox_weights\n\n    def get_targets(self,\n                    sampling_results,\n                    gt_bboxes,\n                    gt_labels,\n                    rcnn_train_cfg,\n                    concat=True):\n        \"\"\"Calculate the ground truth for all samples in a batch according to\n        the sampling_results.\n\n        Almost the same as the implementation in bbox_head, we passed\n        additional parameters pos_inds_list and neg_inds_list to\n        `_get_target_single` function.\n\n        Args:\n            sampling_results (List[obj:SamplingResults]): Assign results of\n                all images in a batch after sampling.\n            gt_bboxes (list[Tensor]): Gt_bboxes of all images in a batch,\n                each tensor has shape (num_gt, 4),  the last dimension 4\n                represents [tl_x, tl_y, br_x, br_y].\n            gt_labels (list[Tensor]): Gt_labels of all images in a batch,\n                each tensor has shape (num_gt,).\n            rcnn_train_cfg (obj:ConfigDict): `train_cfg` of RCNN.\n            concat (bool): Whether to concatenate the results of all\n                the images in a single batch.\n\n        Returns:\n            Tuple[Tensor]: Ground truth for proposals in a single image.\n            Containing the following list of Tensors:\n\n                - labels (list[Tensor],Tensor): 
Gt_labels for all\n                  proposals in a batch, each tensor in list has\n                  shape (num_proposals,) when `concat=False`, otherwise\n                  just a single tensor has shape (num_all_proposals,).\n                - label_weights (list[Tensor]): Labels_weights for\n                  all proposals in a batch, each tensor in list has\n                  shape (num_proposals,) when `concat=False`, otherwise\n                  just a single tensor has shape (num_all_proposals,).\n                - bbox_targets (list[Tensor],Tensor): Regression target\n                  for all proposals in a batch, each tensor in list\n                  has shape (num_proposals, 4) when `concat=False`,\n                  otherwise just a single tensor has shape\n                  (num_all_proposals, 4), the last dimension 4 represents\n                  [tl_x, tl_y, br_x, br_y].\n                - bbox_weights (list[tensor],Tensor): Regression weights for\n                  all proposals in a batch, each tensor in list has shape\n                  (num_proposals, 4) when `concat=False`, otherwise just a\n                  single tensor has shape (num_all_proposals, 4).\n        \"\"\"\n        pos_bboxes_list = [res.pos_bboxes for res in sampling_results]\n        neg_bboxes_list = [res.neg_bboxes for res in sampling_results]\n        pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]\n        pos_gt_labels_list = [res.pos_gt_labels for res in sampling_results]\n        labels, label_weights, bbox_targets, bbox_weights = multi_apply(\n            self._get_target_single,\n            pos_bboxes_list,\n            neg_bboxes_list,\n            pos_gt_bboxes_list,\n            pos_gt_labels_list,\n            cfg=rcnn_train_cfg)\n\n        if concat:\n            labels = torch.cat(labels, 0)\n            label_weights = torch.cat(label_weights, 0)\n            bbox_targets = torch.cat(bbox_targets, 0)\n            bbox_weights = torch.cat(bbox_weights, 0)\n        return labels, label_weights, bbox_targets, bbox_weights\n\n    @force_fp32(apply_to=('cls_score', 'bbox_pred'))\n    def loss(self,\n             cls_score,\n             bbox_pred,\n             rois,\n             labels,\n             label_weights,\n             bbox_targets,\n             bbox_weights,\n             reduction_override=None):\n        losses = dict()\n        if cls_score is not None:\n            avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)\n            if cls_score.numel() > 0:\n                loss_cls_ = self.loss_cls(\n                    cls_score,\n                    labels,\n                    label_weights,\n                    avg_factor=avg_factor,\n                    reduction_override=reduction_override)\n                if isinstance(loss_cls_, dict):\n                    losses.update(loss_cls_)\n                else:\n                    losses['loss_cls'] = loss_cls_\n                if self.custom_activation:\n                    acc_ = self.loss_cls.get_accuracy(cls_score, labels)\n                    losses.update(acc_)\n                else:\n                    losses['acc'] = accuracy(cls_score, labels)\n        if bbox_pred is not None:\n            bg_class_ind = self.num_classes\n            # 0~self.num_classes-1 are FG, self.num_classes is BG\n            pos_inds = (labels >= 0) & (labels < bg_class_ind)\n            # do not perform bounding box regression for BG anymore.\n            if pos_inds.any():\n                if 
self.reg_decoded_bbox:\n                    # When the regression loss (e.g. `IouLoss`,\n                    # `GIouLoss`, `DIouLoss`) is applied directly on\n                    # the decoded bounding boxes, it decodes the\n                    # already encoded coordinates to absolute format.\n                    bbox_pred = self.bbox_coder.decode(rois[:, 1:], bbox_pred)\n                if self.reg_class_agnostic:\n                    pos_bbox_pred = bbox_pred.view(\n                        bbox_pred.size(0), 4)[pos_inds.type(torch.bool)]\n                else:\n                    pos_bbox_pred = bbox_pred.view(\n                        bbox_pred.size(0), -1,\n                        4)[pos_inds.type(torch.bool),\n                           labels[pos_inds.type(torch.bool)]]\n                losses['loss_bbox'] = self.loss_bbox(\n                    pos_bbox_pred,\n                    bbox_targets[pos_inds.type(torch.bool)],\n                    bbox_weights[pos_inds.type(torch.bool)],\n                    avg_factor=bbox_targets.size(0),\n                    reduction_override=reduction_override)\n            else:\n                losses['loss_bbox'] = bbox_pred[pos_inds].sum()\n        return losses\n\n    @force_fp32(apply_to=('cls_score', 'bbox_pred'))\n    def get_bboxes(self,\n                   rois,\n                   cls_score,\n                   bbox_pred,\n                   img_shape,\n                   scale_factor,\n                   rescale=False,\n                   cfg=None):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            rois (Tensor): Boxes to be transformed. Has shape (num_boxes, 5).\n                last dimension 5 arrange as (batch_index, x1, y1, x2, y2).\n            cls_score (Tensor): Box scores, has shape\n                (num_boxes, num_classes + 1).\n            bbox_pred (Tensor, optional): Box energies / deltas.\n                has shape (num_boxes, num_classes * 4).\n            img_shape (Sequence[int], optional): Maximum bounds for boxes,\n                specifies (H, W, C) or (H, W).\n            scale_factor (ndarray): Scale factor of the\n               image arrange as (w_scale, h_scale, w_scale, h_scale).\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n            cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None\n\n        Returns:\n            tuple[Tensor, Tensor]:\n                First tensor is `det_bboxes`, has the shape\n                (num_boxes, 5) and last\n                dimension 5 represent (tl_x, tl_y, br_x, br_y, score).\n                Second tensor is the labels with shape (num_boxes, ).\n        \"\"\"\n\n        # some loss (Seesaw loss..) may have custom activation\n        if self.custom_cls_channels:\n            scores = self.loss_cls.get_activation(cls_score)\n        else:\n            scores = F.softmax(\n                cls_score, dim=-1) if cls_score is not None else None\n        # bbox_pred would be None in some detector when with_reg is False,\n        # e.g. 
Grid R-CNN.\n        if bbox_pred is not None:\n            bboxes = self.bbox_coder.decode(\n                rois[..., 1:], bbox_pred, max_shape=img_shape)\n        else:\n            bboxes = rois[:, 1:].clone()\n            if img_shape is not None:\n                bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1])\n                bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0])\n\n        if rescale and bboxes.size(0) > 0:\n            scale_factor = bboxes.new_tensor(scale_factor)\n            bboxes = (bboxes.view(bboxes.size(0), -1, 4) / scale_factor).view(\n                bboxes.size()[0], -1)\n\n        if cfg is None:\n            return bboxes, scores\n        else:\n            det_bboxes, det_labels = multiclass_nms(bboxes, scores,\n                                                    cfg.score_thr, cfg.nms,\n                                                    cfg.max_per_img)\n\n            return det_bboxes, det_labels\n\n    @force_fp32(apply_to=('bbox_preds', ))\n    def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):\n        \"\"\"Refine bboxes during training.\n\n        Args:\n            rois (Tensor): Shape (n*bs, 5), where n is image number per GPU,\n                and bs is the sampled RoIs per image. The first column is\n                the image id and the next 4 columns are x1, y1, x2, y2.\n            labels (Tensor): Shape (n*bs, ).\n            bbox_preds (Tensor): Shape (n*bs, 4) or (n*bs, 4*#class).\n            pos_is_gts (list[Tensor]): Flags indicating if each positive bbox\n                is a gt bbox.\n            img_metas (list[dict]): Meta info of each image.\n\n        Returns:\n            list[Tensor]: Refined bboxes of each image in a mini-batch.\n\n        Example:\n            >>> # xdoctest: +REQUIRES(module:kwarray)\n            >>> import kwarray\n            >>> import numpy as np\n            >>> from mmdet.core.bbox.demodata import random_boxes\n            >>> self = BBoxHead(reg_class_agnostic=True)\n            >>> n_roi = 2\n            >>> n_img = 4\n            >>> scale = 512\n            >>> rng = np.random.RandomState(0)\n            >>> img_metas = [{'img_shape': (scale, scale)}\n            ...              for _ in range(n_img)]\n            >>> # Create rois in the expected format\n            >>> roi_boxes = random_boxes(n_roi, scale=scale, rng=rng)\n            >>> img_ids = torch.randint(0, n_img, (n_roi,))\n            >>> img_ids = img_ids.float()\n            >>> rois = torch.cat([img_ids[:, None], roi_boxes], dim=1)\n            >>> # Create other args\n            >>> labels = torch.randint(0, 2, (n_roi,)).long()\n            >>> bbox_preds = random_boxes(n_roi, scale=scale, rng=rng)\n            >>> # For each image, pretend random positive boxes are gts\n            >>> is_label_pos = (labels.numpy() > 0).astype(np.int)\n            >>> lbl_per_img = kwarray.group_items(is_label_pos,\n            ...                                   img_ids.numpy())\n            >>> pos_per_img = [sum(lbl_per_img.get(gid, []))\n            ...                
for gid in range(n_img)]\n            >>> pos_is_gts = [\n            >>>     torch.randint(0, 2, (npos,)).byte().sort(\n            >>>         descending=True)[0]\n            >>>     for npos in pos_per_img\n            >>> ]\n            >>> bboxes_list = self.refine_bboxes(rois, labels, bbox_preds,\n            >>>                    pos_is_gts, img_metas)\n            >>> print(bboxes_list)\n        \"\"\"\n        img_ids = rois[:, 0].long().unique(sorted=True)\n        assert img_ids.numel() <= len(img_metas)\n\n        bboxes_list = []\n        for i in range(len(img_metas)):\n            inds = torch.nonzero(\n                rois[:, 0] == i, as_tuple=False).squeeze(dim=1)\n            num_rois = inds.numel()\n\n            bboxes_ = rois[inds, 1:]\n            label_ = labels[inds]\n            bbox_pred_ = bbox_preds[inds]\n            img_meta_ = img_metas[i]\n            pos_is_gts_ = pos_is_gts[i]\n\n            bboxes = self.regress_by_class(bboxes_, label_, bbox_pred_,\n                                           img_meta_)\n\n            # filter gt bboxes\n            pos_keep = 1 - pos_is_gts_\n            keep_inds = pos_is_gts_.new_ones(num_rois)\n            keep_inds[:len(pos_is_gts_)] = pos_keep\n\n            bboxes_list.append(bboxes[keep_inds.type(torch.bool)])\n\n        return bboxes_list\n\n    @force_fp32(apply_to=('bbox_pred', ))\n    def regress_by_class(self, rois, label, bbox_pred, img_meta):\n        \"\"\"Regress the bbox for the predicted class. Used in Cascade R-CNN.\n\n        Args:\n            rois (Tensor): Rois from `rpn_head` or last stage\n                `bbox_head`, has shape (num_proposals, 4) or\n                (num_proposals, 5).\n            label (Tensor): Only used when `self.reg_class_agnostic`\n                is False, has shape (num_proposals, ).\n            bbox_pred (Tensor): Regression prediction of\n                current stage `bbox_head`. When `self.reg_class_agnostic`\n                is False, it has shape (n, num_classes * 4), otherwise\n                it has shape (n, 4).\n            img_meta (dict): Image meta info.\n\n        Returns:\n            Tensor: Regressed bboxes, the same shape as input rois.\n        \"\"\"\n\n        assert rois.size(1) == 4 or rois.size(1) == 5, repr(rois.shape)\n\n        if not self.reg_class_agnostic:\n            label = label * 4\n            inds = torch.stack((label, label + 1, label + 2, label + 3), 1)\n            bbox_pred = torch.gather(bbox_pred, 1, inds)\n        assert bbox_pred.size(1) == 4\n\n        max_shape = img_meta['img_shape']\n\n        if rois.size(1) == 4:\n            new_rois = self.bbox_coder.decode(\n                rois, bbox_pred, max_shape=max_shape)\n        else:\n            bboxes = self.bbox_coder.decode(\n                rois[:, 1:], bbox_pred, max_shape=max_shape)\n            new_rois = torch.cat((rois[:, [0]], bboxes), dim=1)\n\n        return new_rois\n\n    def onnx_export(self,\n                    rois,\n                    cls_score,\n                    bbox_pred,\n                    img_shape,\n                    cfg=None,\n                    **kwargs):\n        \"\"\"Transform network output for a batch into bbox predictions.\n\n        Args:\n            rois (Tensor): Boxes to be transformed.\n                Has shape (B, num_boxes, 5)\n            cls_score (Tensor): Box scores. 
has shape\n                (B, num_boxes, num_classes + 1), 1 represent the background.\n            bbox_pred (Tensor, optional): Box energies / deltas for,\n                has shape (B, num_boxes, num_classes * 4) when.\n            img_shape (torch.Tensor): Shape of image.\n            cfg (obj:`ConfigDict`): `test_cfg` of Bbox Head. Default: None\n\n        Returns:\n            tuple[Tensor, Tensor]: dets of shape [N, num_det, 5]\n                and class labels of shape [N, num_det].\n        \"\"\"\n\n        assert rois.ndim == 3, 'Only support export two stage ' \\\n                               'model to ONNX ' \\\n                               'with batch dimension. '\n        if self.custom_cls_channels:\n            scores = self.loss_cls.get_activation(cls_score)\n        else:\n            scores = F.softmax(\n                cls_score, dim=-1) if cls_score is not None else None\n\n        if bbox_pred is not None:\n            bboxes = self.bbox_coder.decode(\n                rois[..., 1:], bbox_pred, max_shape=img_shape)\n        else:\n            bboxes = rois[..., 1:].clone()\n            if img_shape is not None:\n                max_shape = bboxes.new_tensor(img_shape)[..., :2]\n                min_xy = bboxes.new_tensor(0)\n                max_xy = torch.cat(\n                    [max_shape] * 2, dim=-1).flip(-1).unsqueeze(-2)\n                bboxes = torch.where(bboxes < min_xy, min_xy, bboxes)\n                bboxes = torch.where(bboxes > max_xy, max_xy, bboxes)\n\n        # Replace multiclass_nms with ONNX::NonMaxSuppression in deployment\n        from mmdet.core.export import add_dummy_nms_for_onnx\n        max_output_boxes_per_class = cfg.nms.get('max_output_boxes_per_class',\n                                                 cfg.max_per_img)\n        iou_threshold = cfg.nms.get('iou_threshold', 0.5)\n        score_threshold = cfg.score_thr\n        nms_pre = cfg.get('deploy_nms_pre', -1)\n\n        scores = scores[..., :self.num_classes]\n        if self.reg_class_agnostic:\n            return add_dummy_nms_for_onnx(\n                bboxes,\n                scores,\n                max_output_boxes_per_class,\n                iou_threshold,\n                score_threshold,\n                pre_top_k=nms_pre,\n                after_top_k=cfg.max_per_img)\n        else:\n            batch_size = scores.shape[0]\n            labels = torch.arange(\n                self.num_classes, dtype=torch.long).to(scores.device)\n            labels = labels.view(1, 1, -1).expand_as(scores)\n            labels = labels.reshape(batch_size, -1)\n            scores = scores.reshape(batch_size, -1)\n            bboxes = bboxes.reshape(batch_size, -1, 4)\n\n            max_size = torch.max(img_shape)\n            # Offset bboxes of each class so that bboxes of different labels\n            #  do not overlap.\n            offsets = (labels * max_size + 1).unsqueeze(2)\n            bboxes_for_nms = bboxes + offsets\n\n            batch_dets, labels = add_dummy_nms_for_onnx(\n                bboxes_for_nms,\n                scores.unsqueeze(2),\n                max_output_boxes_per_class,\n                iou_threshold,\n                score_threshold,\n                pre_top_k=nms_pre,\n                after_top_k=cfg.max_per_img,\n                labels=labels)\n            # Offset the bboxes back after dummy nms.\n            offsets = (labels * max_size + 1).unsqueeze(2)\n            # Indexing + inplace operation fails with dynamic shape in ONNX\n            # original 
style: batch_dets[..., :4] -= offsets\n            bboxes, scores = batch_dets[..., 0:4], batch_dets[..., 4:5]\n            bboxes -= offsets\n            batch_dets = torch.cat([bboxes, scores], dim=2)\n            return batch_dets, labels\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/bbox_heads/convfc_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\n\nfrom mmdet.models.builder import HEADS\nfrom mmdet.models.utils import build_linear_layer\nfrom .bbox_head import BBoxHead\n\n\n@HEADS.register_module()\nclass ConvFCBBoxHead(BBoxHead):\n    r\"\"\"More general bbox head, with shared conv and fc layers and two optional\n    separated branches.\n\n    .. code-block:: none\n\n                                    /-> cls convs -> cls fcs -> cls\n        shared convs -> shared fcs\n                                    \\-> reg convs -> reg fcs -> reg\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_shared_convs=0,\n                 num_shared_fcs=0,\n                 num_cls_convs=0,\n                 num_cls_fcs=0,\n                 num_reg_convs=0,\n                 num_reg_fcs=0,\n                 conv_out_channels=256,\n                 fc_out_channels=1024,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 init_cfg=None,\n                 *args,\n                 **kwargs):\n        super(ConvFCBBoxHead, self).__init__(\n            *args, init_cfg=init_cfg, **kwargs)\n        assert (num_shared_convs + num_shared_fcs + num_cls_convs +\n                num_cls_fcs + num_reg_convs + num_reg_fcs > 0)\n        if num_cls_convs > 0 or num_reg_convs > 0:\n            assert num_shared_fcs == 0\n        if not self.with_cls:\n            assert num_cls_convs == 0 and num_cls_fcs == 0\n        if not self.with_reg:\n            assert num_reg_convs == 0 and num_reg_fcs == 0\n        self.num_shared_convs = num_shared_convs\n        self.num_shared_fcs = num_shared_fcs\n        self.num_cls_convs = num_cls_convs\n        self.num_cls_fcs = num_cls_fcs\n        self.num_reg_convs = num_reg_convs\n        self.num_reg_fcs = num_reg_fcs\n        self.conv_out_channels = conv_out_channels\n        self.fc_out_channels = fc_out_channels\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        # add shared convs and fcs\n        self.shared_convs, self.shared_fcs, last_layer_dim = \\\n            self._add_conv_fc_branch(\n                self.num_shared_convs, self.num_shared_fcs, self.in_channels,\n                True)\n        self.shared_out_channels = last_layer_dim\n\n        # add cls specific branch\n        self.cls_convs, self.cls_fcs, self.cls_last_dim = \\\n            self._add_conv_fc_branch(\n                self.num_cls_convs, self.num_cls_fcs, self.shared_out_channels)\n\n        # add reg specific branch\n        self.reg_convs, self.reg_fcs, self.reg_last_dim = \\\n            self._add_conv_fc_branch(\n                self.num_reg_convs, self.num_reg_fcs, self.shared_out_channels)\n\n        if self.num_shared_fcs == 0 and not self.with_avg_pool:\n            if self.num_cls_fcs == 0:\n                self.cls_last_dim *= self.roi_feat_area\n            if self.num_reg_fcs == 0:\n                self.reg_last_dim *= self.roi_feat_area\n\n        self.relu = nn.ReLU(inplace=True)\n        # reconstruct fc_cls and fc_reg since input channels are changed\n        if self.with_cls:\n            if self.custom_cls_channels:\n                cls_channels = self.loss_cls.get_cls_channels(self.num_classes)\n            else:\n                cls_channels = self.num_classes + 1\n            self.fc_cls = build_linear_layer(\n                self.cls_predictor_cfg,\n                in_features=self.cls_last_dim,\n                
out_features=cls_channels)\n        if self.with_reg:\n            out_dim_reg = (4 if self.reg_class_agnostic else 4 *\n                           self.num_classes)\n            self.fc_reg = build_linear_layer(\n                self.reg_predictor_cfg,\n                in_features=self.reg_last_dim,\n                out_features=out_dim_reg)\n\n        if init_cfg is None:\n            # when init_cfg is None,\n            # It has been set to\n            # [[dict(type='Normal', std=0.01, override=dict(name='fc_cls'))],\n            #  [dict(type='Normal', std=0.001, override=dict(name='fc_reg'))]\n            # after `super(ConvFCBBoxHead, self).__init__()`\n            # we only need to append additional configuration\n            # for `shared_fcs`, `cls_fcs` and `reg_fcs`\n            self.init_cfg += [\n                dict(\n                    type='Xavier',\n                    distribution='uniform',\n                    override=[\n                        dict(name='shared_fcs'),\n                        dict(name='cls_fcs'),\n                        dict(name='reg_fcs')\n                    ])\n            ]\n\n    def _add_conv_fc_branch(self,\n                            num_branch_convs,\n                            num_branch_fcs,\n                            in_channels,\n                            is_shared=False):\n        \"\"\"Add shared or separable branch.\n\n        convs -> avg pool (optional) -> fcs\n        \"\"\"\n        last_layer_dim = in_channels\n        # add branch specific conv layers\n        branch_convs = nn.ModuleList()\n        if num_branch_convs > 0:\n            for i in range(num_branch_convs):\n                conv_in_channels = (\n                    last_layer_dim if i == 0 else self.conv_out_channels)\n                branch_convs.append(\n                    ConvModule(\n                        conv_in_channels,\n                        self.conv_out_channels,\n                        3,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg))\n            last_layer_dim = self.conv_out_channels\n        # add branch specific fc layers\n        branch_fcs = nn.ModuleList()\n        if num_branch_fcs > 0:\n            # for shared branch, only consider self.with_avg_pool\n            # for separated branches, also consider self.num_shared_fcs\n            if (is_shared\n                    or self.num_shared_fcs == 0) and not self.with_avg_pool:\n                last_layer_dim *= self.roi_feat_area\n            for i in range(num_branch_fcs):\n                fc_in_channels = (\n                    last_layer_dim if i == 0 else self.fc_out_channels)\n                branch_fcs.append(\n                    nn.Linear(fc_in_channels, self.fc_out_channels))\n            last_layer_dim = self.fc_out_channels\n        return branch_convs, branch_fcs, last_layer_dim\n\n    def forward(self, x):\n        # shared part\n        if self.num_shared_convs > 0:\n            for conv in self.shared_convs:\n                x = conv(x)\n\n        if self.num_shared_fcs > 0:\n            if self.with_avg_pool:\n                x = self.avg_pool(x)\n\n            x = x.flatten(1)\n\n            for fc in self.shared_fcs:\n                x = self.relu(fc(x))\n        # separate branches\n        x_cls = x\n        x_reg = x\n\n        for conv in self.cls_convs:\n            x_cls = conv(x_cls)\n        if x_cls.dim() > 2:\n            if self.with_avg_pool:\n                x_cls 
= self.avg_pool(x_cls)\n            x_cls = x_cls.flatten(1)\n        for fc in self.cls_fcs:\n            x_cls = self.relu(fc(x_cls))\n\n        for conv in self.reg_convs:\n            x_reg = conv(x_reg)\n        if x_reg.dim() > 2:\n            if self.with_avg_pool:\n                x_reg = self.avg_pool(x_reg)\n            x_reg = x_reg.flatten(1)\n        for fc in self.reg_fcs:\n            x_reg = self.relu(fc(x_reg))\n\n        cls_score = self.fc_cls(x_cls) if self.with_cls else None\n        bbox_pred = self.fc_reg(x_reg) if self.with_reg else None\n        return cls_score, bbox_pred\n\n\n@HEADS.register_module()\nclass Shared2FCBBoxHead(ConvFCBBoxHead):\n\n    def __init__(self, fc_out_channels=1024, *args, **kwargs):\n        super(Shared2FCBBoxHead, self).__init__(\n            num_shared_convs=0,\n            num_shared_fcs=2,\n            num_cls_convs=0,\n            num_cls_fcs=0,\n            num_reg_convs=0,\n            num_reg_fcs=0,\n            fc_out_channels=fc_out_channels,\n            *args,\n            **kwargs)\n\n\n@HEADS.register_module()\nclass Shared4Conv1FCBBoxHead(ConvFCBBoxHead):\n\n    def __init__(self, fc_out_channels=1024, *args, **kwargs):\n        super(Shared4Conv1FCBBoxHead, self).__init__(\n            num_shared_convs=4,\n            num_shared_fcs=1,\n            num_cls_convs=0,\n            num_cls_fcs=0,\n            num_reg_convs=0,\n            num_reg_fcs=0,\n            fc_out_channels=fc_out_channels,\n            *args,\n            **kwargs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/bbox_heads/dii_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import (bias_init_with_prob, build_activation_layer,\n                      build_norm_layer)\nfrom mmcv.cnn.bricks.transformer import FFN, MultiheadAttention\nfrom mmcv.runner import auto_fp16, force_fp32\n\nfrom mmdet.core import multi_apply\nfrom mmdet.models.builder import HEADS, build_loss\nfrom mmdet.models.dense_heads.atss_head import reduce_mean\nfrom mmdet.models.losses import accuracy\nfrom mmdet.models.utils import build_transformer\nfrom .bbox_head import BBoxHead\n\n\n@HEADS.register_module()\nclass DIIHead(BBoxHead):\n    r\"\"\"Dynamic Instance Interactive Head for `Sparse R-CNN: End-to-End Object\n    Detection with Learnable Proposals <https://arxiv.org/abs/2011.12450>`_\n\n    Args:\n        num_classes (int): Number of class in dataset.\n            Defaults to 80.\n        num_ffn_fcs (int): The number of fully-connected\n            layers in FFNs. Defaults to 2.\n        num_heads (int): The hidden dimension of FFNs.\n            Defaults to 8.\n        num_cls_fcs (int): The number of fully-connected\n            layers in classification subnet. Defaults to 1.\n        num_reg_fcs (int): The number of fully-connected\n            layers in regression subnet. Defaults to 3.\n        feedforward_channels (int): The hidden dimension\n            of FFNs. Defaults to 2048\n        in_channels (int): Hidden_channels of MultiheadAttention.\n            Defaults to 256.\n        dropout (float): Probability of drop the channel.\n            Defaults to 0.0\n        ffn_act_cfg (dict): The activation config for FFNs.\n        dynamic_conv_cfg (dict): The convolution config\n            for DynamicConv.\n        loss_iou (dict): The config for iou or giou loss.\n\n    \"\"\"\n\n    def __init__(self,\n                 num_classes=80,\n                 num_ffn_fcs=2,\n                 num_heads=8,\n                 num_cls_fcs=1,\n                 num_reg_fcs=3,\n                 feedforward_channels=2048,\n                 in_channels=256,\n                 dropout=0.0,\n                 ffn_act_cfg=dict(type='ReLU', inplace=True),\n                 dynamic_conv_cfg=dict(\n                     type='DynamicConv',\n                     in_channels=256,\n                     feat_channels=64,\n                     out_channels=256,\n                     input_feat_shape=7,\n                     act_cfg=dict(type='ReLU', inplace=True),\n                     norm_cfg=dict(type='LN')),\n                 loss_iou=dict(type='GIoULoss', loss_weight=2.0),\n                 init_cfg=None,\n                 **kwargs):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super(DIIHead, self).__init__(\n            num_classes=num_classes,\n            reg_decoded_bbox=True,\n            reg_class_agnostic=True,\n            init_cfg=init_cfg,\n            **kwargs)\n        self.loss_iou = build_loss(loss_iou)\n        self.in_channels = in_channels\n        self.fp16_enabled = False\n        self.attention = MultiheadAttention(in_channels, num_heads, dropout)\n        self.attention_norm = build_norm_layer(dict(type='LN'), in_channels)[1]\n\n        self.instance_interactive_conv = build_transformer(dynamic_conv_cfg)\n        self.instance_interactive_conv_dropout = nn.Dropout(dropout)\n        self.instance_interactive_conv_norm = build_norm_layer(\n            
dict(type='LN'), in_channels)[1]\n\n        self.ffn = FFN(\n            in_channels,\n            feedforward_channels,\n            num_ffn_fcs,\n            act_cfg=ffn_act_cfg,\n            dropout=dropout)\n        self.ffn_norm = build_norm_layer(dict(type='LN'), in_channels)[1]\n\n        self.cls_fcs = nn.ModuleList()\n        for _ in range(num_cls_fcs):\n            self.cls_fcs.append(\n                nn.Linear(in_channels, in_channels, bias=False))\n            self.cls_fcs.append(\n                build_norm_layer(dict(type='LN'), in_channels)[1])\n            self.cls_fcs.append(\n                build_activation_layer(dict(type='ReLU', inplace=True)))\n\n        # overload the self.fc_cls in BBoxHead\n        if self.loss_cls.use_sigmoid:\n            self.fc_cls = nn.Linear(in_channels, self.num_classes)\n        else:\n            self.fc_cls = nn.Linear(in_channels, self.num_classes + 1)\n\n        self.reg_fcs = nn.ModuleList()\n        for _ in range(num_reg_fcs):\n            self.reg_fcs.append(\n                nn.Linear(in_channels, in_channels, bias=False))\n            self.reg_fcs.append(\n                build_norm_layer(dict(type='LN'), in_channels)[1])\n            self.reg_fcs.append(\n                build_activation_layer(dict(type='ReLU', inplace=True)))\n        # overload the self.fc_reg in BBoxHead\n        self.fc_reg = nn.Linear(in_channels, 4)\n\n        assert self.reg_class_agnostic, 'DIIHead only ' \\\n            'support `reg_class_agnostic=True` '\n        assert self.reg_decoded_bbox, 'DIIHead only ' \\\n            'support `reg_decoded_bbox=True`'\n\n    def init_weights(self):\n        \"\"\"Use xavier initialization for all weight parameters and set\n        the classification head bias to a specific value when using focal\n        loss.\"\"\"\n        super(DIIHead, self).init_weights()\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n            else:\n                # adopt the default initialization for\n                # the weight and bias of the layer norm\n                pass\n        if self.loss_cls.use_sigmoid:\n            bias_init = bias_init_with_prob(0.01)\n            nn.init.constant_(self.fc_cls.bias, bias_init)\n\n    @auto_fp16()\n    def forward(self, roi_feat, proposal_feat):\n        \"\"\"Forward function of Dynamic Instance Interactive Head.\n\n        Args:\n            roi_feat (Tensor): Roi-pooling features with shape\n                (batch_size*num_proposals, feature_dimensions,\n                pooling_h, pooling_w).\n            proposal_feat (Tensor): Intermediate feature obtained from\n                the diihead in the last stage, has shape\n                (batch_size, num_proposals, feature_dimensions).\n\n          Returns:\n                tuple[Tensor]: Usually a tuple of classification scores,\n                bbox predictions and an intermediate feature.\n\n                    - cls_scores (Tensor): Classification scores for\n                      all proposals, has shape\n                      (batch_size, num_proposals, num_classes).\n                    - bbox_preds (Tensor): Box energies / deltas for\n                      all proposals, has shape\n                      (batch_size, num_proposals, 4).\n                    - obj_feat (Tensor): Object feature before classification\n                      and regression subnet, has shape\n                      (batch_size, num_proposal, feature_dimensions).\n        \"\"\"\n        N, num_proposals = 
proposal_feat.shape[:2]\n\n        # Self attention\n        proposal_feat = proposal_feat.permute(1, 0, 2)\n        proposal_feat = self.attention_norm(self.attention(proposal_feat))\n        attn_feats = proposal_feat.permute(1, 0, 2)\n\n        # instance interactive\n        proposal_feat = attn_feats.reshape(-1, self.in_channels)\n        proposal_feat_iic = self.instance_interactive_conv(\n            proposal_feat, roi_feat)\n        proposal_feat = proposal_feat + self.instance_interactive_conv_dropout(\n            proposal_feat_iic)\n        obj_feat = self.instance_interactive_conv_norm(proposal_feat)\n\n        # FFN\n        obj_feat = self.ffn_norm(self.ffn(obj_feat))\n\n        cls_feat = obj_feat\n        reg_feat = obj_feat\n\n        for cls_layer in self.cls_fcs:\n            cls_feat = cls_layer(cls_feat)\n        for reg_layer in self.reg_fcs:\n            reg_feat = reg_layer(reg_feat)\n\n        cls_score = self.fc_cls(cls_feat).view(\n            N, num_proposals, self.num_classes\n            if self.loss_cls.use_sigmoid else self.num_classes + 1)\n        bbox_delta = self.fc_reg(reg_feat).view(N, num_proposals, 4)\n\n        return cls_score, bbox_delta, obj_feat.view(\n            N, num_proposals, self.in_channels), attn_feats\n\n    @force_fp32(apply_to=('cls_score', 'bbox_pred'))\n    def loss(self,\n             cls_score,\n             bbox_pred,\n             labels,\n             label_weights,\n             bbox_targets,\n             bbox_weights,\n             imgs_whwh=None,\n             reduction_override=None,\n             **kwargs):\n        \"\"\"Loss function of DIIHead, which gets the loss of all images.\n\n        Args:\n            cls_score (Tensor): Classification prediction\n                results of all classes, has shape\n                (batch_size * num_proposals_single_image, num_classes).\n            bbox_pred (Tensor): Regression prediction results,\n                has shape\n                (batch_size * num_proposals_single_image, 4), the last\n                dimension 4 represents [tl_x, tl_y, br_x, br_y].\n            labels (Tensor): Label of each proposal, has shape\n                (batch_size * num_proposals_single_image).\n            label_weights (Tensor): Classification loss\n                weight of each proposal, has shape\n                (batch_size * num_proposals_single_image).\n            bbox_targets (Tensor): Regression targets of each\n                proposal, has shape\n                (batch_size * num_proposals_single_image, 4),\n                the last dimension 4 represents\n                [tl_x, tl_y, br_x, br_y].\n            bbox_weights (Tensor): Regression loss weight of each\n                proposal's coordinates, has shape\n                (batch_size * num_proposals_single_image, 4).\n            imgs_whwh (Tensor): Tensor with\n                shape (batch_size, num_proposals, 4), the last\n                dimension means\n                [img_width, img_height, img_width, img_height].\n            reduction_override (str, optional): The reduction\n                method used to override the original reduction\n                method of the loss. Options are \"none\",\n                \"mean\" and \"sum\". 
Defaults to None,\n\n            Returns:\n                dict[str, Tensor]: Dictionary of loss components\n        \"\"\"\n        losses = dict()\n        bg_class_ind = self.num_classes\n        # note in spare rcnn num_gt == num_pos\n        pos_inds = (labels >= 0) & (labels < bg_class_ind)\n        num_pos = pos_inds.sum().float()\n        avg_factor = reduce_mean(num_pos)\n        if cls_score is not None:\n            if cls_score.numel() > 0:\n                losses['loss_cls'] = self.loss_cls(\n                    cls_score,\n                    labels,\n                    label_weights,\n                    avg_factor=avg_factor,\n                    reduction_override=reduction_override)\n                losses['pos_acc'] = accuracy(cls_score[pos_inds],\n                                             labels[pos_inds])\n        if bbox_pred is not None:\n            # 0~self.num_classes-1 are FG, self.num_classes is BG\n            # do not perform bounding box regression for BG anymore.\n            if pos_inds.any():\n                pos_bbox_pred = bbox_pred.reshape(bbox_pred.size(0),\n                                                  4)[pos_inds.type(torch.bool)]\n                imgs_whwh = imgs_whwh.reshape(bbox_pred.size(0),\n                                              4)[pos_inds.type(torch.bool)]\n                losses['loss_bbox'] = self.loss_bbox(\n                    pos_bbox_pred / imgs_whwh,\n                    bbox_targets[pos_inds.type(torch.bool)] / imgs_whwh,\n                    bbox_weights[pos_inds.type(torch.bool)],\n                    avg_factor=avg_factor)\n                losses['loss_iou'] = self.loss_iou(\n                    pos_bbox_pred,\n                    bbox_targets[pos_inds.type(torch.bool)],\n                    bbox_weights[pos_inds.type(torch.bool)],\n                    avg_factor=avg_factor)\n            else:\n                losses['loss_bbox'] = bbox_pred.sum() * 0\n                losses['loss_iou'] = bbox_pred.sum() * 0\n        return losses\n\n    def _get_target_single(self, pos_inds, neg_inds, pos_bboxes, neg_bboxes,\n                           pos_gt_bboxes, pos_gt_labels, cfg):\n        \"\"\"Calculate the ground truth for proposals in the single image\n        according to the sampling results.\n\n        Almost the same as the implementation in `bbox_head`,\n        we add pos_inds and neg_inds to select positive and\n        negative samples instead of selecting the first num_pos\n        as positive samples.\n\n        Args:\n            pos_inds (Tensor): The length is equal to the\n                positive sample numbers contain all index\n                of the positive sample in the origin proposal set.\n            neg_inds (Tensor): The length is equal to the\n                negative sample numbers contain all index\n                of the negative sample in the origin proposal set.\n            pos_bboxes (Tensor): Contains all the positive boxes,\n                has shape (num_pos, 4), the last dimension 4\n                represents [tl_x, tl_y, br_x, br_y].\n            neg_bboxes (Tensor): Contains all the negative boxes,\n                has shape (num_neg, 4), the last dimension 4\n                represents [tl_x, tl_y, br_x, br_y].\n            pos_gt_bboxes (Tensor): Contains gt_boxes for\n                all positive samples, has shape (num_pos, 4),\n                the last dimension 4\n                represents [tl_x, tl_y, br_x, br_y].\n            pos_gt_labels (Tensor): Contains gt_labels for\n 
               all positive samples, has shape (num_pos, ).\n            cfg (obj:`ConfigDict`): `train_cfg` of R-CNN.\n\n        Returns:\n            Tuple[Tensor]: Ground truth for proposals in a single image.\n            Containing the following Tensors:\n\n                - labels(Tensor): Gt_labels for all proposals, has\n                  shape (num_proposals,).\n                - label_weights(Tensor): Labels_weights for all proposals, has\n                  shape (num_proposals,).\n                - bbox_targets(Tensor):Regression target for all proposals, has\n                  shape (num_proposals, 4), the last dimension 4\n                  represents [tl_x, tl_y, br_x, br_y].\n                - bbox_weights(Tensor):Regression weights for all proposals,\n                  has shape (num_proposals, 4).\n        \"\"\"\n        num_pos = pos_bboxes.size(0)\n        num_neg = neg_bboxes.size(0)\n        num_samples = num_pos + num_neg\n\n        # original implementation uses new_zeros since BG are set to be 0\n        # now use empty & fill because BG cat_id = num_classes,\n        # FG cat_id = [0, num_classes-1]\n        labels = pos_bboxes.new_full((num_samples, ),\n                                     self.num_classes,\n                                     dtype=torch.long)\n        label_weights = pos_bboxes.new_zeros(num_samples)\n        bbox_targets = pos_bboxes.new_zeros(num_samples, 4)\n        bbox_weights = pos_bboxes.new_zeros(num_samples, 4)\n        if num_pos > 0:\n            labels[pos_inds] = pos_gt_labels\n            pos_weight = 1.0 if cfg.pos_weight <= 0 else cfg.pos_weight\n            label_weights[pos_inds] = pos_weight\n            if not self.reg_decoded_bbox:\n                pos_bbox_targets = self.bbox_coder.encode(\n                    pos_bboxes, pos_gt_bboxes)\n            else:\n                pos_bbox_targets = pos_gt_bboxes\n            bbox_targets[pos_inds, :] = pos_bbox_targets\n            bbox_weights[pos_inds, :] = 1\n        if num_neg > 0:\n            label_weights[neg_inds] = 1.0\n\n        return labels, label_weights, bbox_targets, bbox_weights\n\n    def get_targets(self,\n                    sampling_results,\n                    gt_bboxes,\n                    gt_labels,\n                    rcnn_train_cfg,\n                    concat=True):\n        \"\"\"Calculate the ground truth for all samples in a batch according to\n        the sampling_results.\n\n        Almost the same as the implementation in bbox_head, we passed\n        additional parameters pos_inds_list and neg_inds_list to\n        `_get_target_single` function.\n\n        Args:\n            sampling_results (List[obj:SamplingResults]): Assign results of\n                all images in a batch after sampling.\n            gt_bboxes (list[Tensor]): Gt_bboxes of all images in a batch,\n                each tensor has shape (num_gt, 4),  the last dimension 4\n                represents [tl_x, tl_y, br_x, br_y].\n            gt_labels (list[Tensor]): Gt_labels of all images in a batch,\n                each tensor has shape (num_gt,).\n            rcnn_train_cfg (obj:`ConfigDict`): `train_cfg` of RCNN.\n            concat (bool): Whether to concatenate the results of all\n                the images in a single batch.\n\n        Returns:\n            Tuple[Tensor]: Ground truth for proposals in a single image.\n            Containing the following list of Tensors:\n\n                - labels (list[Tensor],Tensor): Gt_labels for all\n                  proposals in a 
batch, each tensor in list has\n                  shape (num_proposals,) when `concat=False`, otherwise just\n                  a single tensor has shape (num_all_proposals,).\n                - label_weights (list[Tensor]): Labels_weights for\n                  all proposals in a batch, each tensor in list has shape\n                  (num_proposals,) when `concat=False`, otherwise just a\n                  single tensor has shape (num_all_proposals,).\n                - bbox_targets (list[Tensor],Tensor): Regression target\n                  for all proposals in a batch, each tensor in list has\n                  shape (num_proposals, 4) when `concat=False`, otherwise\n                  just a single tensor has shape (num_all_proposals, 4),\n                  the last dimension 4 represents [tl_x, tl_y, br_x, br_y].\n                - bbox_weights (list[tensor],Tensor): Regression weights for\n                  all proposals in a batch, each tensor in list has shape\n                  (num_proposals, 4) when `concat=False`, otherwise just a\n                  single tensor has shape (num_all_proposals, 4).\n        \"\"\"\n        pos_inds_list = [res.pos_inds for res in sampling_results]\n        neg_inds_list = [res.neg_inds for res in sampling_results]\n        pos_bboxes_list = [res.pos_bboxes for res in sampling_results]\n        neg_bboxes_list = [res.neg_bboxes for res in sampling_results]\n        pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]\n        pos_gt_labels_list = [res.pos_gt_labels for res in sampling_results]\n        labels, label_weights, bbox_targets, bbox_weights = multi_apply(\n            self._get_target_single,\n            pos_inds_list,\n            neg_inds_list,\n            pos_bboxes_list,\n            neg_bboxes_list,\n            pos_gt_bboxes_list,\n            pos_gt_labels_list,\n            cfg=rcnn_train_cfg)\n        if concat:\n            labels = torch.cat(labels, 0)\n            label_weights = torch.cat(label_weights, 0)\n            bbox_targets = torch.cat(bbox_targets, 0)\n            bbox_weights = torch.cat(bbox_weights, 0)\n        return labels, label_weights, bbox_targets, bbox_weights\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/bbox_heads/double_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, ModuleList\n\nfrom mmdet.models.backbones.resnet import Bottleneck\nfrom mmdet.models.builder import HEADS\nfrom .bbox_head import BBoxHead\n\n\nclass BasicResBlock(BaseModule):\n    \"\"\"Basic residual block.\n\n    This block is a little different from the block in the ResNet backbone.\n    The kernel size of conv1 is 1 in this block while 3 in ResNet BasicBlock.\n\n    Args:\n        in_channels (int): Channels of the input feature map.\n        out_channels (int): Channels of the output feature map.\n        conv_cfg (dict): The config dict for convolution layers.\n        norm_cfg (dict): The config dict for normalization layers.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 init_cfg=None):\n        super(BasicResBlock, self).__init__(init_cfg)\n\n        # main path\n        self.conv1 = ConvModule(\n            in_channels,\n            in_channels,\n            kernel_size=3,\n            padding=1,\n            bias=False,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg)\n        self.conv2 = ConvModule(\n            in_channels,\n            out_channels,\n            kernel_size=1,\n            bias=False,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=None)\n\n        # identity path\n        self.conv_identity = ConvModule(\n            in_channels,\n            out_channels,\n            kernel_size=1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=None)\n\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        identity = x\n\n        x = self.conv1(x)\n        x = self.conv2(x)\n\n        identity = self.conv_identity(identity)\n        out = x + identity\n\n        out = self.relu(out)\n        return out\n\n\n@HEADS.register_module()\nclass DoubleConvFCBBoxHead(BBoxHead):\n    r\"\"\"Bbox head used in Double-Head R-CNN\n\n    .. 
code-block:: none\n\n                                          /-> cls\n                      /-> shared convs ->\n                                          \\-> reg\n        roi features\n                                          /-> cls\n                      \\-> shared fc    ->\n                                          \\-> reg\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_convs=0,\n                 num_fcs=0,\n                 conv_out_channels=1024,\n                 fc_out_channels=1024,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 init_cfg=dict(\n                     type='Normal',\n                     override=[\n                         dict(type='Normal', name='fc_cls', std=0.01),\n                         dict(type='Normal', name='fc_reg', std=0.001),\n                         dict(\n                             type='Xavier',\n                             name='fc_branch',\n                             distribution='uniform')\n                     ]),\n                 **kwargs):\n        kwargs.setdefault('with_avg_pool', True)\n        super(DoubleConvFCBBoxHead, self).__init__(init_cfg=init_cfg, **kwargs)\n        assert self.with_avg_pool\n        assert num_convs > 0\n        assert num_fcs > 0\n        self.num_convs = num_convs\n        self.num_fcs = num_fcs\n        self.conv_out_channels = conv_out_channels\n        self.fc_out_channels = fc_out_channels\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n\n        # increase the channel of input features\n        self.res_block = BasicResBlock(self.in_channels,\n                                       self.conv_out_channels)\n\n        # add conv heads\n        self.conv_branch = self._add_conv_branch()\n        # add fc heads\n        self.fc_branch = self._add_fc_branch()\n\n        out_dim_reg = 4 if self.reg_class_agnostic else 4 * self.num_classes\n        self.fc_reg = nn.Linear(self.conv_out_channels, out_dim_reg)\n\n        self.fc_cls = nn.Linear(self.fc_out_channels, self.num_classes + 1)\n        self.relu = nn.ReLU(inplace=True)\n\n    def _add_conv_branch(self):\n        \"\"\"Add the conv branch which consists of a sequence of conv layers.\"\"\"\n        branch_convs = ModuleList()\n        for i in range(self.num_convs):\n            branch_convs.append(\n                Bottleneck(\n                    inplanes=self.conv_out_channels,\n                    planes=self.conv_out_channels // 4,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        return branch_convs\n\n    def _add_fc_branch(self):\n        \"\"\"Add the fc branch which consists of a sequence of fc layers.\"\"\"\n        branch_fcs = ModuleList()\n        for i in range(self.num_fcs):\n            fc_in_channels = (\n                self.in_channels *\n                self.roi_feat_area if i == 0 else self.fc_out_channels)\n            branch_fcs.append(nn.Linear(fc_in_channels, self.fc_out_channels))\n        return branch_fcs\n\n    def forward(self, x_cls, x_reg):\n        # conv head\n        x_conv = self.res_block(x_reg)\n\n        for conv in self.conv_branch:\n            x_conv = conv(x_conv)\n\n        if self.with_avg_pool:\n            x_conv = self.avg_pool(x_conv)\n\n        x_conv = x_conv.view(x_conv.size(0), -1)\n        bbox_pred = self.fc_reg(x_conv)\n\n        # fc head\n        x_fc = x_cls.view(x_cls.size(0), -1)\n        for fc in self.fc_branch:\n            x_fc = 
self.relu(fc(x_fc))\n\n        cls_score = self.fc_cls(x_fc)\n\n        return cls_score, bbox_pred\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/bbox_heads/sabl_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, force_fp32\n\nfrom mmdet.core import build_bbox_coder, multi_apply, multiclass_nms\nfrom mmdet.models.builder import HEADS, build_loss\nfrom mmdet.models.losses import accuracy\n\n\n@HEADS.register_module()\nclass SABLHead(BaseModule):\n    \"\"\"Side-Aware Boundary Localization (SABL) for RoI-Head.\n\n    Side-Aware features are extracted by conv layers\n    with an attention mechanism.\n    Boundary Localization with Bucketing and Bucketing Guided Rescoring\n    are implemented in BucketingBBoxCoder.\n\n    Please refer to https://arxiv.org/abs/1912.04260 for more details.\n\n    Args:\n        cls_in_channels (int): Input channels of cls RoI feature. \\\n            Defaults to 256.\n        reg_in_channels (int): Input channels of reg RoI feature. \\\n            Defaults to 256.\n        roi_feat_size (int): Size of RoI features. Defaults to 7.\n        reg_feat_up_ratio (int): Upsample ratio of reg features. \\\n            Defaults to 2.\n        reg_pre_kernel (int): Kernel of 2D conv layers before \\\n            attention pooling. Defaults to 3.\n        reg_post_kernel (int): Kernel of 1D conv layers after \\\n            attention pooling. Defaults to 3.\n        reg_pre_num (int): Number of pre convs. Defaults to 2.\n        reg_post_num (int): Number of post convs. Defaults to 1.\n        num_classes (int): Number of classes in dataset. Defaults to 80.\n        cls_out_channels (int): Hidden channels in cls fcs. Defaults to 1024.\n        reg_offset_out_channels (int): Hidden and output channel \\\n            of reg offset branch. Defaults to 256.\n        reg_cls_out_channels (int): Hidden and output channel \\\n            of reg cls branch. Defaults to 256.\n        num_cls_fcs (int): Number of fcs for cls branch. Defaults to 1.\n        num_reg_fcs (int): Number of fcs for reg branch.. Defaults to 0.\n        reg_class_agnostic (bool): Class agnostic regression or not. \\\n            Defaults to True.\n        norm_cfg (dict): Config of norm layers. Defaults to None.\n        bbox_coder (dict): Config of bbox coder. 
Defaults 'BucketingBBoxCoder'.\n        loss_cls (dict): Config of classification loss.\n        loss_bbox_cls (dict): Config of classification loss for bbox branch.\n        loss_bbox_reg (dict): Config of regression loss for bbox branch.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 cls_in_channels=256,\n                 reg_in_channels=256,\n                 roi_feat_size=7,\n                 reg_feat_up_ratio=2,\n                 reg_pre_kernel=3,\n                 reg_post_kernel=3,\n                 reg_pre_num=2,\n                 reg_post_num=1,\n                 cls_out_channels=1024,\n                 reg_offset_out_channels=256,\n                 reg_cls_out_channels=256,\n                 num_cls_fcs=1,\n                 num_reg_fcs=0,\n                 reg_class_agnostic=True,\n                 norm_cfg=None,\n                 bbox_coder=dict(\n                     type='BucketingBBoxCoder',\n                     num_buckets=14,\n                     scale_factor=1.7),\n                 loss_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=False,\n                     loss_weight=1.0),\n                 loss_bbox_cls=dict(\n                     type='CrossEntropyLoss',\n                     use_sigmoid=True,\n                     loss_weight=1.0),\n                 loss_bbox_reg=dict(\n                     type='SmoothL1Loss', beta=0.1, loss_weight=1.0),\n                 init_cfg=None):\n        super(SABLHead, self).__init__(init_cfg)\n        self.cls_in_channels = cls_in_channels\n        self.reg_in_channels = reg_in_channels\n        self.roi_feat_size = roi_feat_size\n        self.reg_feat_up_ratio = int(reg_feat_up_ratio)\n        self.num_buckets = bbox_coder['num_buckets']\n        assert self.reg_feat_up_ratio // 2 >= 1\n        self.up_reg_feat_size = roi_feat_size * self.reg_feat_up_ratio\n        assert self.up_reg_feat_size == bbox_coder['num_buckets']\n        self.reg_pre_kernel = reg_pre_kernel\n        self.reg_post_kernel = reg_post_kernel\n        self.reg_pre_num = reg_pre_num\n        self.reg_post_num = reg_post_num\n        self.num_classes = num_classes\n        self.cls_out_channels = cls_out_channels\n        self.reg_offset_out_channels = reg_offset_out_channels\n        self.reg_cls_out_channels = reg_cls_out_channels\n        self.num_cls_fcs = num_cls_fcs\n        self.num_reg_fcs = num_reg_fcs\n        self.reg_class_agnostic = reg_class_agnostic\n        assert self.reg_class_agnostic\n        self.norm_cfg = norm_cfg\n\n        self.bbox_coder = build_bbox_coder(bbox_coder)\n        self.loss_cls = build_loss(loss_cls)\n        self.loss_bbox_cls = build_loss(loss_bbox_cls)\n        self.loss_bbox_reg = build_loss(loss_bbox_reg)\n\n        self.cls_fcs = self._add_fc_branch(self.num_cls_fcs,\n                                           self.cls_in_channels,\n                                           self.roi_feat_size,\n                                           self.cls_out_channels)\n\n        self.side_num = int(np.ceil(self.num_buckets / 2))\n\n        if self.reg_feat_up_ratio > 1:\n            self.upsample_x = nn.ConvTranspose1d(\n                reg_in_channels,\n                reg_in_channels,\n                self.reg_feat_up_ratio,\n                stride=self.reg_feat_up_ratio)\n            self.upsample_y = nn.ConvTranspose1d(\n                
reg_in_channels,\n                reg_in_channels,\n                self.reg_feat_up_ratio,\n                stride=self.reg_feat_up_ratio)\n\n        self.reg_pre_convs = nn.ModuleList()\n        for i in range(self.reg_pre_num):\n            reg_pre_conv = ConvModule(\n                reg_in_channels,\n                reg_in_channels,\n                kernel_size=reg_pre_kernel,\n                padding=reg_pre_kernel // 2,\n                norm_cfg=norm_cfg,\n                act_cfg=dict(type='ReLU'))\n            self.reg_pre_convs.append(reg_pre_conv)\n\n        self.reg_post_conv_xs = nn.ModuleList()\n        for i in range(self.reg_post_num):\n            reg_post_conv_x = ConvModule(\n                reg_in_channels,\n                reg_in_channels,\n                kernel_size=(1, reg_post_kernel),\n                padding=(0, reg_post_kernel // 2),\n                norm_cfg=norm_cfg,\n                act_cfg=dict(type='ReLU'))\n            self.reg_post_conv_xs.append(reg_post_conv_x)\n        self.reg_post_conv_ys = nn.ModuleList()\n        for i in range(self.reg_post_num):\n            reg_post_conv_y = ConvModule(\n                reg_in_channels,\n                reg_in_channels,\n                kernel_size=(reg_post_kernel, 1),\n                padding=(reg_post_kernel // 2, 0),\n                norm_cfg=norm_cfg,\n                act_cfg=dict(type='ReLU'))\n            self.reg_post_conv_ys.append(reg_post_conv_y)\n\n        self.reg_conv_att_x = nn.Conv2d(reg_in_channels, 1, 1)\n        self.reg_conv_att_y = nn.Conv2d(reg_in_channels, 1, 1)\n\n        self.fc_cls = nn.Linear(self.cls_out_channels, self.num_classes + 1)\n        self.relu = nn.ReLU(inplace=True)\n\n        self.reg_cls_fcs = self._add_fc_branch(self.num_reg_fcs,\n                                               self.reg_in_channels, 1,\n                                               self.reg_cls_out_channels)\n        self.reg_offset_fcs = self._add_fc_branch(self.num_reg_fcs,\n                                                  self.reg_in_channels, 1,\n                                                  self.reg_offset_out_channels)\n        self.fc_reg_cls = nn.Linear(self.reg_cls_out_channels, 1)\n        self.fc_reg_offset = nn.Linear(self.reg_offset_out_channels, 1)\n\n        if init_cfg is None:\n            self.init_cfg = [\n                dict(\n                    type='Xavier',\n                    layer='Linear',\n                    distribution='uniform',\n                    override=[\n                        dict(type='Normal', name='reg_conv_att_x', std=0.01),\n                        dict(type='Normal', name='reg_conv_att_y', std=0.01),\n                        dict(type='Normal', name='fc_reg_cls', std=0.01),\n                        dict(type='Normal', name='fc_cls', std=0.01),\n                        dict(type='Normal', name='fc_reg_offset', std=0.001)\n                    ])\n            ]\n            if self.reg_feat_up_ratio > 1:\n                self.init_cfg += [\n                    dict(\n                        type='Kaiming',\n                        distribution='normal',\n                        override=[\n                            dict(name='upsample_x'),\n                            dict(name='upsample_y')\n                        ])\n                ]\n\n    @property\n    def custom_cls_channels(self):\n        return getattr(self.loss_cls, 'custom_cls_channels', False)\n\n    @property\n    def custom_activation(self):\n        return getattr(self.loss_cls, 
'custom_activation', False)\n\n    @property\n    def custom_accuracy(self):\n        return getattr(self.loss_cls, 'custom_accuracy', False)\n\n    def _add_fc_branch(self, num_branch_fcs, in_channels, roi_feat_size,\n                       fc_out_channels):\n        in_channels = in_channels * roi_feat_size * roi_feat_size\n        branch_fcs = nn.ModuleList()\n        for i in range(num_branch_fcs):\n            fc_in_channels = (in_channels if i == 0 else fc_out_channels)\n            branch_fcs.append(nn.Linear(fc_in_channels, fc_out_channels))\n        return branch_fcs\n\n    def cls_forward(self, cls_x):\n        cls_x = cls_x.view(cls_x.size(0), -1)\n        for fc in self.cls_fcs:\n            cls_x = self.relu(fc(cls_x))\n        cls_score = self.fc_cls(cls_x)\n        return cls_score\n\n    def attention_pool(self, reg_x):\n        \"\"\"Extract direction-specific features fx and fy with attention\n        mechanism.\"\"\"\n        reg_fx = reg_x\n        reg_fy = reg_x\n        reg_fx_att = self.reg_conv_att_x(reg_fx).sigmoid()\n        reg_fy_att = self.reg_conv_att_y(reg_fy).sigmoid()\n        reg_fx_att = reg_fx_att / reg_fx_att.sum(dim=2).unsqueeze(2)\n        reg_fy_att = reg_fy_att / reg_fy_att.sum(dim=3).unsqueeze(3)\n        reg_fx = (reg_fx * reg_fx_att).sum(dim=2)\n        reg_fy = (reg_fy * reg_fy_att).sum(dim=3)\n        return reg_fx, reg_fy\n\n    def side_aware_feature_extractor(self, reg_x):\n        \"\"\"Refine and extract side-aware features without splitting them.\"\"\"\n        for reg_pre_conv in self.reg_pre_convs:\n            reg_x = reg_pre_conv(reg_x)\n        reg_fx, reg_fy = self.attention_pool(reg_x)\n\n        if self.reg_post_num > 0:\n            reg_fx = reg_fx.unsqueeze(2)\n            reg_fy = reg_fy.unsqueeze(3)\n            for i in range(self.reg_post_num):\n                reg_fx = self.reg_post_conv_xs[i](reg_fx)\n                reg_fy = self.reg_post_conv_ys[i](reg_fy)\n            reg_fx = reg_fx.squeeze(2)\n            reg_fy = reg_fy.squeeze(3)\n        if self.reg_feat_up_ratio > 1:\n            reg_fx = self.relu(self.upsample_x(reg_fx))\n            reg_fy = self.relu(self.upsample_y(reg_fy))\n        reg_fx = torch.transpose(reg_fx, 1, 2)\n        reg_fy = torch.transpose(reg_fy, 1, 2)\n        return reg_fx.contiguous(), reg_fy.contiguous()\n\n    def reg_pred(self, x, offset_fcs, cls_fcs):\n        \"\"\"Predict bucketing estimation (cls_pred) and fine regression (offset\n        pred) with side-aware features.\"\"\"\n        x_offset = x.view(-1, self.reg_in_channels)\n        x_cls = x.view(-1, self.reg_in_channels)\n\n        for fc in offset_fcs:\n            x_offset = self.relu(fc(x_offset))\n        for fc in cls_fcs:\n            x_cls = self.relu(fc(x_cls))\n        offset_pred = self.fc_reg_offset(x_offset)\n        cls_pred = self.fc_reg_cls(x_cls)\n\n        offset_pred = offset_pred.view(x.size(0), -1)\n        cls_pred = cls_pred.view(x.size(0), -1)\n\n        return offset_pred, cls_pred\n\n    def side_aware_split(self, feat):\n        \"\"\"Split side-aware features aligned with orders of bucketing\n        targets.\"\"\"\n        l_end = int(np.ceil(self.up_reg_feat_size / 2))\n        r_start = int(np.floor(self.up_reg_feat_size / 2))\n        feat_fl = feat[:, :l_end]\n        feat_fr = feat[:, r_start:].flip(dims=(1, ))\n        feat_fl = feat_fl.contiguous()\n        feat_fr = feat_fr.contiguous()\n        feat = torch.cat([feat_fl, feat_fr], dim=-1)\n        return feat\n\n    def bbox_pred_split(self, 
bbox_pred, num_proposals_per_img):\n        \"\"\"Split batch bbox prediction back to each image.\"\"\"\n        bucket_cls_preds, bucket_offset_preds = bbox_pred\n        bucket_cls_preds = bucket_cls_preds.split(num_proposals_per_img, 0)\n        bucket_offset_preds = bucket_offset_preds.split(\n            num_proposals_per_img, 0)\n        bbox_pred = tuple(zip(bucket_cls_preds, bucket_offset_preds))\n        return bbox_pred\n\n    def reg_forward(self, reg_x):\n        outs = self.side_aware_feature_extractor(reg_x)\n        edge_offset_preds = []\n        edge_cls_preds = []\n        reg_fx = outs[0]\n        reg_fy = outs[1]\n        offset_pred_x, cls_pred_x = self.reg_pred(reg_fx, self.reg_offset_fcs,\n                                                  self.reg_cls_fcs)\n        offset_pred_y, cls_pred_y = self.reg_pred(reg_fy, self.reg_offset_fcs,\n                                                  self.reg_cls_fcs)\n        offset_pred_x = self.side_aware_split(offset_pred_x)\n        offset_pred_y = self.side_aware_split(offset_pred_y)\n        cls_pred_x = self.side_aware_split(cls_pred_x)\n        cls_pred_y = self.side_aware_split(cls_pred_y)\n        edge_offset_preds = torch.cat([offset_pred_x, offset_pred_y], dim=-1)\n        edge_cls_preds = torch.cat([cls_pred_x, cls_pred_y], dim=-1)\n\n        return (edge_cls_preds, edge_offset_preds)\n\n    def forward(self, x):\n\n        bbox_pred = self.reg_forward(x)\n        cls_score = self.cls_forward(x)\n\n        return cls_score, bbox_pred\n\n    def get_targets(self, sampling_results, gt_bboxes, gt_labels,\n                    rcnn_train_cfg):\n        pos_proposals = [res.pos_bboxes for res in sampling_results]\n        neg_proposals = [res.neg_bboxes for res in sampling_results]\n        pos_gt_bboxes = [res.pos_gt_bboxes for res in sampling_results]\n        pos_gt_labels = [res.pos_gt_labels for res in sampling_results]\n        cls_reg_targets = self.bucket_target(pos_proposals, neg_proposals,\n                                             pos_gt_bboxes, pos_gt_labels,\n                                             rcnn_train_cfg)\n        (labels, label_weights, bucket_cls_targets, bucket_cls_weights,\n         bucket_offset_targets, bucket_offset_weights) = cls_reg_targets\n        return (labels, label_weights, (bucket_cls_targets,\n                                        bucket_offset_targets),\n                (bucket_cls_weights, bucket_offset_weights))\n\n    def bucket_target(self,\n                      pos_proposals_list,\n                      neg_proposals_list,\n                      pos_gt_bboxes_list,\n                      pos_gt_labels_list,\n                      rcnn_train_cfg,\n                      concat=True):\n        (labels, label_weights, bucket_cls_targets, bucket_cls_weights,\n         bucket_offset_targets, bucket_offset_weights) = multi_apply(\n             self._bucket_target_single,\n             pos_proposals_list,\n             neg_proposals_list,\n             pos_gt_bboxes_list,\n             pos_gt_labels_list,\n             cfg=rcnn_train_cfg)\n\n        if concat:\n            labels = torch.cat(labels, 0)\n            label_weights = torch.cat(label_weights, 0)\n            bucket_cls_targets = torch.cat(bucket_cls_targets, 0)\n            bucket_cls_weights = torch.cat(bucket_cls_weights, 0)\n            bucket_offset_targets = torch.cat(bucket_offset_targets, 0)\n            bucket_offset_weights = torch.cat(bucket_offset_weights, 0)\n        return (labels, label_weights, 
bucket_cls_targets, bucket_cls_weights,\n                bucket_offset_targets, bucket_offset_weights)\n\n    def _bucket_target_single(self, pos_proposals, neg_proposals,\n                              pos_gt_bboxes, pos_gt_labels, cfg):\n        \"\"\"Compute bucketing estimation targets and fine regression targets for\n        a single image.\n\n        Args:\n            pos_proposals (Tensor): positive proposals of a single image,\n                 Shape (n_pos, 4)\n            neg_proposals (Tensor): negative proposals of a single image,\n                 Shape (n_neg, 4).\n            pos_gt_bboxes (Tensor): gt bboxes assigned to positive proposals\n                 of a single image, Shape (n_pos, 4).\n            pos_gt_labels (Tensor): gt labels assigned to positive proposals\n                 of a single image, Shape (n_pos, ).\n            cfg (dict): Config of calculating targets\n\n        Returns:\n            tuple:\n\n                - labels (Tensor): Labels in a single image. \\\n                    Shape (n,).\n                - label_weights (Tensor): Label weights in a single image.\\\n                    Shape (n,)\n                - bucket_cls_targets (Tensor): Bucket cls targets in \\\n                    a single image. Shape (n, num_buckets*2).\n                - bucket_cls_weights (Tensor): Bucket cls weights in \\\n                    a single image. Shape (n, num_buckets*2).\n                - bucket_offset_targets (Tensor): Bucket offset targets \\\n                    in a single image. Shape (n, num_buckets*2).\n                - bucket_offset_weights (Tensor): Bucket offset weights \\\n                    in a single image. Shape (n, num_buckets*2).\n        \"\"\"\n        num_pos = pos_proposals.size(0)\n        num_neg = neg_proposals.size(0)\n        num_samples = num_pos + num_neg\n        labels = pos_gt_bboxes.new_full((num_samples, ),\n                                        self.num_classes,\n                                        dtype=torch.long)\n        label_weights = pos_proposals.new_zeros(num_samples)\n        bucket_cls_targets = pos_proposals.new_zeros(num_samples,\n                                                     4 * self.side_num)\n        bucket_cls_weights = pos_proposals.new_zeros(num_samples,\n                                                     4 * self.side_num)\n        bucket_offset_targets = pos_proposals.new_zeros(\n            num_samples, 4 * self.side_num)\n        bucket_offset_weights = pos_proposals.new_zeros(\n            num_samples, 4 * self.side_num)\n        if num_pos > 0:\n            labels[:num_pos] = pos_gt_labels\n            label_weights[:num_pos] = 1.0\n            (pos_bucket_offset_targets, pos_bucket_offset_weights,\n             pos_bucket_cls_targets,\n             pos_bucket_cls_weights) = self.bbox_coder.encode(\n                 pos_proposals, pos_gt_bboxes)\n            bucket_cls_targets[:num_pos, :] = pos_bucket_cls_targets\n            bucket_cls_weights[:num_pos, :] = pos_bucket_cls_weights\n            bucket_offset_targets[:num_pos, :] = pos_bucket_offset_targets\n            bucket_offset_weights[:num_pos, :] = pos_bucket_offset_weights\n        if num_neg > 0:\n            label_weights[-num_neg:] = 1.0\n        return (labels, label_weights, bucket_cls_targets, bucket_cls_weights,\n                bucket_offset_targets, bucket_offset_weights)\n\n    def loss(self,\n             cls_score,\n             bbox_pred,\n             rois,\n             labels,\n             
label_weights,\n             bbox_targets,\n             bbox_weights,\n             reduction_override=None):\n        losses = dict()\n        if cls_score is not None:\n            avg_factor = max(torch.sum(label_weights > 0).float().item(), 1.)\n            losses['loss_cls'] = self.loss_cls(\n                cls_score,\n                labels,\n                label_weights,\n                avg_factor=avg_factor,\n                reduction_override=reduction_override)\n            losses['acc'] = accuracy(cls_score, labels)\n\n        if bbox_pred is not None:\n            bucket_cls_preds, bucket_offset_preds = bbox_pred\n            bucket_cls_targets, bucket_offset_targets = bbox_targets\n            bucket_cls_weights, bucket_offset_weights = bbox_weights\n            # edge cls\n            bucket_cls_preds = bucket_cls_preds.view(-1, self.side_num)\n            bucket_cls_targets = bucket_cls_targets.view(-1, self.side_num)\n            bucket_cls_weights = bucket_cls_weights.view(-1, self.side_num)\n            losses['loss_bbox_cls'] = self.loss_bbox_cls(\n                bucket_cls_preds,\n                bucket_cls_targets,\n                bucket_cls_weights,\n                avg_factor=bucket_cls_targets.size(0),\n                reduction_override=reduction_override)\n\n            losses['loss_bbox_reg'] = self.loss_bbox_reg(\n                bucket_offset_preds,\n                bucket_offset_targets,\n                bucket_offset_weights,\n                avg_factor=bucket_offset_targets.size(0),\n                reduction_override=reduction_override)\n\n        return losses\n\n    @force_fp32(apply_to=('cls_score', 'bbox_pred'))\n    def get_bboxes(self,\n                   rois,\n                   cls_score,\n                   bbox_pred,\n                   img_shape,\n                   scale_factor,\n                   rescale=False,\n                   cfg=None):\n        if isinstance(cls_score, list):\n            cls_score = sum(cls_score) / float(len(cls_score))\n        scores = F.softmax(cls_score, dim=1) if cls_score is not None else None\n\n        if bbox_pred is not None:\n            bboxes, confidences = self.bbox_coder.decode(\n                rois[:, 1:], bbox_pred, img_shape)\n        else:\n            bboxes = rois[:, 1:].clone()\n            confidences = None\n            if img_shape is not None:\n                bboxes[:, [0, 2]].clamp_(min=0, max=img_shape[1] - 1)\n                bboxes[:, [1, 3]].clamp_(min=0, max=img_shape[0] - 1)\n\n        if rescale and bboxes.size(0) > 0:\n            if isinstance(scale_factor, float):\n                bboxes /= scale_factor\n            else:\n                bboxes /= torch.from_numpy(scale_factor).to(bboxes.device)\n\n        if cfg is None:\n            return bboxes, scores\n        else:\n            det_bboxes, det_labels = multiclass_nms(\n                bboxes,\n                scores,\n                cfg.score_thr,\n                cfg.nms,\n                cfg.max_per_img,\n                score_factors=confidences)\n\n            return det_bboxes, det_labels\n\n    @force_fp32(apply_to=('bbox_preds', ))\n    def refine_bboxes(self, rois, labels, bbox_preds, pos_is_gts, img_metas):\n        \"\"\"Refine bboxes during training.\n\n        Args:\n            rois (Tensor): Shape (n*bs, 5), where n is image number per GPU,\n                and bs is the sampled RoIs per image.\n            labels (Tensor): Shape (n*bs, ).\n            bbox_preds (list[Tensor]): Shape [(n*bs, 
num_buckets*2), \\\n                (n*bs, num_buckets*2)].\n            pos_is_gts (list[Tensor]): Flags indicating if each positive bbox\n                is a gt bbox.\n            img_metas (list[dict]): Meta info of each image.\n\n        Returns:\n            list[Tensor]: Refined bboxes of each image in a mini-batch.\n        \"\"\"\n        img_ids = rois[:, 0].long().unique(sorted=True)\n        assert img_ids.numel() == len(img_metas)\n\n        bboxes_list = []\n        for i in range(len(img_metas)):\n            inds = torch.nonzero(\n                rois[:, 0] == i, as_tuple=False).squeeze(dim=1)\n            num_rois = inds.numel()\n\n            bboxes_ = rois[inds, 1:]\n            label_ = labels[inds]\n            edge_cls_preds, edge_offset_preds = bbox_preds\n            edge_cls_preds_ = edge_cls_preds[inds]\n            edge_offset_preds_ = edge_offset_preds[inds]\n            bbox_pred_ = [edge_cls_preds_, edge_offset_preds_]\n            img_meta_ = img_metas[i]\n            pos_is_gts_ = pos_is_gts[i]\n\n            bboxes = self.regress_by_class(bboxes_, label_, bbox_pred_,\n                                           img_meta_)\n            # filter gt bboxes\n            pos_keep = 1 - pos_is_gts_\n            keep_inds = pos_is_gts_.new_ones(num_rois)\n            keep_inds[:len(pos_is_gts_)] = pos_keep\n\n            bboxes_list.append(bboxes[keep_inds.type(torch.bool)])\n\n        return bboxes_list\n\n    @force_fp32(apply_to=('bbox_pred', ))\n    def regress_by_class(self, rois, label, bbox_pred, img_meta):\n        \"\"\"Regress the bbox for the predicted class. Used in Cascade R-CNN.\n\n        Args:\n            rois (Tensor): shape (n, 4) or (n, 5)\n            label (Tensor): shape (n, )\n            bbox_pred (list[Tensor]): shape [(n, num_buckets *2), \\\n                (n, num_buckets *2)]\n            img_meta (dict): Image meta info.\n\n        Returns:\n            Tensor: Regressed bboxes, the same shape as input rois.\n        \"\"\"\n        assert rois.size(1) == 4 or rois.size(1) == 5\n\n        if rois.size(1) == 4:\n            new_rois, _ = self.bbox_coder.decode(rois, bbox_pred,\n                                                 img_meta['img_shape'])\n        else:\n            bboxes, _ = self.bbox_coder.decode(rois[:, 1:], bbox_pred,\n                                               img_meta['img_shape'])\n            new_rois = torch.cat((rois[:, [0]], bboxes), dim=1)\n\n        return new_rois\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/bbox_heads/scnet_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.builder import HEADS\nfrom .convfc_bbox_head import ConvFCBBoxHead\n\n\n@HEADS.register_module()\nclass SCNetBBoxHead(ConvFCBBoxHead):\n    \"\"\"BBox head for `SCNet <https://arxiv.org/abs/2012.10150>`_.\n\n    This inherits ``ConvFCBBoxHead`` with modified forward() function, allow us\n    to get intermediate shared feature.\n    \"\"\"\n\n    def _forward_shared(self, x):\n        \"\"\"Forward function for shared part.\"\"\"\n        if self.num_shared_convs > 0:\n            for conv in self.shared_convs:\n                x = conv(x)\n\n        if self.num_shared_fcs > 0:\n            if self.with_avg_pool:\n                x = self.avg_pool(x)\n\n            x = x.flatten(1)\n\n            for fc in self.shared_fcs:\n                x = self.relu(fc(x))\n\n        return x\n\n    def _forward_cls_reg(self, x):\n        \"\"\"Forward function for classification and regression parts.\"\"\"\n        x_cls = x\n        x_reg = x\n\n        for conv in self.cls_convs:\n            x_cls = conv(x_cls)\n        if x_cls.dim() > 2:\n            if self.with_avg_pool:\n                x_cls = self.avg_pool(x_cls)\n            x_cls = x_cls.flatten(1)\n        for fc in self.cls_fcs:\n            x_cls = self.relu(fc(x_cls))\n\n        for conv in self.reg_convs:\n            x_reg = conv(x_reg)\n        if x_reg.dim() > 2:\n            if self.with_avg_pool:\n                x_reg = self.avg_pool(x_reg)\n            x_reg = x_reg.flatten(1)\n        for fc in self.reg_fcs:\n            x_reg = self.relu(fc(x_reg))\n\n        cls_score = self.fc_cls(x_cls) if self.with_cls else None\n        bbox_pred = self.fc_reg(x_reg) if self.with_reg else None\n\n        return cls_score, bbox_pred\n\n    def forward(self, x, return_shared_feat=False):\n        \"\"\"Forward function.\n\n        Args:\n            x (Tensor): input features\n            return_shared_feat (bool): If True, return cls-reg-shared feature.\n\n        Return:\n            out (tuple[Tensor]): contain ``cls_score`` and ``bbox_pred``,\n                if  ``return_shared_feat`` is True, append ``x_shared`` to the\n                returned tuple.\n        \"\"\"\n        x_shared = self._forward_shared(x)\n        out = self._forward_cls_reg(x_shared)\n\n        if return_shared_feat:\n            out += (x_shared, )\n\n        return out\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/cascade_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import ModuleList\n\nfrom mmdet.core import (bbox2result, bbox2roi, bbox_mapping, build_assigner,\n                        build_sampler, merge_aug_bboxes, merge_aug_masks,\n                        multiclass_nms)\nfrom ..builder import HEADS, build_head, build_roi_extractor\nfrom .base_roi_head import BaseRoIHead\nfrom .test_mixins import BBoxTestMixin, MaskTestMixin\n\n\n@HEADS.register_module()\nclass CascadeRoIHead(BaseRoIHead, BBoxTestMixin, MaskTestMixin):\n    \"\"\"Cascade roi head including one bbox head and one mask head.\n\n    https://arxiv.org/abs/1712.00726\n    \"\"\"\n\n    def __init__(self,\n                 num_stages,\n                 stage_loss_weights,\n                 bbox_roi_extractor=None,\n                 bbox_head=None,\n                 mask_roi_extractor=None,\n                 mask_head=None,\n                 shared_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        assert bbox_roi_extractor is not None\n        assert bbox_head is not None\n        assert shared_head is None, \\\n            'Shared head is not supported in Cascade RCNN anymore'\n\n        self.num_stages = num_stages\n        self.stage_loss_weights = stage_loss_weights\n        super(CascadeRoIHead, self).__init__(\n            bbox_roi_extractor=bbox_roi_extractor,\n            bbox_head=bbox_head,\n            mask_roi_extractor=mask_roi_extractor,\n            mask_head=mask_head,\n            shared_head=shared_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n\n    def init_bbox_head(self, bbox_roi_extractor, bbox_head):\n        \"\"\"Initialize box head and box roi extractor.\n\n        Args:\n            bbox_roi_extractor (dict): Config of box roi extractor.\n            bbox_head (dict): Config of box in box head.\n        \"\"\"\n        self.bbox_roi_extractor = ModuleList()\n        self.bbox_head = ModuleList()\n        if not isinstance(bbox_roi_extractor, list):\n            bbox_roi_extractor = [\n                bbox_roi_extractor for _ in range(self.num_stages)\n            ]\n        if not isinstance(bbox_head, list):\n            bbox_head = [bbox_head for _ in range(self.num_stages)]\n        assert len(bbox_roi_extractor) == len(bbox_head) == self.num_stages\n        for roi_extractor, head in zip(bbox_roi_extractor, bbox_head):\n            self.bbox_roi_extractor.append(build_roi_extractor(roi_extractor))\n            self.bbox_head.append(build_head(head))\n\n    def init_mask_head(self, mask_roi_extractor, mask_head):\n        \"\"\"Initialize mask head and mask roi extractor.\n\n        Args:\n            mask_roi_extractor (dict): Config of mask roi extractor.\n            mask_head (dict): Config of mask in mask head.\n        \"\"\"\n        self.mask_head = nn.ModuleList()\n        if not isinstance(mask_head, list):\n            mask_head = [mask_head for _ in range(self.num_stages)]\n        assert len(mask_head) == self.num_stages\n        for head in mask_head:\n            self.mask_head.append(build_head(head))\n        if mask_roi_extractor is not None:\n            self.share_roi_extractor = False\n            self.mask_roi_extractor = ModuleList()\n            if not isinstance(mask_roi_extractor, list):\n           
     mask_roi_extractor = [\n                    mask_roi_extractor for _ in range(self.num_stages)\n                ]\n            assert len(mask_roi_extractor) == self.num_stages\n            for roi_extractor in mask_roi_extractor:\n                self.mask_roi_extractor.append(\n                    build_roi_extractor(roi_extractor))\n        else:\n            self.share_roi_extractor = True\n            self.mask_roi_extractor = self.bbox_roi_extractor\n\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler for each stage.\"\"\"\n        self.bbox_assigner = []\n        self.bbox_sampler = []\n        if self.train_cfg is not None:\n            for idx, rcnn_train_cfg in enumerate(self.train_cfg):\n                self.bbox_assigner.append(\n                    build_assigner(rcnn_train_cfg.assigner))\n                self.current_stage = idx\n                self.bbox_sampler.append(\n                    build_sampler(rcnn_train_cfg.sampler, context=self))\n\n    def forward_dummy(self, x, proposals):\n        \"\"\"Dummy forward function.\"\"\"\n        # bbox head\n        outs = ()\n        rois = bbox2roi([proposals])\n        if self.with_bbox:\n            for i in range(self.num_stages):\n                bbox_results = self._bbox_forward(i, x, rois)\n                outs = outs + (bbox_results['cls_score'],\n                               bbox_results['bbox_pred'])\n        # mask heads\n        if self.with_mask:\n            mask_rois = rois[:100]\n            for i in range(self.num_stages):\n                mask_results = self._mask_forward(i, x, mask_rois)\n                outs = outs + (mask_results['mask_pred'], )\n        return outs\n\n    def _bbox_forward(self, stage, x, rois):\n        \"\"\"Box head forward function used in both training and testing.\"\"\"\n        bbox_roi_extractor = self.bbox_roi_extractor[stage]\n        bbox_head = self.bbox_head[stage]\n        bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],\n                                        rois)\n        # do not support caffe_c4 model anymore\n        cls_score, bbox_pred = bbox_head(bbox_feats)\n\n        bbox_results = dict(\n            cls_score=cls_score, bbox_pred=bbox_pred, bbox_feats=bbox_feats)\n        return bbox_results\n\n    def _bbox_forward_train(self, stage, x, sampling_results, gt_bboxes,\n                            gt_labels, rcnn_train_cfg):\n        \"\"\"Run forward function and calculate loss for box head in training.\"\"\"\n        rois = bbox2roi([res.bboxes for res in sampling_results])\n        bbox_results = self._bbox_forward(stage, x, rois)\n        bbox_targets = self.bbox_head[stage].get_targets(\n            sampling_results, gt_bboxes, gt_labels, rcnn_train_cfg)\n        loss_bbox = self.bbox_head[stage].loss(bbox_results['cls_score'],\n                                               bbox_results['bbox_pred'], rois,\n                                               *bbox_targets)\n\n        bbox_results.update(\n            loss_bbox=loss_bbox, rois=rois, bbox_targets=bbox_targets)\n        return bbox_results\n\n    def _mask_forward(self, stage, x, rois):\n        \"\"\"Mask head forward function used in both training and testing.\"\"\"\n        mask_roi_extractor = self.mask_roi_extractor[stage]\n        mask_head = self.mask_head[stage]\n        mask_feats = mask_roi_extractor(x[:mask_roi_extractor.num_inputs],\n                                        rois)\n        # do not support caffe_c4 model anymore\n     
   mask_pred = mask_head(mask_feats)\n\n        mask_results = dict(mask_pred=mask_pred)\n        return mask_results\n\n    def _mask_forward_train(self,\n                            stage,\n                            x,\n                            sampling_results,\n                            gt_masks,\n                            rcnn_train_cfg,\n                            bbox_feats=None):\n        \"\"\"Run forward function and calculate loss for mask head in\n        training.\"\"\"\n        pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])\n        mask_results = self._mask_forward(stage, x, pos_rois)\n\n        mask_targets = self.mask_head[stage].get_targets(\n            sampling_results, gt_masks, rcnn_train_cfg)\n        pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n        loss_mask = self.mask_head[stage].loss(mask_results['mask_pred'],\n                                               mask_targets, pos_labels)\n\n        mask_results.update(loss_mask=loss_mask)\n        return mask_results\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None):\n        \"\"\"\n        Args:\n            x (list[Tensor]): list of multi-level img features.\n            img_metas (list[dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n            proposals (list[Tensors]): list of region proposals.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n            gt_masks (None | Tensor) : true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        losses = dict()\n        for i in range(self.num_stages):\n            self.current_stage = i\n            rcnn_train_cfg = self.train_cfg[i]\n            lw = self.stage_loss_weights[i]\n\n            # assign gts and sample proposals\n            sampling_results = []\n            if self.with_bbox or self.with_mask:\n                bbox_assigner = self.bbox_assigner[i]\n                bbox_sampler = self.bbox_sampler[i]\n                num_imgs = len(img_metas)\n                if gt_bboxes_ignore is None:\n                    gt_bboxes_ignore = [None for _ in range(num_imgs)]\n\n                for j in range(num_imgs):\n                    assign_result = bbox_assigner.assign(\n                        proposal_list[j], gt_bboxes[j], gt_bboxes_ignore[j],\n                        gt_labels[j])\n                    sampling_result = bbox_sampler.sample(\n                        assign_result,\n                        proposal_list[j],\n                        gt_bboxes[j],\n                        gt_labels[j],\n                        
feats=[lvl_feat[j][None] for lvl_feat in x])\n                    sampling_results.append(sampling_result)\n\n            # bbox head forward and loss\n            bbox_results = self._bbox_forward_train(i, x, sampling_results,\n                                                    gt_bboxes, gt_labels,\n                                                    rcnn_train_cfg)\n\n            for name, value in bbox_results['loss_bbox'].items():\n                losses[f's{i}.{name}'] = (\n                    value * lw if 'loss' in name else value)\n\n            # mask head forward and loss\n            if self.with_mask:\n                mask_results = self._mask_forward_train(\n                    i, x, sampling_results, gt_masks, rcnn_train_cfg,\n                    bbox_results['bbox_feats'])\n                for name, value in mask_results['loss_mask'].items():\n                    losses[f's{i}.{name}'] = (\n                        value * lw if 'loss' in name else value)\n\n            # refine bboxes\n            if i < self.num_stages - 1:\n                pos_is_gts = [res.pos_is_gt for res in sampling_results]\n                # bbox_targets is a tuple\n                roi_labels = bbox_results['bbox_targets'][0]\n                with torch.no_grad():\n                    cls_score = bbox_results['cls_score']\n                    if self.bbox_head[i].custom_activation:\n                        cls_score = self.bbox_head[i].loss_cls.get_activation(\n                            cls_score)\n\n                    # Empty proposal.\n                    if cls_score.numel() == 0:\n                        break\n\n                    roi_labels = torch.where(\n                        roi_labels == self.bbox_head[i].num_classes,\n                        cls_score[:, :-1].argmax(1), roi_labels)\n                    proposal_list = self.bbox_head[i].refine_bboxes(\n                        bbox_results['rois'], roi_labels,\n                        bbox_results['bbox_pred'], pos_is_gts, img_metas)\n\n        return losses\n\n    def simple_test(self, x, proposal_list, img_metas, rescale=False):\n        \"\"\"Test without augmentation.\n\n        Args:\n            x (tuple[Tensor]): Features from upstream network. Each\n                has shape (batch_size, c, h, w).\n            proposal_list (list(Tensor)): Proposals from rpn head.\n                Each has shape (num_proposals, 5), last dimension\n                5 represent (x1, y1, x2, y2, score).\n            img_metas (list[dict]): Meta information of images.\n            rescale (bool): Whether to rescale the results to\n                the original image. Default: True.\n\n        Returns:\n            list[list[np.ndarray]] or list[tuple]: When no mask branch,\n            it is bbox results of each image and classes with type\n            `list[list[np.ndarray]]`. The outer list\n            corresponds to each image. The inner list\n            corresponds to each class. 
When the model has mask branch,\n            it contains bbox results and mask results.\n            The outer list corresponds to each image, and first element\n            of tuple is bbox results, second element is mask results.\n        \"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        num_imgs = len(proposal_list)\n        img_shapes = tuple(meta['img_shape'] for meta in img_metas)\n        ori_shapes = tuple(meta['ori_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        # \"ms\" in variable names means multi-stage\n        ms_bbox_result = {}\n        ms_segm_result = {}\n        ms_scores = []\n        rcnn_test_cfg = self.test_cfg\n\n        rois = bbox2roi(proposal_list)\n\n        if rois.shape[0] == 0:\n            # There is no proposal in the whole batch\n            bbox_results = [[\n                np.zeros((0, 5), dtype=np.float32)\n                for _ in range(self.bbox_head[-1].num_classes)\n            ]] * num_imgs\n\n            if self.with_mask:\n                mask_classes = self.mask_head[-1].num_classes\n                segm_results = [[[] for _ in range(mask_classes)]\n                                for _ in range(num_imgs)]\n                results = list(zip(bbox_results, segm_results))\n            else:\n                results = bbox_results\n\n            return results\n\n        for i in range(self.num_stages):\n            bbox_results = self._bbox_forward(i, x, rois)\n\n            # split batch bbox prediction back to each image\n            cls_score = bbox_results['cls_score']\n            bbox_pred = bbox_results['bbox_pred']\n            num_proposals_per_img = tuple(\n                len(proposals) for proposals in proposal_list)\n            rois = rois.split(num_proposals_per_img, 0)\n            cls_score = cls_score.split(num_proposals_per_img, 0)\n            if isinstance(bbox_pred, torch.Tensor):\n                bbox_pred = bbox_pred.split(num_proposals_per_img, 0)\n            else:\n                bbox_pred = self.bbox_head[i].bbox_pred_split(\n                    bbox_pred, num_proposals_per_img)\n            ms_scores.append(cls_score)\n\n            if i < self.num_stages - 1:\n                if self.bbox_head[i].custom_activation:\n                    cls_score = [\n                        self.bbox_head[i].loss_cls.get_activation(s)\n                        for s in cls_score\n                    ]\n                refine_rois_list = []\n                for j in range(num_imgs):\n                    if rois[j].shape[0] > 0:\n                        bbox_label = cls_score[j][:, :-1].argmax(dim=1)\n                        refined_rois = self.bbox_head[i].regress_by_class(\n                            rois[j], bbox_label, bbox_pred[j], img_metas[j])\n                        refine_rois_list.append(refined_rois)\n                rois = torch.cat(refine_rois_list)\n\n        # average scores of each image by stages\n        cls_score = [\n            sum([score[i] for score in ms_scores]) / float(len(ms_scores))\n            for i in range(num_imgs)\n        ]\n\n        # apply bbox post-processing to each image individually\n        det_bboxes = []\n        det_labels = []\n        for i in range(num_imgs):\n            det_bbox, det_label = self.bbox_head[-1].get_bboxes(\n                rois[i],\n                cls_score[i],\n                bbox_pred[i],\n                img_shapes[i],\n                scale_factors[i],\n           
     rescale=rescale,\n                cfg=rcnn_test_cfg)\n            det_bboxes.append(det_bbox)\n            det_labels.append(det_label)\n\n        bbox_results = [\n            bbox2result(det_bboxes[i], det_labels[i],\n                        self.bbox_head[-1].num_classes)\n            for i in range(num_imgs)\n        ]\n        ms_bbox_result['ensemble'] = bbox_results\n\n        if self.with_mask:\n            if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n                mask_classes = self.mask_head[-1].num_classes\n                segm_results = [[[] for _ in range(mask_classes)]\n                                for _ in range(num_imgs)]\n            else:\n                if rescale and not isinstance(scale_factors[0], float):\n                    scale_factors = [\n                        torch.from_numpy(scale_factor).to(det_bboxes[0].device)\n                        for scale_factor in scale_factors\n                    ]\n                _bboxes = [\n                    det_bboxes[i][:, :4] *\n                    scale_factors[i] if rescale else det_bboxes[i][:, :4]\n                    for i in range(len(det_bboxes))\n                ]\n                mask_rois = bbox2roi(_bboxes)\n                num_mask_rois_per_img = tuple(\n                    _bbox.size(0) for _bbox in _bboxes)\n                aug_masks = []\n                for i in range(self.num_stages):\n                    mask_results = self._mask_forward(i, x, mask_rois)\n                    mask_pred = mask_results['mask_pred']\n                    # split batch mask prediction back to each image\n                    mask_pred = mask_pred.split(num_mask_rois_per_img, 0)\n                    aug_masks.append([\n                        m.sigmoid().cpu().detach().numpy() for m in mask_pred\n                    ])\n\n                # apply mask post-processing to each image individually\n                segm_results = []\n                for i in range(num_imgs):\n                    if det_bboxes[i].shape[0] == 0:\n                        segm_results.append(\n                            [[]\n                             for _ in range(self.mask_head[-1].num_classes)])\n                    else:\n                        aug_mask = [mask[i] for mask in aug_masks]\n                        merged_masks = merge_aug_masks(\n                            aug_mask, [[img_metas[i]]] * self.num_stages,\n                            rcnn_test_cfg)\n                        segm_result = self.mask_head[-1].get_seg_masks(\n                            merged_masks, _bboxes[i], det_labels[i],\n                            rcnn_test_cfg, ori_shapes[i], scale_factors[i],\n                            rescale)\n                        segm_results.append(segm_result)\n            ms_segm_result['ensemble'] = segm_results\n\n        if self.with_mask:\n            results = list(\n                zip(ms_bbox_result['ensemble'], ms_segm_result['ensemble']))\n        else:\n            results = ms_bbox_result['ensemble']\n\n        return results\n\n    def aug_test(self, features, proposal_list, img_metas, rescale=False):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        rcnn_test_cfg = self.test_cfg\n        aug_bboxes = []\n        aug_scores = []\n        for x, img_meta in zip(features, img_metas):\n            # only one image in the batch\n            img_shape = img_meta[0]['img_shape']\n        
    scale_factor = img_meta[0]['scale_factor']\n            flip = img_meta[0]['flip']\n            flip_direction = img_meta[0]['flip_direction']\n\n            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,\n                                     scale_factor, flip, flip_direction)\n            # \"ms\" in variable names means multi-stage\n            ms_scores = []\n\n            rois = bbox2roi([proposals])\n\n            if rois.shape[0] == 0:\n                # There is no proposal in the single image\n                aug_bboxes.append(rois.new_zeros(0, 4))\n                aug_scores.append(rois.new_zeros(0, 1))\n                continue\n\n            for i in range(self.num_stages):\n                bbox_results = self._bbox_forward(i, x, rois)\n                ms_scores.append(bbox_results['cls_score'])\n\n                if i < self.num_stages - 1:\n                    cls_score = bbox_results['cls_score']\n                    if self.bbox_head[i].custom_activation:\n                        cls_score = self.bbox_head[i].loss_cls.get_activation(\n                            cls_score)\n                    bbox_label = cls_score[:, :-1].argmax(dim=1)\n                    rois = self.bbox_head[i].regress_by_class(\n                        rois, bbox_label, bbox_results['bbox_pred'],\n                        img_meta[0])\n\n            cls_score = sum(ms_scores) / float(len(ms_scores))\n            bboxes, scores = self.bbox_head[-1].get_bboxes(\n                rois,\n                cls_score,\n                bbox_results['bbox_pred'],\n                img_shape,\n                scale_factor,\n                rescale=False,\n                cfg=None)\n            aug_bboxes.append(bboxes)\n            aug_scores.append(scores)\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = merge_aug_bboxes(\n            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)\n        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,\n                                                rcnn_test_cfg.score_thr,\n                                                rcnn_test_cfg.nms,\n                                                rcnn_test_cfg.max_per_img)\n\n        bbox_result = bbox2result(det_bboxes, det_labels,\n                                  self.bbox_head[-1].num_classes)\n\n        if self.with_mask:\n            if det_bboxes.shape[0] == 0:\n                segm_result = [[]\n                               for _ in range(self.mask_head[-1].num_classes)]\n            else:\n                aug_masks = []\n                aug_img_metas = []\n                for x, img_meta in zip(features, img_metas):\n                    img_shape = img_meta[0]['img_shape']\n                    scale_factor = img_meta[0]['scale_factor']\n                    flip = img_meta[0]['flip']\n                    flip_direction = img_meta[0]['flip_direction']\n                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,\n                                           scale_factor, flip, flip_direction)\n                    mask_rois = bbox2roi([_bboxes])\n                    for i in range(self.num_stages):\n                        mask_results = self._mask_forward(i, x, mask_rois)\n                        aug_masks.append(\n                            mask_results['mask_pred'].sigmoid().cpu().numpy())\n                        aug_img_metas.append(img_meta)\n                merged_masks = merge_aug_masks(aug_masks, 
aug_img_metas,\n                                               self.test_cfg)\n\n                ori_shape = img_metas[0][0]['ori_shape']\n                dummy_scale_factor = np.ones(4)\n                segm_result = self.mask_head[-1].get_seg_masks(\n                    merged_masks,\n                    det_bboxes,\n                    det_labels,\n                    rcnn_test_cfg,\n                    ori_shape,\n                    scale_factor=dummy_scale_factor,\n                    rescale=False)\n            return [(bbox_result, segm_result)]\n        else:\n            return [bbox_result]\n\n    def onnx_export(self, x, proposals, img_metas):\n\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        assert proposals.shape[0] == 1, 'Only support one input image ' \\\n                                        'while in exporting to ONNX'\n        # remove the scores\n        rois = proposals[..., :-1]\n        batch_size = rois.shape[0]\n        num_proposals_per_img = rois.shape[1]\n        # Eliminate the batch dimension\n        rois = rois.view(-1, 4)\n\n        # add dummy batch index\n        rois = torch.cat([rois.new_zeros(rois.shape[0], 1), rois], dim=-1)\n\n        max_shape = img_metas[0]['img_shape_for_onnx']\n        ms_scores = []\n        rcnn_test_cfg = self.test_cfg\n\n        for i in range(self.num_stages):\n            bbox_results = self._bbox_forward(i, x, rois)\n\n            cls_score = bbox_results['cls_score']\n            bbox_pred = bbox_results['bbox_pred']\n            # Recover the batch dimension\n            rois = rois.reshape(batch_size, num_proposals_per_img,\n                                rois.size(-1))\n            cls_score = cls_score.reshape(batch_size, num_proposals_per_img,\n                                          cls_score.size(-1))\n            bbox_pred = bbox_pred.reshape(batch_size, num_proposals_per_img, 4)\n            ms_scores.append(cls_score)\n            if i < self.num_stages - 1:\n                assert self.bbox_head[i].reg_class_agnostic\n                new_rois = self.bbox_head[i].bbox_coder.decode(\n                    rois[..., 1:], bbox_pred, max_shape=max_shape)\n                rois = new_rois.reshape(-1, new_rois.shape[-1])\n                # add dummy batch index\n                rois = torch.cat([rois.new_zeros(rois.shape[0], 1), rois],\n                                 dim=-1)\n\n        cls_score = sum(ms_scores) / float(len(ms_scores))\n        bbox_pred = bbox_pred.reshape(batch_size, num_proposals_per_img, 4)\n        rois = rois.reshape(batch_size, num_proposals_per_img, -1)\n        det_bboxes, det_labels = self.bbox_head[-1].onnx_export(\n            rois, cls_score, bbox_pred, max_shape, cfg=rcnn_test_cfg)\n\n        if not self.with_mask:\n            return det_bboxes, det_labels\n        else:\n            batch_index = torch.arange(\n                det_bboxes.size(0),\n                device=det_bboxes.device).float().view(-1, 1, 1).expand(\n                    det_bboxes.size(0), det_bboxes.size(1), 1)\n            rois = det_bboxes[..., :4]\n            mask_rois = torch.cat([batch_index, rois], dim=-1)\n            mask_rois = mask_rois.view(-1, 5)\n            aug_masks = []\n            for i in range(self.num_stages):\n                mask_results = self._mask_forward(i, x, mask_rois)\n                mask_pred = mask_results['mask_pred']\n                aug_masks.append(mask_pred)\n            max_shape = img_metas[0]['img_shape_for_onnx']\n            # calculate the 
mean of masks from several stages\n            mask_pred = sum(aug_masks) / len(aug_masks)\n            segm_results = self.mask_head[-1].onnx_export(\n                mask_pred, rois.reshape(-1, 4), det_labels.reshape(-1),\n                self.test_cfg, max_shape)\n            segm_results = segm_results.reshape(batch_size,\n                                                det_bboxes.shape[1],\n                                                max_shape[0], max_shape[1])\n            return det_bboxes, det_labels, segm_results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/double_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom ..builder import HEADS\nfrom .standard_roi_head import StandardRoIHead\n\n\n@HEADS.register_module()\nclass DoubleHeadRoIHead(StandardRoIHead):\n    \"\"\"RoI head for Double Head RCNN.\n\n    https://arxiv.org/abs/1904.06493\n    \"\"\"\n\n    def __init__(self, reg_roi_scale_factor, **kwargs):\n        super(DoubleHeadRoIHead, self).__init__(**kwargs)\n        self.reg_roi_scale_factor = reg_roi_scale_factor\n\n    def _bbox_forward(self, x, rois):\n        \"\"\"Box head forward function used in both training and testing time.\"\"\"\n        bbox_cls_feats = self.bbox_roi_extractor(\n            x[:self.bbox_roi_extractor.num_inputs], rois)\n        bbox_reg_feats = self.bbox_roi_extractor(\n            x[:self.bbox_roi_extractor.num_inputs],\n            rois,\n            roi_scale_factor=self.reg_roi_scale_factor)\n        if self.with_shared_head:\n            bbox_cls_feats = self.shared_head(bbox_cls_feats)\n            bbox_reg_feats = self.shared_head(bbox_reg_feats)\n        cls_score, bbox_pred = self.bbox_head(bbox_cls_feats, bbox_reg_feats)\n\n        bbox_results = dict(\n            cls_score=cls_score,\n            bbox_pred=bbox_pred,\n            bbox_feats=bbox_cls_feats)\n        return bbox_results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/dynamic_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core import bbox2roi\nfrom mmdet.models.losses import SmoothL1Loss\nfrom ..builder import HEADS\nfrom .standard_roi_head import StandardRoIHead\n\nEPS = 1e-15\n\n\n@HEADS.register_module()\nclass DynamicRoIHead(StandardRoIHead):\n    \"\"\"RoI head for `Dynamic R-CNN <https://arxiv.org/abs/2004.06002>`_.\"\"\"\n\n    def __init__(self, **kwargs):\n        super(DynamicRoIHead, self).__init__(**kwargs)\n        assert isinstance(self.bbox_head.loss_bbox, SmoothL1Loss)\n        # the IoU history of the past `update_iter_interval` iterations\n        self.iou_history = []\n        # the beta history of the past `update_iter_interval` iterations\n        self.beta_history = []\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None):\n        \"\"\"Forward function for training.\n\n        Args:\n            x (list[Tensor]): list of multi-level img features.\n\n            img_metas (list[dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n\n            proposals (list[Tensors]): list of region proposals.\n\n            gt_bboxes (list[Tensor]): each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n\n            gt_labels (list[Tensor]): class indices corresponding to each box\n\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n            gt_masks (None | Tensor) : true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        # assign gts and sample proposals\n        if self.with_bbox or self.with_mask:\n            num_imgs = len(img_metas)\n            if gt_bboxes_ignore is None:\n                gt_bboxes_ignore = [None for _ in range(num_imgs)]\n            sampling_results = []\n            cur_iou = []\n            for i in range(num_imgs):\n                assign_result = self.bbox_assigner.assign(\n                    proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],\n                    gt_labels[i])\n                sampling_result = self.bbox_sampler.sample(\n                    assign_result,\n                    proposal_list[i],\n                    gt_bboxes[i],\n                    gt_labels[i],\n                    feats=[lvl_feat[i][None] for lvl_feat in x])\n                # record the `iou_topk`-th largest IoU in an image\n                iou_topk = min(self.train_cfg.dynamic_rcnn.iou_topk,\n                               len(assign_result.max_overlaps))\n                ious, _ = torch.topk(assign_result.max_overlaps, iou_topk)\n                cur_iou.append(ious[-1].item())\n                sampling_results.append(sampling_result)\n            # average the current IoUs over images\n            cur_iou = np.mean(cur_iou)\n            self.iou_history.append(cur_iou)\n\n   
     losses = dict()\n        # bbox head forward and loss\n        if self.with_bbox:\n            bbox_results = self._bbox_forward_train(x, sampling_results,\n                                                    gt_bboxes, gt_labels,\n                                                    img_metas)\n            losses.update(bbox_results['loss_bbox'])\n\n        # mask head forward and loss\n        if self.with_mask:\n            mask_results = self._mask_forward_train(x, sampling_results,\n                                                    bbox_results['bbox_feats'],\n                                                    gt_masks, img_metas)\n            losses.update(mask_results['loss_mask'])\n\n        # update IoU threshold and SmoothL1 beta\n        update_iter_interval = self.train_cfg.dynamic_rcnn.update_iter_interval\n        if len(self.iou_history) % update_iter_interval == 0:\n            new_iou_thr, new_beta = self.update_hyperparameters()\n\n        return losses\n\n    def _bbox_forward_train(self, x, sampling_results, gt_bboxes, gt_labels,\n                            img_metas):\n        num_imgs = len(img_metas)\n        rois = bbox2roi([res.bboxes for res in sampling_results])\n        bbox_results = self._bbox_forward(x, rois)\n\n        bbox_targets = self.bbox_head.get_targets(sampling_results, gt_bboxes,\n                                                  gt_labels, self.train_cfg)\n        # record the `beta_topk`-th smallest target\n        # `bbox_targets[2]` and `bbox_targets[3]` stand for bbox_targets\n        # and bbox_weights, respectively\n        pos_inds = bbox_targets[3][:, 0].nonzero().squeeze(1)\n        num_pos = len(pos_inds)\n        cur_target = bbox_targets[2][pos_inds, :2].abs().mean(dim=1)\n        beta_topk = min(self.train_cfg.dynamic_rcnn.beta_topk * num_imgs,\n                        num_pos)\n        cur_target = torch.kthvalue(cur_target, beta_topk)[0].item()\n        self.beta_history.append(cur_target)\n        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],\n                                        bbox_results['bbox_pred'], rois,\n                                        *bbox_targets)\n\n        bbox_results.update(loss_bbox=loss_bbox)\n        return bbox_results\n\n    def update_hyperparameters(self):\n        \"\"\"Update hyperparameters like IoU thresholds for assigner and beta for\n        SmoothL1 loss based on the training statistics.\n\n        Returns:\n            tuple[float]: the updated ``iou_thr`` and ``beta``.\n        \"\"\"\n        new_iou_thr = max(self.train_cfg.dynamic_rcnn.initial_iou,\n                          np.mean(self.iou_history))\n        self.iou_history = []\n        self.bbox_assigner.pos_iou_thr = new_iou_thr\n        self.bbox_assigner.neg_iou_thr = new_iou_thr\n        self.bbox_assigner.min_pos_iou = new_iou_thr\n        if (np.median(self.beta_history) < EPS):\n            # avoid 0 or too small value for new_beta\n            new_beta = self.bbox_head.loss_bbox.beta\n        else:\n            new_beta = min(self.train_cfg.dynamic_rcnn.initial_beta,\n                           np.median(self.beta_history))\n        self.beta_history = []\n        self.bbox_head.loss_bbox.beta = new_beta\n        return new_iou_thr, new_beta\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/grid_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core import bbox2result, bbox2roi\nfrom ..builder import HEADS, build_head, build_roi_extractor\nfrom .standard_roi_head import StandardRoIHead\n\n\n@HEADS.register_module()\nclass GridRoIHead(StandardRoIHead):\n    \"\"\"Grid roi head for Grid R-CNN.\n\n    https://arxiv.org/abs/1811.12030\n    \"\"\"\n\n    def __init__(self, grid_roi_extractor, grid_head, **kwargs):\n        assert grid_head is not None\n        super(GridRoIHead, self).__init__(**kwargs)\n        if grid_roi_extractor is not None:\n            self.grid_roi_extractor = build_roi_extractor(grid_roi_extractor)\n            self.share_roi_extractor = False\n        else:\n            self.share_roi_extractor = True\n            self.grid_roi_extractor = self.bbox_roi_extractor\n        self.grid_head = build_head(grid_head)\n\n    def _random_jitter(self, sampling_results, img_metas, amplitude=0.15):\n        \"\"\"Ramdom jitter positive proposals for training.\"\"\"\n        for sampling_result, img_meta in zip(sampling_results, img_metas):\n            bboxes = sampling_result.pos_bboxes\n            random_offsets = bboxes.new_empty(bboxes.shape[0], 4).uniform_(\n                -amplitude, amplitude)\n            # before jittering\n            cxcy = (bboxes[:, 2:4] + bboxes[:, :2]) / 2\n            wh = (bboxes[:, 2:4] - bboxes[:, :2]).abs()\n            # after jittering\n            new_cxcy = cxcy + wh * random_offsets[:, :2]\n            new_wh = wh * (1 + random_offsets[:, 2:])\n            # xywh to xyxy\n            new_x1y1 = (new_cxcy - new_wh / 2)\n            new_x2y2 = (new_cxcy + new_wh / 2)\n            new_bboxes = torch.cat([new_x1y1, new_x2y2], dim=1)\n            # clip bboxes\n            max_shape = img_meta['img_shape']\n            if max_shape is not None:\n                new_bboxes[:, 0::2].clamp_(min=0, max=max_shape[1] - 1)\n                new_bboxes[:, 1::2].clamp_(min=0, max=max_shape[0] - 1)\n\n            sampling_result.pos_bboxes = new_bboxes\n        return sampling_results\n\n    def forward_dummy(self, x, proposals):\n        \"\"\"Dummy forward function.\"\"\"\n        # bbox head\n        outs = ()\n        rois = bbox2roi([proposals])\n        if self.with_bbox:\n            bbox_results = self._bbox_forward(x, rois)\n            outs = outs + (bbox_results['cls_score'],\n                           bbox_results['bbox_pred'])\n\n        # grid head\n        grid_rois = rois[:100]\n        grid_feats = self.grid_roi_extractor(\n            x[:self.grid_roi_extractor.num_inputs], grid_rois)\n        if self.with_shared_head:\n            grid_feats = self.shared_head(grid_feats)\n        grid_pred = self.grid_head(grid_feats)\n        outs = outs + (grid_pred, )\n\n        # mask head\n        if self.with_mask:\n            mask_rois = rois[:100]\n            mask_results = self._mask_forward(x, mask_rois)\n            outs = outs + (mask_results['mask_pred'], )\n        return outs\n\n    def _bbox_forward_train(self, x, sampling_results, gt_bboxes, gt_labels,\n                            img_metas):\n        \"\"\"Run forward function and calculate loss for box head in training.\"\"\"\n        bbox_results = super(GridRoIHead,\n                             self)._bbox_forward_train(x, sampling_results,\n                                                       gt_bboxes, gt_labels,\n                                                       img_metas)\n\n        # Grid head 
forward and loss\n        sampling_results = self._random_jitter(sampling_results, img_metas)\n        pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])\n\n        # GN in head does not support zero shape input\n        if pos_rois.shape[0] == 0:\n            return bbox_results\n\n        grid_feats = self.grid_roi_extractor(\n            x[:self.grid_roi_extractor.num_inputs], pos_rois)\n        if self.with_shared_head:\n            grid_feats = self.shared_head(grid_feats)\n        # Accelerate training\n        max_sample_num_grid = self.train_cfg.get('max_num_grid', 192)\n        sample_idx = torch.randperm(\n            grid_feats.shape[0])[:min(grid_feats.shape[0], max_sample_num_grid\n                                      )]\n        grid_feats = grid_feats[sample_idx]\n\n        grid_pred = self.grid_head(grid_feats)\n\n        grid_targets = self.grid_head.get_targets(sampling_results,\n                                                  self.train_cfg)\n        grid_targets = grid_targets[sample_idx]\n\n        loss_grid = self.grid_head.loss(grid_pred, grid_targets)\n\n        bbox_results['loss_bbox'].update(loss_grid)\n        return bbox_results\n\n    def simple_test(self,\n                    x,\n                    proposal_list,\n                    img_metas,\n                    proposals=None,\n                    rescale=False):\n        \"\"\"Test without augmentation.\"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n\n        det_bboxes, det_labels = self.simple_test_bboxes(\n            x, img_metas, proposal_list, self.test_cfg, rescale=False)\n        # pack rois into bboxes\n        grid_rois = bbox2roi([det_bbox[:, :4] for det_bbox in det_bboxes])\n        if grid_rois.shape[0] != 0:\n            grid_feats = self.grid_roi_extractor(\n                x[:len(self.grid_roi_extractor.featmap_strides)], grid_rois)\n            self.grid_head.test_mode = True\n            grid_pred = self.grid_head(grid_feats)\n            # split batch grid head prediction back to each image\n            num_roi_per_img = tuple(len(det_bbox) for det_bbox in det_bboxes)\n            grid_pred = {\n                k: v.split(num_roi_per_img, 0)\n                for k, v in grid_pred.items()\n            }\n\n            # apply bbox post-processing to each image individually\n            bbox_results = []\n            num_imgs = len(det_bboxes)\n            for i in range(num_imgs):\n                if det_bboxes[i].shape[0] == 0:\n                    bbox_results.append([\n                        np.zeros((0, 5), dtype=np.float32)\n                        for _ in range(self.bbox_head.num_classes)\n                    ])\n                else:\n                    det_bbox = self.grid_head.get_bboxes(\n                        det_bboxes[i], grid_pred['fused'][i], [img_metas[i]])\n                    if rescale:\n                        det_bbox[:, :4] /= img_metas[i]['scale_factor']\n                    bbox_results.append(\n                        bbox2result(det_bbox, det_labels[i],\n                                    self.bbox_head.num_classes))\n        else:\n            bbox_results = [[\n                np.zeros((0, 5), dtype=np.float32)\n                for _ in range(self.bbox_head.num_classes)\n            ] for _ in range(len(det_bboxes))]\n\n        if not self.with_mask:\n            return bbox_results\n        else:\n            segm_results = self.simple_test_mask(\n                x, img_metas, det_bboxes, det_labels, 
rescale=rescale)\n            return list(zip(bbox_results, segm_results))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/htc_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\n\nfrom mmdet.core import (bbox2result, bbox2roi, bbox_mapping, merge_aug_bboxes,\n                        merge_aug_masks, multiclass_nms)\nfrom ..builder import HEADS, build_head, build_roi_extractor\nfrom ..utils.brick_wrappers import adaptive_avg_pool2d\nfrom .cascade_roi_head import CascadeRoIHead\n\n\n@HEADS.register_module()\nclass HybridTaskCascadeRoIHead(CascadeRoIHead):\n    \"\"\"Hybrid task cascade roi head including one bbox head and one mask head.\n\n    https://arxiv.org/abs/1901.07518\n    \"\"\"\n\n    def __init__(self,\n                 num_stages,\n                 stage_loss_weights,\n                 semantic_roi_extractor=None,\n                 semantic_head=None,\n                 semantic_fusion=('bbox', 'mask'),\n                 interleaved=True,\n                 mask_info_flow=True,\n                 **kwargs):\n        super(HybridTaskCascadeRoIHead,\n              self).__init__(num_stages, stage_loss_weights, **kwargs)\n        assert self.with_bbox\n        assert not self.with_shared_head  # shared head is not supported\n\n        if semantic_head is not None:\n            self.semantic_roi_extractor = build_roi_extractor(\n                semantic_roi_extractor)\n            self.semantic_head = build_head(semantic_head)\n\n        self.semantic_fusion = semantic_fusion\n        self.interleaved = interleaved\n        self.mask_info_flow = mask_info_flow\n\n    @property\n    def with_semantic(self):\n        \"\"\"bool: whether the head has semantic head\"\"\"\n        if hasattr(self, 'semantic_head') and self.semantic_head is not None:\n            return True\n        else:\n            return False\n\n    def forward_dummy(self, x, proposals):\n        \"\"\"Dummy forward function.\"\"\"\n        outs = ()\n        # semantic head\n        if self.with_semantic:\n            _, semantic_feat = self.semantic_head(x)\n        else:\n            semantic_feat = None\n        # bbox heads\n        rois = bbox2roi([proposals])\n        for i in range(self.num_stages):\n            bbox_results = self._bbox_forward(\n                i, x, rois, semantic_feat=semantic_feat)\n            outs = outs + (bbox_results['cls_score'],\n                           bbox_results['bbox_pred'])\n        # mask heads\n        if self.with_mask:\n            mask_rois = rois[:100]\n            mask_roi_extractor = self.mask_roi_extractor[-1]\n            mask_feats = mask_roi_extractor(\n                x[:len(mask_roi_extractor.featmap_strides)], mask_rois)\n            if self.with_semantic and 'mask' in self.semantic_fusion:\n                mask_semantic_feat = self.semantic_roi_extractor(\n                    [semantic_feat], mask_rois)\n                mask_feats = mask_feats + mask_semantic_feat\n            last_feat = None\n            for i in range(self.num_stages):\n                mask_head = self.mask_head[i]\n                if self.mask_info_flow:\n                    mask_pred, last_feat = mask_head(mask_feats, last_feat)\n                else:\n                    mask_pred = mask_head(mask_feats)\n                outs = outs + (mask_pred, )\n        return outs\n\n    def _bbox_forward_train(self,\n                            stage,\n                            x,\n                            sampling_results,\n                            gt_bboxes,\n                            gt_labels,\n                            
rcnn_train_cfg,\n                            semantic_feat=None):\n        \"\"\"Run forward function and calculate loss for box head in training.\"\"\"\n        bbox_head = self.bbox_head[stage]\n        rois = bbox2roi([res.bboxes for res in sampling_results])\n        bbox_results = self._bbox_forward(\n            stage, x, rois, semantic_feat=semantic_feat)\n\n        bbox_targets = bbox_head.get_targets(sampling_results, gt_bboxes,\n                                             gt_labels, rcnn_train_cfg)\n        loss_bbox = bbox_head.loss(bbox_results['cls_score'],\n                                   bbox_results['bbox_pred'], rois,\n                                   *bbox_targets)\n\n        bbox_results.update(\n            loss_bbox=loss_bbox,\n            rois=rois,\n            bbox_targets=bbox_targets,\n        )\n        return bbox_results\n\n    def _mask_forward_train(self,\n                            stage,\n                            x,\n                            sampling_results,\n                            gt_masks,\n                            rcnn_train_cfg,\n                            semantic_feat=None):\n        \"\"\"Run forward function and calculate loss for mask head in\n        training.\"\"\"\n        mask_roi_extractor = self.mask_roi_extractor[stage]\n        mask_head = self.mask_head[stage]\n        pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])\n        mask_feats = mask_roi_extractor(x[:mask_roi_extractor.num_inputs],\n                                        pos_rois)\n\n        # semantic feature fusion\n        # element-wise sum for original features and pooled semantic features\n        if self.with_semantic and 'mask' in self.semantic_fusion:\n            mask_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n                                                             pos_rois)\n            if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:\n                mask_semantic_feat = F.adaptive_avg_pool2d(\n                    mask_semantic_feat, mask_feats.shape[-2:])\n            mask_feats = mask_feats + mask_semantic_feat\n\n        # mask information flow\n        # forward all previous mask heads to obtain last_feat, and fuse it\n        # with the normal mask feature\n        if self.mask_info_flow:\n            last_feat = None\n            for i in range(stage):\n                last_feat = self.mask_head[i](\n                    mask_feats, last_feat, return_logits=False)\n            mask_pred = mask_head(mask_feats, last_feat, return_feat=False)\n        else:\n            mask_pred = mask_head(mask_feats, return_feat=False)\n\n        mask_targets = mask_head.get_targets(sampling_results, gt_masks,\n                                             rcnn_train_cfg)\n        pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n        loss_mask = mask_head.loss(mask_pred, mask_targets, pos_labels)\n\n        mask_results = dict(loss_mask=loss_mask)\n        return mask_results\n\n    def _bbox_forward(self, stage, x, rois, semantic_feat=None):\n        \"\"\"Box head forward function used in both training and testing.\"\"\"\n        bbox_roi_extractor = self.bbox_roi_extractor[stage]\n        bbox_head = self.bbox_head[stage]\n        bbox_feats = bbox_roi_extractor(\n            x[:len(bbox_roi_extractor.featmap_strides)], rois)\n        if self.with_semantic and 'bbox' in self.semantic_fusion:\n            bbox_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n        
                                                     rois)\n            if bbox_semantic_feat.shape[-2:] != bbox_feats.shape[-2:]:\n                bbox_semantic_feat = adaptive_avg_pool2d(\n                    bbox_semantic_feat, bbox_feats.shape[-2:])\n            bbox_feats = bbox_feats + bbox_semantic_feat\n        cls_score, bbox_pred = bbox_head(bbox_feats)\n\n        bbox_results = dict(cls_score=cls_score, bbox_pred=bbox_pred)\n        return bbox_results\n\n    def _mask_forward_test(self, stage, x, bboxes, semantic_feat=None):\n        \"\"\"Mask head forward function for testing.\"\"\"\n        mask_roi_extractor = self.mask_roi_extractor[stage]\n        mask_head = self.mask_head[stage]\n        mask_rois = bbox2roi([bboxes])\n        mask_feats = mask_roi_extractor(\n            x[:len(mask_roi_extractor.featmap_strides)], mask_rois)\n        if self.with_semantic and 'mask' in self.semantic_fusion:\n            mask_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n                                                             mask_rois)\n            if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:\n                mask_semantic_feat = F.adaptive_avg_pool2d(\n                    mask_semantic_feat, mask_feats.shape[-2:])\n            mask_feats = mask_feats + mask_semantic_feat\n        if self.mask_info_flow:\n            last_feat = None\n            last_pred = None\n            for i in range(stage):\n                mask_pred, last_feat = self.mask_head[i](mask_feats, last_feat)\n                if last_pred is not None:\n                    mask_pred = mask_pred + last_pred\n                last_pred = mask_pred\n            mask_pred = mask_head(mask_feats, last_feat, return_feat=False)\n            if last_pred is not None:\n                mask_pred = mask_pred + last_pred\n        else:\n            mask_pred = mask_head(mask_feats)\n        return mask_pred\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      gt_semantic_seg=None):\n        \"\"\"\n        Args:\n            x (list[Tensor]): list of multi-level img features.\n\n            img_metas (list[dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n\n            proposal_list (list[Tensors]): list of region proposals.\n\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n\n            gt_labels (list[Tensor]): class indices corresponding to each box\n\n            gt_bboxes_ignore (None, list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n\n            gt_masks (None, Tensor) : true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n\n            gt_semantic_seg (None, list[Tensor]): semantic segmentation masks\n                used if the architecture supports semantic segmentation task.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss 
components\n        \"\"\"\n        # semantic segmentation part\n        # 2 outputs: segmentation prediction and embedded features\n        losses = dict()\n        if self.with_semantic:\n            semantic_pred, semantic_feat = self.semantic_head(x)\n            loss_seg = self.semantic_head.loss(semantic_pred, gt_semantic_seg)\n            losses['loss_semantic_seg'] = loss_seg\n        else:\n            semantic_feat = None\n\n        for i in range(self.num_stages):\n            self.current_stage = i\n            rcnn_train_cfg = self.train_cfg[i]\n            lw = self.stage_loss_weights[i]\n\n            # assign gts and sample proposals\n            sampling_results = []\n            bbox_assigner = self.bbox_assigner[i]\n            bbox_sampler = self.bbox_sampler[i]\n            num_imgs = len(img_metas)\n            if gt_bboxes_ignore is None:\n                gt_bboxes_ignore = [None for _ in range(num_imgs)]\n\n            for j in range(num_imgs):\n                assign_result = bbox_assigner.assign(proposal_list[j],\n                                                     gt_bboxes[j],\n                                                     gt_bboxes_ignore[j],\n                                                     gt_labels[j])\n                sampling_result = bbox_sampler.sample(\n                    assign_result,\n                    proposal_list[j],\n                    gt_bboxes[j],\n                    gt_labels[j],\n                    feats=[lvl_feat[j][None] for lvl_feat in x])\n                sampling_results.append(sampling_result)\n\n            # bbox head forward and loss\n            bbox_results = \\\n                self._bbox_forward_train(\n                    i, x, sampling_results, gt_bboxes, gt_labels,\n                    rcnn_train_cfg, semantic_feat)\n            roi_labels = bbox_results['bbox_targets'][0]\n\n            for name, value in bbox_results['loss_bbox'].items():\n                losses[f's{i}.{name}'] = (\n                    value * lw if 'loss' in name else value)\n\n            # mask head forward and loss\n            if self.with_mask:\n                # interleaved execution: use regressed bboxes by the box branch\n                # to train the mask branch\n                if self.interleaved:\n                    pos_is_gts = [res.pos_is_gt for res in sampling_results]\n                    with torch.no_grad():\n                        proposal_list = self.bbox_head[i].refine_bboxes(\n                            bbox_results['rois'], roi_labels,\n                            bbox_results['bbox_pred'], pos_is_gts, img_metas)\n                        # re-assign and sample 512 RoIs from 512 RoIs\n                        sampling_results = []\n                        for j in range(num_imgs):\n                            assign_result = bbox_assigner.assign(\n                                proposal_list[j], gt_bboxes[j],\n                                gt_bboxes_ignore[j], gt_labels[j])\n                            sampling_result = bbox_sampler.sample(\n                                assign_result,\n                                proposal_list[j],\n                                gt_bboxes[j],\n                                gt_labels[j],\n                                feats=[lvl_feat[j][None] for lvl_feat in x])\n                            sampling_results.append(sampling_result)\n                mask_results = self._mask_forward_train(\n                    i, x, sampling_results, gt_masks, rcnn_train_cfg,\n         
           semantic_feat)\n                for name, value in mask_results['loss_mask'].items():\n                    losses[f's{i}.{name}'] = (\n                        value * lw if 'loss' in name else value)\n\n            # refine bboxes (same as Cascade R-CNN)\n            if i < self.num_stages - 1 and not self.interleaved:\n                pos_is_gts = [res.pos_is_gt for res in sampling_results]\n                with torch.no_grad():\n                    proposal_list = self.bbox_head[i].refine_bboxes(\n                        bbox_results['rois'], roi_labels,\n                        bbox_results['bbox_pred'], pos_is_gts, img_metas)\n\n        return losses\n\n    def simple_test(self, x, proposal_list, img_metas, rescale=False):\n        \"\"\"Test without augmentation.\n\n        Args:\n            x (tuple[Tensor]): Features from upstream network. Each\n                has shape (batch_size, c, h, w).\n            proposal_list (list(Tensor)): Proposals from rpn head.\n                Each has shape (num_proposals, 5), last dimension\n                5 represent (x1, y1, x2, y2, score).\n            img_metas (list[dict]): Meta information of images.\n            rescale (bool): Whether to rescale the results to\n                the original image. Default: True.\n\n        Returns:\n            list[list[np.ndarray]] or list[tuple]: When no mask branch,\n            it is bbox results of each image and classes with type\n            `list[list[np.ndarray]]`. The outer list\n            corresponds to each image. The inner list\n            corresponds to each class. When the model has mask branch,\n            it contains bbox results and mask results.\n            The outer list corresponds to each image, and first element\n            of tuple is bbox results, second element is mask results.\n        \"\"\"\n        if self.with_semantic:\n            _, semantic_feat = self.semantic_head(x)\n        else:\n            semantic_feat = None\n\n        num_imgs = len(proposal_list)\n        img_shapes = tuple(meta['img_shape'] for meta in img_metas)\n        ori_shapes = tuple(meta['ori_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        # \"ms\" in variable names means multi-stage\n        ms_bbox_result = {}\n        ms_segm_result = {}\n        ms_scores = []\n        rcnn_test_cfg = self.test_cfg\n\n        rois = bbox2roi(proposal_list)\n\n        if rois.shape[0] == 0:\n            # There is no proposal in the whole batch\n            bbox_results = [[\n                np.zeros((0, 5), dtype=np.float32)\n                for _ in range(self.bbox_head[-1].num_classes)\n            ]] * num_imgs\n\n            if self.with_mask:\n                mask_classes = self.mask_head[-1].num_classes\n                segm_results = [[[] for _ in range(mask_classes)]\n                                for _ in range(num_imgs)]\n                results = list(zip(bbox_results, segm_results))\n            else:\n                results = bbox_results\n\n            return results\n\n        for i in range(self.num_stages):\n            bbox_head = self.bbox_head[i]\n            bbox_results = self._bbox_forward(\n                i, x, rois, semantic_feat=semantic_feat)\n            # split batch bbox prediction back to each image\n            cls_score = bbox_results['cls_score']\n            bbox_pred = bbox_results['bbox_pred']\n            num_proposals_per_img = tuple(len(p) for p in proposal_list)\n            rois = 
rois.split(num_proposals_per_img, 0)\n            cls_score = cls_score.split(num_proposals_per_img, 0)\n            bbox_pred = bbox_pred.split(num_proposals_per_img, 0)\n            ms_scores.append(cls_score)\n\n            if i < self.num_stages - 1:\n                refine_rois_list = []\n                for j in range(num_imgs):\n                    if rois[j].shape[0] > 0:\n                        bbox_label = cls_score[j][:, :-1].argmax(dim=1)\n                        refine_rois = bbox_head.regress_by_class(\n                            rois[j], bbox_label, bbox_pred[j], img_metas[j])\n                        refine_rois_list.append(refine_rois)\n                rois = torch.cat(refine_rois_list)\n\n        # average scores of each image by stages\n        cls_score = [\n            sum([score[i] for score in ms_scores]) / float(len(ms_scores))\n            for i in range(num_imgs)\n        ]\n\n        # apply bbox post-processing to each image individually\n        det_bboxes = []\n        det_labels = []\n        for i in range(num_imgs):\n            det_bbox, det_label = self.bbox_head[-1].get_bboxes(\n                rois[i],\n                cls_score[i],\n                bbox_pred[i],\n                img_shapes[i],\n                scale_factors[i],\n                rescale=rescale,\n                cfg=rcnn_test_cfg)\n            det_bboxes.append(det_bbox)\n            det_labels.append(det_label)\n        bbox_result = [\n            bbox2result(det_bboxes[i], det_labels[i],\n                        self.bbox_head[-1].num_classes)\n            for i in range(num_imgs)\n        ]\n        ms_bbox_result['ensemble'] = bbox_result\n\n        if self.with_mask:\n            if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n                mask_classes = self.mask_head[-1].num_classes\n                segm_results = [[[] for _ in range(mask_classes)]\n                                for _ in range(num_imgs)]\n            else:\n                if rescale and not isinstance(scale_factors[0], float):\n                    scale_factors = [\n                        torch.from_numpy(scale_factor).to(det_bboxes[0].device)\n                        for scale_factor in scale_factors\n                    ]\n                _bboxes = [\n                    det_bboxes[i][:, :4] *\n                    scale_factors[i] if rescale else det_bboxes[i]\n                    for i in range(num_imgs)\n                ]\n                mask_rois = bbox2roi(_bboxes)\n                aug_masks = []\n                mask_roi_extractor = self.mask_roi_extractor[-1]\n                mask_feats = mask_roi_extractor(\n                    x[:len(mask_roi_extractor.featmap_strides)], mask_rois)\n                if self.with_semantic and 'mask' in self.semantic_fusion:\n                    mask_semantic_feat = self.semantic_roi_extractor(\n                        [semantic_feat], mask_rois)\n                    mask_feats = mask_feats + mask_semantic_feat\n                last_feat = None\n\n                num_bbox_per_img = tuple(len(_bbox) for _bbox in _bboxes)\n                for i in range(self.num_stages):\n                    mask_head = self.mask_head[i]\n                    if self.mask_info_flow:\n                        mask_pred, last_feat = mask_head(mask_feats, last_feat)\n                    else:\n                        mask_pred = mask_head(mask_feats)\n\n                    # split batch mask prediction back to each image\n                    mask_pred = 
mask_pred.split(num_bbox_per_img, 0)\n                    aug_masks.append(\n                        [mask.sigmoid().cpu().numpy() for mask in mask_pred])\n\n                # apply mask post-processing to each image individually\n                segm_results = []\n                for i in range(num_imgs):\n                    if det_bboxes[i].shape[0] == 0:\n                        segm_results.append(\n                            [[]\n                             for _ in range(self.mask_head[-1].num_classes)])\n                    else:\n                        aug_mask = [mask[i] for mask in aug_masks]\n                        merged_mask = merge_aug_masks(\n                            aug_mask, [[img_metas[i]]] * self.num_stages,\n                            rcnn_test_cfg)\n                        segm_result = self.mask_head[-1].get_seg_masks(\n                            merged_mask, _bboxes[i], det_labels[i],\n                            rcnn_test_cfg, ori_shapes[i], scale_factors[i],\n                            rescale)\n                        segm_results.append(segm_result)\n            ms_segm_result['ensemble'] = segm_results\n\n        if self.with_mask:\n            results = list(\n                zip(ms_bbox_result['ensemble'], ms_segm_result['ensemble']))\n        else:\n            results = ms_bbox_result['ensemble']\n\n        return results\n\n    def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        if self.with_semantic:\n            semantic_feats = [\n                self.semantic_head(feat)[1] for feat in img_feats\n            ]\n        else:\n            semantic_feats = [None] * len(img_metas)\n\n        rcnn_test_cfg = self.test_cfg\n        aug_bboxes = []\n        aug_scores = []\n        for x, img_meta, semantic in zip(img_feats, img_metas, semantic_feats):\n            # only one image in the batch\n            img_shape = img_meta[0]['img_shape']\n            scale_factor = img_meta[0]['scale_factor']\n            flip = img_meta[0]['flip']\n            flip_direction = img_meta[0]['flip_direction']\n\n            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,\n                                     scale_factor, flip, flip_direction)\n            # \"ms\" in variable names means multi-stage\n            ms_scores = []\n\n            rois = bbox2roi([proposals])\n\n            if rois.shape[0] == 0:\n                # There is no proposal in the single image\n                aug_bboxes.append(rois.new_zeros(0, 4))\n                aug_scores.append(rois.new_zeros(0, 1))\n                continue\n\n            for i in range(self.num_stages):\n                bbox_head = self.bbox_head[i]\n                bbox_results = self._bbox_forward(\n                    i, x, rois, semantic_feat=semantic)\n                ms_scores.append(bbox_results['cls_score'])\n\n                if i < self.num_stages - 1:\n                    bbox_label = bbox_results['cls_score'].argmax(dim=1)\n                    rois = bbox_head.regress_by_class(\n                        rois, bbox_label, bbox_results['bbox_pred'],\n                        img_meta[0])\n\n            cls_score = sum(ms_scores) / float(len(ms_scores))\n            bboxes, scores = self.bbox_head[-1].get_bboxes(\n                rois,\n                cls_score,\n                bbox_results['bbox_pred'],\n   
             img_shape,\n                scale_factor,\n                rescale=False,\n                cfg=None)\n            aug_bboxes.append(bboxes)\n            aug_scores.append(scores)\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = merge_aug_bboxes(\n            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)\n        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,\n                                                rcnn_test_cfg.score_thr,\n                                                rcnn_test_cfg.nms,\n                                                rcnn_test_cfg.max_per_img)\n\n        bbox_result = bbox2result(det_bboxes, det_labels,\n                                  self.bbox_head[-1].num_classes)\n\n        if self.with_mask:\n            if det_bboxes.shape[0] == 0:\n                segm_result = [[]\n                               for _ in range(self.mask_head[-1].num_classes)]\n            else:\n                aug_masks = []\n                aug_img_metas = []\n                for x, img_meta, semantic in zip(img_feats, img_metas,\n                                                 semantic_feats):\n                    img_shape = img_meta[0]['img_shape']\n                    scale_factor = img_meta[0]['scale_factor']\n                    flip = img_meta[0]['flip']\n                    flip_direction = img_meta[0]['flip_direction']\n                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,\n                                           scale_factor, flip, flip_direction)\n                    mask_rois = bbox2roi([_bboxes])\n                    mask_feats = self.mask_roi_extractor[-1](\n                        x[:len(self.mask_roi_extractor[-1].featmap_strides)],\n                        mask_rois)\n                    if self.with_semantic:\n                        semantic_feat = semantic\n                        mask_semantic_feat = self.semantic_roi_extractor(\n                            [semantic_feat], mask_rois)\n                        if mask_semantic_feat.shape[-2:] != mask_feats.shape[\n                                -2:]:\n                            mask_semantic_feat = F.adaptive_avg_pool2d(\n                                mask_semantic_feat, mask_feats.shape[-2:])\n                        mask_feats = mask_feats + mask_semantic_feat\n                    last_feat = None\n                    for i in range(self.num_stages):\n                        mask_head = self.mask_head[i]\n                        if self.mask_info_flow:\n                            mask_pred, last_feat = mask_head(\n                                mask_feats, last_feat)\n                        else:\n                            mask_pred = mask_head(mask_feats)\n                        aug_masks.append(mask_pred.sigmoid().cpu().numpy())\n                        aug_img_metas.append(img_meta)\n                merged_masks = merge_aug_masks(aug_masks, aug_img_metas,\n                                               self.test_cfg)\n\n                ori_shape = img_metas[0][0]['ori_shape']\n                segm_result = self.mask_head[-1].get_seg_masks(\n                    merged_masks,\n                    det_bboxes,\n                    det_labels,\n                    rcnn_test_cfg,\n                    ori_shape,\n                    scale_factor=1.0,\n                    rescale=False)\n            return [(bbox_result, segm_result)]\n        else:\n            return 
[bbox_result]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .coarse_mask_head import CoarseMaskHead\nfrom .dynamic_mask_head import DynamicMaskHead\nfrom .fcn_mask_head import FCNMaskHead\nfrom .feature_relay_head import FeatureRelayHead\nfrom .fused_semantic_head import FusedSemanticHead\nfrom .global_context_head import GlobalContextHead\nfrom .grid_head import GridHead\nfrom .htc_mask_head import HTCMaskHead\nfrom .mask_point_head import MaskPointHead\nfrom .maskiou_head import MaskIoUHead\nfrom .scnet_mask_head import SCNetMaskHead\nfrom .scnet_semantic_head import SCNetSemanticHead\n\n__all__ = [\n    'FCNMaskHead', 'HTCMaskHead', 'FusedSemanticHead', 'GridHead',\n    'MaskIoUHead', 'CoarseMaskHead', 'MaskPointHead', 'SCNetMaskHead',\n    'SCNetSemanticHead', 'GlobalContextHead', 'FeatureRelayHead',\n    'DynamicMaskHead'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/coarse_mask_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import ConvModule, Linear\nfrom mmcv.runner import ModuleList, auto_fp16\n\nfrom mmdet.models.builder import HEADS\nfrom .fcn_mask_head import FCNMaskHead\n\n\n@HEADS.register_module()\nclass CoarseMaskHead(FCNMaskHead):\n    \"\"\"Coarse mask head used in PointRend.\n\n    Compared with standard ``FCNMaskHead``, ``CoarseMaskHead`` will downsample\n    the input feature map instead of upsample it.\n\n    Args:\n        num_convs (int): Number of conv layers in the head. Default: 0.\n        num_fcs (int): Number of fc layers in the head. Default: 2.\n        fc_out_channels (int): Number of output channels of fc layer.\n            Default: 1024.\n        downsample_factor (int): The factor that feature map is downsampled by.\n            Default: 2.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_convs=0,\n                 num_fcs=2,\n                 fc_out_channels=1024,\n                 downsample_factor=2,\n                 init_cfg=dict(\n                     type='Xavier',\n                     override=[\n                         dict(name='fcs'),\n                         dict(type='Constant', val=0.001, name='fc_logits')\n                     ]),\n                 *arg,\n                 **kwarg):\n        super(CoarseMaskHead, self).__init__(\n            *arg,\n            num_convs=num_convs,\n            upsample_cfg=dict(type=None),\n            init_cfg=None,\n            **kwarg)\n        self.init_cfg = init_cfg\n        self.num_fcs = num_fcs\n        assert self.num_fcs > 0\n        self.fc_out_channels = fc_out_channels\n        self.downsample_factor = downsample_factor\n        assert self.downsample_factor >= 1\n        # remove conv_logit\n        delattr(self, 'conv_logits')\n\n        if downsample_factor > 1:\n            downsample_in_channels = (\n                self.conv_out_channels\n                if self.num_convs > 0 else self.in_channels)\n            self.downsample_conv = ConvModule(\n                downsample_in_channels,\n                self.conv_out_channels,\n                kernel_size=downsample_factor,\n                stride=downsample_factor,\n                padding=0,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg)\n        else:\n            self.downsample_conv = None\n\n        self.output_size = (self.roi_feat_size[0] // downsample_factor,\n                            self.roi_feat_size[1] // downsample_factor)\n        self.output_area = self.output_size[0] * self.output_size[1]\n\n        last_layer_dim = self.conv_out_channels * self.output_area\n\n        self.fcs = ModuleList()\n        for i in range(num_fcs):\n            fc_in_channels = (\n                last_layer_dim if i == 0 else self.fc_out_channels)\n            self.fcs.append(Linear(fc_in_channels, self.fc_out_channels))\n        last_layer_dim = self.fc_out_channels\n        output_channels = self.num_classes * self.output_area\n        self.fc_logits = Linear(last_layer_dim, output_channels)\n\n    def init_weights(self):\n        super(FCNMaskHead, self).init_weights()\n\n    @auto_fp16()\n    def forward(self, x):\n        for conv in self.convs:\n            x = conv(x)\n\n        if self.downsample_conv is not None:\n            x = self.downsample_conv(x)\n\n        x = x.flatten(1)\n        for fc in self.fcs:\n            x = self.relu(fc(x))\n        
mask_pred = self.fc_logits(x).view(\n            x.size(0), self.num_classes, *self.output_size)\n        return mask_pred\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/dynamic_mask_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import auto_fp16, force_fp32\n\nfrom mmdet.core import mask_target\nfrom mmdet.models.builder import HEADS\nfrom mmdet.models.dense_heads.atss_head import reduce_mean\nfrom mmdet.models.utils import build_transformer\nfrom .fcn_mask_head import FCNMaskHead\n\n\n@HEADS.register_module()\nclass DynamicMaskHead(FCNMaskHead):\n    r\"\"\"Dynamic Mask Head for\n    `Instances as Queries <http://arxiv.org/abs/2105.01928>`_\n\n    Args:\n        num_convs (int): Number of convolution layer.\n            Defaults to 4.\n        roi_feat_size (int): The output size of RoI extractor,\n            Defaults to 14.\n        in_channels (int): Input feature channels.\n            Defaults to 256.\n        conv_kernel_size (int): Kernel size of convolution layers.\n            Defaults to 3.\n        conv_out_channels (int): Output channels of convolution layers.\n            Defaults to 256.\n        num_classes (int): Number of classes.\n            Defaults to 80\n        class_agnostic (int): Whether generate class agnostic prediction.\n            Defaults to False.\n        dropout (float): Probability of drop the channel.\n            Defaults to 0.0\n        upsample_cfg (dict): The config for upsample layer.\n        conv_cfg (dict): The convolution layer config.\n        norm_cfg (dict): The norm layer config.\n        dynamic_conv_cfg (dict): The dynamic convolution layer config.\n        loss_mask (dict): The config for mask loss.\n    \"\"\"\n\n    def __init__(self,\n                 num_convs=4,\n                 roi_feat_size=14,\n                 in_channels=256,\n                 conv_kernel_size=3,\n                 conv_out_channels=256,\n                 num_classes=80,\n                 class_agnostic=False,\n                 upsample_cfg=dict(type='deconv', scale_factor=2),\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 dynamic_conv_cfg=dict(\n                     type='DynamicConv',\n                     in_channels=256,\n                     feat_channels=64,\n                     out_channels=256,\n                     input_feat_shape=14,\n                     with_proj=False,\n                     act_cfg=dict(type='ReLU', inplace=True),\n                     norm_cfg=dict(type='LN')),\n                 loss_mask=dict(type='DiceLoss', loss_weight=8.0),\n                 **kwargs):\n        super(DynamicMaskHead, self).__init__(\n            num_convs=num_convs,\n            roi_feat_size=roi_feat_size,\n            in_channels=in_channels,\n            conv_kernel_size=conv_kernel_size,\n            conv_out_channels=conv_out_channels,\n            num_classes=num_classes,\n            class_agnostic=class_agnostic,\n            upsample_cfg=upsample_cfg,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            loss_mask=loss_mask,\n            **kwargs)\n        assert class_agnostic is False, \\\n            'DynamicMaskHead only support class_agnostic=False'\n        self.fp16_enabled = False\n\n        self.instance_interactive_conv = build_transformer(dynamic_conv_cfg)\n\n    def init_weights(self):\n        \"\"\"Use xavier initialization for all weight parameter and set\n        classification head bias as a specific value when use focal loss.\"\"\"\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n            
nn.init.constant_(self.conv_logits.bias, 0.)\n\n    @auto_fp16()\n    def forward(self, roi_feat, proposal_feat):\n        \"\"\"Forward function of DynamicMaskHead.\n\n        Args:\n            roi_feat (Tensor): Roi-pooling features with shape\n                (batch_size*num_proposals, feature_dimensions,\n                pooling_h , pooling_w).\n            proposal_feat (Tensor): Intermediate feature get from\n                diihead in last stage, has shape\n                (batch_size*num_proposals, feature_dimensions)\n\n          Returns:\n            mask_pred (Tensor): Predicted foreground masks with shape\n                (batch_size*num_proposals, num_classes,\n                                        pooling_h*2, pooling_w*2).\n        \"\"\"\n\n        proposal_feat = proposal_feat.reshape(-1, self.in_channels)\n        proposal_feat_iic = self.instance_interactive_conv(\n            proposal_feat, roi_feat)\n\n        x = proposal_feat_iic.permute(0, 2, 1).reshape(roi_feat.size())\n\n        for conv in self.convs:\n            x = conv(x)\n        if self.upsample is not None:\n            x = self.upsample(x)\n            if self.upsample_method == 'deconv':\n                x = self.relu(x)\n        mask_pred = self.conv_logits(x)\n        return mask_pred\n\n    @force_fp32(apply_to=('mask_pred', ))\n    def loss(self, mask_pred, mask_targets, labels):\n        num_pos = labels.new_ones(labels.size()).float().sum()\n        avg_factor = torch.clamp(reduce_mean(num_pos), min=1.).item()\n        loss = dict()\n        if mask_pred.size(0) == 0:\n            loss_mask = mask_pred.sum()\n        else:\n            loss_mask = self.loss_mask(\n                mask_pred[torch.arange(num_pos).long(), labels, ...].sigmoid(),\n                mask_targets,\n                avg_factor=avg_factor)\n        loss['loss_mask'] = loss_mask\n        return loss\n\n    def get_targets(self, sampling_results, gt_masks, rcnn_train_cfg):\n\n        pos_proposals = [res.pos_bboxes for res in sampling_results]\n        pos_assigned_gt_inds = [\n            res.pos_assigned_gt_inds for res in sampling_results\n        ]\n        mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,\n                                   gt_masks, rcnn_train_cfg)\n        return mask_targets\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/fcn_mask_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom warnings import warn\n\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule, build_conv_layer, build_upsample_layer\nfrom mmcv.ops.carafe import CARAFEPack\nfrom mmcv.runner import BaseModule, ModuleList, auto_fp16, force_fp32\nfrom torch.nn.modules.utils import _pair\n\nfrom mmdet.core import mask_target\nfrom mmdet.models.builder import HEADS, build_loss\n\nBYTES_PER_FLOAT = 4\n# TODO: This memory limit may be too much or too little. It would be better to\n# determine it based on available resources.\nGPU_MEM_LIMIT = 1024**3  # 1 GB memory limit\n\n\n@HEADS.register_module()\nclass FCNMaskHead(BaseModule):\n\n    def __init__(self,\n                 num_convs=4,\n                 roi_feat_size=14,\n                 in_channels=256,\n                 conv_kernel_size=3,\n                 conv_out_channels=256,\n                 num_classes=80,\n                 class_agnostic=False,\n                 upsample_cfg=dict(type='deconv', scale_factor=2),\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 predictor_cfg=dict(type='Conv'),\n                 loss_mask=dict(\n                     type='CrossEntropyLoss', use_mask=True, loss_weight=1.0),\n                 init_cfg=None):\n        assert init_cfg is None, 'To prevent abnormal initialization ' \\\n                                 'behavior, init_cfg is not allowed to be set'\n        super(FCNMaskHead, self).__init__(init_cfg)\n        self.upsample_cfg = upsample_cfg.copy()\n        if self.upsample_cfg['type'] not in [\n                None, 'deconv', 'nearest', 'bilinear', 'carafe'\n        ]:\n            raise ValueError(\n                f'Invalid upsample method {self.upsample_cfg[\"type\"]}, '\n                'accepted methods are \"deconv\", \"nearest\", \"bilinear\", '\n                '\"carafe\"')\n        self.num_convs = num_convs\n        # WARN: roi_feat_size is reserved and not used\n        self.roi_feat_size = _pair(roi_feat_size)\n        self.in_channels = in_channels\n        self.conv_kernel_size = conv_kernel_size\n        self.conv_out_channels = conv_out_channels\n        self.upsample_method = self.upsample_cfg.get('type')\n        self.scale_factor = self.upsample_cfg.pop('scale_factor', None)\n        self.num_classes = num_classes\n        self.class_agnostic = class_agnostic\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.predictor_cfg = predictor_cfg\n        self.fp16_enabled = False\n        self.loss_mask = build_loss(loss_mask)\n\n        self.convs = ModuleList()\n        for i in range(self.num_convs):\n            in_channels = (\n                self.in_channels if i == 0 else self.conv_out_channels)\n            padding = (self.conv_kernel_size - 1) // 2\n            self.convs.append(\n                ConvModule(\n                    in_channels,\n                    self.conv_out_channels,\n                    self.conv_kernel_size,\n                    padding=padding,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg))\n        upsample_in_channels = (\n            self.conv_out_channels if self.num_convs > 0 else in_channels)\n        upsample_cfg_ = self.upsample_cfg.copy()\n        if self.upsample_method is None:\n            self.upsample = None\n        elif self.upsample_method == 'deconv':\n            upsample_cfg_.update(\n                
in_channels=upsample_in_channels,\n                out_channels=self.conv_out_channels,\n                kernel_size=self.scale_factor,\n                stride=self.scale_factor)\n            self.upsample = build_upsample_layer(upsample_cfg_)\n        elif self.upsample_method == 'carafe':\n            upsample_cfg_.update(\n                channels=upsample_in_channels, scale_factor=self.scale_factor)\n            self.upsample = build_upsample_layer(upsample_cfg_)\n        else:\n            # suppress warnings\n            align_corners = (None\n                             if self.upsample_method == 'nearest' else False)\n            upsample_cfg_.update(\n                scale_factor=self.scale_factor,\n                mode=self.upsample_method,\n                align_corners=align_corners)\n            self.upsample = build_upsample_layer(upsample_cfg_)\n\n        out_channels = 1 if self.class_agnostic else self.num_classes\n        logits_in_channel = (\n            self.conv_out_channels\n            if self.upsample_method == 'deconv' else upsample_in_channels)\n        self.conv_logits = build_conv_layer(self.predictor_cfg,\n                                            logits_in_channel, out_channels, 1)\n        self.relu = nn.ReLU(inplace=True)\n        self.debug_imgs = None\n\n    def init_weights(self):\n        super(FCNMaskHead, self).init_weights()\n        for m in [self.upsample, self.conv_logits]:\n            if m is None:\n                continue\n            elif isinstance(m, CARAFEPack):\n                m.init_weights()\n            elif hasattr(m, 'weight') and hasattr(m, 'bias'):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu')\n                nn.init.constant_(m.bias, 0)\n\n    @auto_fp16()\n    def forward(self, x):\n        for conv in self.convs:\n            x = conv(x)\n        if self.upsample is not None:\n            x = self.upsample(x)\n            if self.upsample_method == 'deconv':\n                x = self.relu(x)\n        mask_pred = self.conv_logits(x)\n        return mask_pred\n\n    def get_targets(self, sampling_results, gt_masks, rcnn_train_cfg):\n        pos_proposals = [res.pos_bboxes for res in sampling_results]\n        pos_assigned_gt_inds = [\n            res.pos_assigned_gt_inds for res in sampling_results\n        ]\n        mask_targets = mask_target(pos_proposals, pos_assigned_gt_inds,\n                                   gt_masks, rcnn_train_cfg)\n        return mask_targets\n\n    @force_fp32(apply_to=('mask_pred', ))\n    def loss(self, mask_pred, mask_targets, labels):\n        \"\"\"\n        Example:\n            >>> from mmdet.models.roi_heads.mask_heads.fcn_mask_head import *  # NOQA\n            >>> N = 7  # N = number of extracted ROIs\n            >>> C, H, W = 11, 32, 32\n            >>> # Create example instance of FCN Mask Head.\n            >>> # There are lots of variations depending on the configuration\n            >>> self = FCNMaskHead(num_classes=C, num_convs=1)\n            >>> inputs = torch.rand(N, self.in_channels, H, W)\n            >>> mask_pred = self.forward(inputs)\n            >>> sf = self.scale_factor\n            >>> labels = torch.randint(0, C, size=(N,))\n            >>> # With the default properties the mask targets should indicate\n            >>> # a (potentially soft) single-class label\n            >>> mask_targets = torch.rand(N, H * sf, W * sf)\n            >>> loss = self.loss(mask_pred, mask_targets, labels)\n            >>> 
print('loss = {!r}'.format(loss))\n        \"\"\"\n        loss = dict()\n        if mask_pred.size(0) == 0:\n            loss_mask = mask_pred.sum()\n        else:\n            if self.class_agnostic:\n                loss_mask = self.loss_mask(mask_pred, mask_targets,\n                                           torch.zeros_like(labels))\n            else:\n                loss_mask = self.loss_mask(mask_pred, mask_targets, labels)\n        loss['loss_mask'] = loss_mask\n        return loss\n\n    def get_seg_masks(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,\n                      ori_shape, scale_factor, rescale):\n        \"\"\"Get segmentation masks from mask_pred and bboxes.\n\n        Args:\n            mask_pred (Tensor or ndarray): shape (n, #class, h, w).\n                For single-scale testing, mask_pred is the direct output of\n                model, whose type is Tensor, while for multi-scale testing,\n                it will be converted to numpy array outside of this method.\n            det_bboxes (Tensor): shape (n, 4/5)\n            det_labels (Tensor): shape (n, )\n            rcnn_test_cfg (dict): rcnn testing config\n            ori_shape (Tuple): original image height and width, shape (2,)\n            scale_factor(ndarray | Tensor): If ``rescale is True``, box\n                coordinates are divided by this scale factor to fit\n                ``ori_shape``.\n            rescale (bool): If True, the resulting masks will be rescaled to\n                ``ori_shape``.\n\n        Returns:\n            list[list]: encoded masks. The c-th item in the outer list\n                corresponds to the c-th class. Given the c-th outer list, the\n                i-th item in that inner list is the mask for the i-th box with\n                class label c.\n\n        Example:\n            >>> import mmcv\n            >>> from mmdet.models.roi_heads.mask_heads.fcn_mask_head import *  # NOQA\n            >>> N = 7  # N = number of extracted ROIs\n            >>> C, H, W = 11, 32, 32\n            >>> # Create example instance of FCN Mask Head.\n            >>> self = FCNMaskHead(num_classes=C, num_convs=0)\n            >>> inputs = torch.rand(N, self.in_channels, H, W)\n            >>> mask_pred = self.forward(inputs)\n            >>> # Each input is associated with some bounding box\n            >>> det_bboxes = torch.Tensor([[1, 1, 42, 42 ]] * N)\n            >>> det_labels = torch.randint(0, C, size=(N,))\n            >>> rcnn_test_cfg = mmcv.Config({'mask_thr_binary': 0, })\n            >>> ori_shape = (H * 4, W * 4)\n            >>> scale_factor = torch.FloatTensor((1, 1))\n            >>> rescale = False\n            >>> # Encoded masks are a list for each category.\n            >>> encoded_masks = self.get_seg_masks(\n            >>>     mask_pred, det_bboxes, det_labels, rcnn_test_cfg, ori_shape,\n            >>>     scale_factor, rescale\n            >>> )\n            >>> assert len(encoded_masks) == C\n            >>> assert sum(list(map(len, encoded_masks))) == N\n        \"\"\"\n        if isinstance(mask_pred, torch.Tensor):\n            mask_pred = mask_pred.sigmoid()\n        else:\n            # In AugTest, has been activated before\n            mask_pred = det_bboxes.new_tensor(mask_pred)\n\n        device = mask_pred.device\n        cls_segms = [[] for _ in range(self.num_classes)\n                     ]  # BG is not included in num_classes\n        bboxes = det_bboxes[:, :4]\n        labels = det_labels\n\n        # In most cases, scale_factor 
should have been\n        # converted to Tensor when rescale the bbox\n        if not isinstance(scale_factor, torch.Tensor):\n            if isinstance(scale_factor, float):\n                scale_factor = np.array([scale_factor] * 4)\n                warn('Scale_factor should be a Tensor or ndarray '\n                     'with shape (4,), float would be deprecated. ')\n            assert isinstance(scale_factor, np.ndarray)\n            scale_factor = torch.Tensor(scale_factor)\n\n        if rescale:\n            img_h, img_w = ori_shape[:2]\n            bboxes = bboxes / scale_factor.to(bboxes)\n        else:\n            w_scale, h_scale = scale_factor[0], scale_factor[1]\n            img_h = np.round(ori_shape[0] * h_scale.item()).astype(np.int32)\n            img_w = np.round(ori_shape[1] * w_scale.item()).astype(np.int32)\n\n        N = len(mask_pred)\n        # The actual implementation split the input into chunks,\n        # and paste them chunk by chunk.\n        if device.type == 'cpu':\n            # CPU is most efficient when they are pasted one by one with\n            # skip_empty=True, so that it performs minimal number of\n            # operations.\n            num_chunks = N\n        else:\n            # GPU benefits from parallelism for larger chunks,\n            # but may have memory issue\n            # the types of img_w and img_h are np.int32,\n            # when the image resolution is large,\n            # the calculation of num_chunks will overflow.\n            # so we need to change the types of img_w and img_h to int.\n            # See https://github.com/open-mmlab/mmdetection/pull/5191\n            num_chunks = int(\n                np.ceil(N * int(img_h) * int(img_w) * BYTES_PER_FLOAT /\n                        GPU_MEM_LIMIT))\n            assert (num_chunks <=\n                    N), 'Default GPU_MEM_LIMIT is too small; try increasing it'\n        chunks = torch.chunk(torch.arange(N, device=device), num_chunks)\n\n        threshold = rcnn_test_cfg.mask_thr_binary\n        im_mask = torch.zeros(\n            N,\n            img_h,\n            img_w,\n            device=device,\n            dtype=torch.bool if threshold >= 0 else torch.uint8)\n\n        if not self.class_agnostic:\n            mask_pred = mask_pred[range(N), labels][:, None]\n\n        for inds in chunks:\n            masks_chunk, spatial_inds = _do_paste_mask(\n                mask_pred[inds],\n                bboxes[inds],\n                img_h,\n                img_w,\n                skip_empty=device.type == 'cpu')\n\n            if threshold >= 0:\n                masks_chunk = (masks_chunk >= threshold).to(dtype=torch.bool)\n            else:\n                # for visualization and debugging\n                masks_chunk = (masks_chunk * 255).to(dtype=torch.uint8)\n\n            im_mask[(inds, ) + spatial_inds] = masks_chunk\n\n        for i in range(N):\n            cls_segms[labels[i]].append(im_mask[i].detach().cpu().numpy())\n        return cls_segms\n\n    def onnx_export(self, mask_pred, det_bboxes, det_labels, rcnn_test_cfg,\n                    ori_shape, **kwargs):\n        \"\"\"Get segmentation masks from mask_pred and bboxes.\n\n        Args:\n            mask_pred (Tensor): shape (n, #class, h, w).\n            det_bboxes (Tensor): shape (n, 4/5)\n            det_labels (Tensor): shape (n, )\n            rcnn_test_cfg (dict): rcnn testing config\n            ori_shape (Tuple): original image height and width, shape (2,)\n\n        Returns:\n            Tensor: a mask 
of shape (N, img_h, img_w).\n        \"\"\"\n\n        mask_pred = mask_pred.sigmoid()\n        bboxes = det_bboxes[:, :4]\n        labels = det_labels\n        # No need to consider rescale and scale_factor while exporting to ONNX\n        img_h, img_w = ori_shape[:2]\n        threshold = rcnn_test_cfg.mask_thr_binary\n        if not self.class_agnostic:\n            box_inds = torch.arange(mask_pred.shape[0])\n            mask_pred = mask_pred[box_inds, labels][:, None]\n        masks, _ = _do_paste_mask(\n            mask_pred, bboxes, img_h, img_w, skip_empty=False)\n        if threshold >= 0:\n            # should convert to float to avoid problems in TRT\n            masks = (masks >= threshold).to(dtype=torch.float)\n        return masks\n\n\ndef _do_paste_mask(masks, boxes, img_h, img_w, skip_empty=True):\n    \"\"\"Paste instance masks according to boxes.\n\n    This implementation is modified from\n    https://github.com/facebookresearch/detectron2/\n\n    Args:\n        masks (Tensor): N, 1, H, W\n        boxes (Tensor): N, 4\n        img_h (int): Height of the image to be pasted.\n        img_w (int): Width of the image to be pasted.\n        skip_empty (bool): Only paste masks within the region that\n            tightly bound all boxes, and returns the results this region only.\n            An important optimization for CPU.\n\n    Returns:\n        tuple: (Tensor, tuple). The first item is mask tensor, the second one\n            is the slice object.\n        If skip_empty == False, the whole image will be pasted. It will\n            return a mask of shape (N, img_h, img_w) and an empty tuple.\n        If skip_empty == True, only area around the mask will be pasted.\n            A mask of shape (N, h', w') and its start and end coordinates\n            in the original image will be returned.\n    \"\"\"\n    # On GPU, paste all masks together (up to chunk size)\n    # by using the entire image to sample the masks\n    # Compared to pasting them one by one,\n    # this has more operations but is faster on COCO-scale dataset.\n    device = masks.device\n    if skip_empty:\n        x0_int, y0_int = torch.clamp(\n            boxes.min(dim=0).values.floor()[:2] - 1,\n            min=0).to(dtype=torch.int32)\n        x1_int = torch.clamp(\n            boxes[:, 2].max().ceil() + 1, max=img_w).to(dtype=torch.int32)\n        y1_int = torch.clamp(\n            boxes[:, 3].max().ceil() + 1, max=img_h).to(dtype=torch.int32)\n    else:\n        x0_int, y0_int = 0, 0\n        x1_int, y1_int = img_w, img_h\n    x0, y0, x1, y1 = torch.split(boxes, 1, dim=1)  # each is Nx1\n\n    N = masks.shape[0]\n\n    img_y = torch.arange(y0_int, y1_int, device=device).to(torch.float32) + 0.5\n    img_x = torch.arange(x0_int, x1_int, device=device).to(torch.float32) + 0.5\n    img_y = (img_y - y0) / (y1 - y0) * 2 - 1\n    img_x = (img_x - x0) / (x1 - x0) * 2 - 1\n    # img_x, img_y have shapes (N, w), (N, h)\n    # IsInf op is not supported with ONNX<=1.7.0\n    if not torch.onnx.is_in_onnx_export():\n        if torch.isinf(img_x).any():\n            inds = torch.where(torch.isinf(img_x))\n            img_x[inds] = 0\n        if torch.isinf(img_y).any():\n            inds = torch.where(torch.isinf(img_y))\n            img_y[inds] = 0\n\n    gx = img_x[:, None, :].expand(N, img_y.size(1), img_x.size(1))\n    gy = img_y[:, :, None].expand(N, img_y.size(1), img_x.size(1))\n    grid = torch.stack([gx, gy], dim=3)\n\n    img_masks = F.grid_sample(\n        masks.to(dtype=torch.float32), grid, 
align_corners=False)\n\n    if skip_empty:\n        return img_masks[:, 0], (slice(y0_int, y1_int), slice(x0_int, x1_int))\n    else:\n        return img_masks[:, 0], ()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/feature_relay_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.runner import BaseModule, auto_fp16\n\nfrom mmdet.models.builder import HEADS\n\n\n@HEADS.register_module()\nclass FeatureRelayHead(BaseModule):\n    \"\"\"Feature Relay Head used in `SCNet <https://arxiv.org/abs/2012.10150>`_.\n\n    Args:\n        in_channels (int, optional): number of input channels. Default: 256.\n        conv_out_channels (int, optional): number of output channels before\n            classification layer. Default: 256.\n        roi_feat_size (int, optional): roi feat size at box head. Default: 7.\n        scale_factor (int, optional): scale factor to match roi feat size\n            at mask head. Default: 2.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=1024,\n                 out_conv_channels=256,\n                 roi_feat_size=7,\n                 scale_factor=2,\n                 init_cfg=dict(type='Kaiming', layer='Linear')):\n        super(FeatureRelayHead, self).__init__(init_cfg)\n        assert isinstance(roi_feat_size, int)\n\n        self.in_channels = in_channels\n        self.out_conv_channels = out_conv_channels\n        self.roi_feat_size = roi_feat_size\n        self.out_channels = (roi_feat_size**2) * out_conv_channels\n        self.scale_factor = scale_factor\n        self.fp16_enabled = False\n\n        self.fc = nn.Linear(self.in_channels, self.out_channels)\n        self.upsample = nn.Upsample(\n            scale_factor=scale_factor, mode='bilinear', align_corners=True)\n\n    @auto_fp16()\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        N, in_C = x.shape\n        if N > 0:\n            out_C = self.out_conv_channels\n            out_HW = self.roi_feat_size\n            x = self.fc(x)\n            x = x.reshape(N, out_C, out_HW, out_HW)\n            x = self.upsample(x)\n            return x\n        return None\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/fused_semantic_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, auto_fp16, force_fp32\n\nfrom mmdet.models.builder import HEADS, build_loss\n\n\n@HEADS.register_module()\nclass FusedSemanticHead(BaseModule):\n    r\"\"\"Multi-level fused semantic segmentation head.\n\n    .. code-block:: none\n\n        in_1 -> 1x1 conv ---\n                            |\n        in_2 -> 1x1 conv -- |\n                           ||\n        in_3 -> 1x1 conv - ||\n                          |||                  /-> 1x1 conv (mask prediction)\n        in_4 -> 1x1 conv -----> 3x3 convs (*4)\n                            |                  \\-> 1x1 conv (feature)\n        in_5 -> 1x1 conv ---\n    \"\"\"  # noqa: W605\n\n    def __init__(self,\n                 num_ins,\n                 fusion_level,\n                 num_convs=4,\n                 in_channels=256,\n                 conv_out_channels=256,\n                 num_classes=183,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 ignore_label=None,\n                 loss_weight=None,\n                 loss_seg=dict(\n                     type='CrossEntropyLoss',\n                     ignore_index=255,\n                     loss_weight=0.2),\n                 init_cfg=dict(\n                     type='Kaiming', override=dict(name='conv_logits'))):\n        super(FusedSemanticHead, self).__init__(init_cfg)\n        self.num_ins = num_ins\n        self.fusion_level = fusion_level\n        self.num_convs = num_convs\n        self.in_channels = in_channels\n        self.conv_out_channels = conv_out_channels\n        self.num_classes = num_classes\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.fp16_enabled = False\n\n        self.lateral_convs = nn.ModuleList()\n        for i in range(self.num_ins):\n            self.lateral_convs.append(\n                ConvModule(\n                    self.in_channels,\n                    self.in_channels,\n                    1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    inplace=False))\n\n        self.convs = nn.ModuleList()\n        for i in range(self.num_convs):\n            in_channels = self.in_channels if i == 0 else conv_out_channels\n            self.convs.append(\n                ConvModule(\n                    in_channels,\n                    conv_out_channels,\n                    3,\n                    padding=1,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg))\n        self.conv_embedding = ConvModule(\n            conv_out_channels,\n            conv_out_channels,\n            1,\n            conv_cfg=self.conv_cfg,\n            norm_cfg=self.norm_cfg)\n        self.conv_logits = nn.Conv2d(conv_out_channels, self.num_classes, 1)\n        if ignore_label:\n            loss_seg['ignore_index'] = ignore_label\n        if loss_weight:\n            loss_seg['loss_weight'] = loss_weight\n        if ignore_label or loss_weight:\n            warnings.warn('``ignore_label`` and ``loss_weight`` would be '\n                          'deprecated soon. 
Please set ``ignore_index`` and '\n                          '``loss_weight`` in ``loss_seg`` instead.')\n        self.criterion = build_loss(loss_seg)\n\n    @auto_fp16()\n    def forward(self, feats):\n        x = self.lateral_convs[self.fusion_level](feats[self.fusion_level])\n        fused_size = tuple(x.shape[-2:])\n        for i, feat in enumerate(feats):\n            if i != self.fusion_level:\n                feat = F.interpolate(\n                    feat, size=fused_size, mode='bilinear', align_corners=True)\n                # fix runtime error of \"+=\" inplace operation in PyTorch 1.10\n                x = x + self.lateral_convs[i](feat)\n\n        for i in range(self.num_convs):\n            x = self.convs[i](x)\n\n        mask_pred = self.conv_logits(x)\n        x = self.conv_embedding(x)\n        return mask_pred, x\n\n    @force_fp32(apply_to=('mask_pred', ))\n    def loss(self, mask_pred, labels):\n        labels = labels.squeeze(1).long()\n        loss_semantic_seg = self.criterion(mask_pred, labels)\n        return loss_semantic_seg\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/global_context_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, auto_fp16, force_fp32\n\nfrom mmdet.models.builder import HEADS\nfrom mmdet.models.utils import ResLayer, SimplifiedBasicBlock\n\n\n@HEADS.register_module()\nclass GlobalContextHead(BaseModule):\n    \"\"\"Global context head used in `SCNet <https://arxiv.org/abs/2012.10150>`_.\n\n    Args:\n        num_convs (int, optional): number of convolutional layer in GlbCtxHead.\n            Default: 4.\n        in_channels (int, optional): number of input channels. Default: 256.\n        conv_out_channels (int, optional): number of output channels before\n            classification layer. Default: 256.\n        num_classes (int, optional): number of classes. Default: 80.\n        loss_weight (float, optional): global context loss weight. Default: 1.\n        conv_cfg (dict, optional): config to init conv layer. Default: None.\n        norm_cfg (dict, optional): config to init norm layer. Default: None.\n        conv_to_res (bool, optional): if True, 2 convs will be grouped into\n            1 `SimplifiedBasicBlock` using a skip connection. Default: False.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_convs=4,\n                 in_channels=256,\n                 conv_out_channels=256,\n                 num_classes=80,\n                 loss_weight=1.0,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 conv_to_res=False,\n                 init_cfg=dict(\n                     type='Normal', std=0.01, override=dict(name='fc'))):\n        super(GlobalContextHead, self).__init__(init_cfg)\n        self.num_convs = num_convs\n        self.in_channels = in_channels\n        self.conv_out_channels = conv_out_channels\n        self.num_classes = num_classes\n        self.loss_weight = loss_weight\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.conv_to_res = conv_to_res\n        self.fp16_enabled = False\n\n        if self.conv_to_res:\n            num_res_blocks = num_convs // 2\n            self.convs = ResLayer(\n                SimplifiedBasicBlock,\n                in_channels,\n                self.conv_out_channels,\n                num_res_blocks,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg)\n            self.num_convs = num_res_blocks\n        else:\n            self.convs = nn.ModuleList()\n            for i in range(self.num_convs):\n                in_channels = self.in_channels if i == 0 else conv_out_channels\n                self.convs.append(\n                    ConvModule(\n                        in_channels,\n                        conv_out_channels,\n                        3,\n                        padding=1,\n                        conv_cfg=self.conv_cfg,\n                        norm_cfg=self.norm_cfg))\n\n        self.pool = nn.AdaptiveAvgPool2d(1)\n        self.fc = nn.Linear(conv_out_channels, num_classes)\n\n        self.criterion = nn.BCEWithLogitsLoss()\n\n    @auto_fp16()\n    def forward(self, feats):\n        \"\"\"Forward function.\"\"\"\n        x = feats[-1]\n        for i in range(self.num_convs):\n            x = self.convs[i](x)\n        x = self.pool(x)\n\n        # multi-class prediction\n        mc_pred = x.reshape(x.size(0), -1)\n        mc_pred = self.fc(mc_pred)\n\n        return mc_pred, x\n\n    
@force_fp32(apply_to=('pred', ))\n    def loss(self, pred, labels):\n        \"\"\"Loss function.\"\"\"\n        labels = [lbl.unique() for lbl in labels]\n        targets = pred.new_zeros(pred.size())\n        for i, label in enumerate(labels):\n            targets[i, label] = 1.0\n        loss = self.loss_weight * self.criterion(pred, targets)\n        return loss\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/grid_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\n\nfrom mmdet.models.builder import HEADS, build_loss\n\n\n@HEADS.register_module()\nclass GridHead(BaseModule):\n\n    def __init__(self,\n                 grid_points=9,\n                 num_convs=8,\n                 roi_feat_size=14,\n                 in_channels=256,\n                 conv_kernel_size=3,\n                 point_feat_channels=64,\n                 deconv_kernel_size=4,\n                 class_agnostic=False,\n                 loss_grid=dict(\n                     type='CrossEntropyLoss', use_sigmoid=True,\n                     loss_weight=15),\n                 conv_cfg=None,\n                 norm_cfg=dict(type='GN', num_groups=36),\n                 init_cfg=[\n                     dict(type='Kaiming', layer=['Conv2d', 'Linear']),\n                     dict(\n                         type='Normal',\n                         layer='ConvTranspose2d',\n                         std=0.001,\n                         override=dict(\n                             type='Normal',\n                             name='deconv2',\n                             std=0.001,\n                             bias=-np.log(0.99 / 0.01)))\n                 ]):\n        super(GridHead, self).__init__(init_cfg)\n        self.grid_points = grid_points\n        self.num_convs = num_convs\n        self.roi_feat_size = roi_feat_size\n        self.in_channels = in_channels\n        self.conv_kernel_size = conv_kernel_size\n        self.point_feat_channels = point_feat_channels\n        self.conv_out_channels = self.point_feat_channels * self.grid_points\n        self.class_agnostic = class_agnostic\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        if isinstance(norm_cfg, dict) and norm_cfg['type'] == 'GN':\n            assert self.conv_out_channels % norm_cfg['num_groups'] == 0\n\n        assert self.grid_points >= 4\n        self.grid_size = int(np.sqrt(self.grid_points))\n        if self.grid_size * self.grid_size != self.grid_points:\n            raise ValueError('grid_points must be a square number')\n\n        # the predicted heatmap is half of whole_map_size\n        if not isinstance(self.roi_feat_size, int):\n            raise ValueError('Only square RoIs are supporeted in Grid R-CNN')\n        self.whole_map_size = self.roi_feat_size * 4\n\n        # compute point-wise sub-regions\n        self.sub_regions = self.calc_sub_regions()\n\n        self.convs = []\n        for i in range(self.num_convs):\n            in_channels = (\n                self.in_channels if i == 0 else self.conv_out_channels)\n            stride = 2 if i == 0 else 1\n            padding = (self.conv_kernel_size - 1) // 2\n            self.convs.append(\n                ConvModule(\n                    in_channels,\n                    self.conv_out_channels,\n                    self.conv_kernel_size,\n                    stride=stride,\n                    padding=padding,\n                    conv_cfg=self.conv_cfg,\n                    norm_cfg=self.norm_cfg,\n                    bias=True))\n        self.convs = nn.Sequential(*self.convs)\n\n        self.deconv1 = nn.ConvTranspose2d(\n            self.conv_out_channels,\n            self.conv_out_channels,\n            kernel_size=deconv_kernel_size,\n            stride=2,\n            padding=(deconv_kernel_size - 2) 
// 2,\n            groups=grid_points)\n        self.norm1 = nn.GroupNorm(grid_points, self.conv_out_channels)\n        self.deconv2 = nn.ConvTranspose2d(\n            self.conv_out_channels,\n            grid_points,\n            kernel_size=deconv_kernel_size,\n            stride=2,\n            padding=(deconv_kernel_size - 2) // 2,\n            groups=grid_points)\n\n        # find the 4-neighbor of each grid point\n        self.neighbor_points = []\n        grid_size = self.grid_size\n        for i in range(grid_size):  # i-th column\n            for j in range(grid_size):  # j-th row\n                neighbors = []\n                if i > 0:  # left: (i - 1, j)\n                    neighbors.append((i - 1) * grid_size + j)\n                if j > 0:  # up: (i, j - 1)\n                    neighbors.append(i * grid_size + j - 1)\n                if j < grid_size - 1:  # down: (i, j + 1)\n                    neighbors.append(i * grid_size + j + 1)\n                if i < grid_size - 1:  # right: (i + 1, j)\n                    neighbors.append((i + 1) * grid_size + j)\n                self.neighbor_points.append(tuple(neighbors))\n        # total edges in the grid\n        self.num_edges = sum([len(p) for p in self.neighbor_points])\n\n        self.forder_trans = nn.ModuleList()  # first-order feature transition\n        self.sorder_trans = nn.ModuleList()  # second-order feature transition\n        for neighbors in self.neighbor_points:\n            fo_trans = nn.ModuleList()\n            so_trans = nn.ModuleList()\n            for _ in range(len(neighbors)):\n                # each transition module consists of a 5x5 depth-wise conv and\n                # 1x1 conv.\n                fo_trans.append(\n                    nn.Sequential(\n                        nn.Conv2d(\n                            self.point_feat_channels,\n                            self.point_feat_channels,\n                            5,\n                            stride=1,\n                            padding=2,\n                            groups=self.point_feat_channels),\n                        nn.Conv2d(self.point_feat_channels,\n                                  self.point_feat_channels, 1)))\n                so_trans.append(\n                    nn.Sequential(\n                        nn.Conv2d(\n                            self.point_feat_channels,\n                            self.point_feat_channels,\n                            5,\n                            1,\n                            2,\n                            groups=self.point_feat_channels),\n                        nn.Conv2d(self.point_feat_channels,\n                                  self.point_feat_channels, 1)))\n            self.forder_trans.append(fo_trans)\n            self.sorder_trans.append(so_trans)\n\n        self.loss_grid = build_loss(loss_grid)\n\n    def forward(self, x):\n        assert x.shape[-1] == x.shape[-2] == self.roi_feat_size\n        # RoI feature transformation, downsample 2x\n        x = self.convs(x)\n\n        c = self.point_feat_channels\n        # first-order fusion\n        x_fo = [None for _ in range(self.grid_points)]\n        for i, points in enumerate(self.neighbor_points):\n            x_fo[i] = x[:, i * c:(i + 1) * c]\n            for j, point_idx in enumerate(points):\n                x_fo[i] = x_fo[i] + self.forder_trans[i][j](\n                    x[:, point_idx * c:(point_idx + 1) * c])\n\n        # second-order fusion\n        x_so = [None for _ in range(self.grid_points)]\n        for i, 
points in enumerate(self.neighbor_points):\n            x_so[i] = x[:, i * c:(i + 1) * c]\n            for j, point_idx in enumerate(points):\n                x_so[i] = x_so[i] + self.sorder_trans[i][j](x_fo[point_idx])\n\n        # predicted heatmap with fused features\n        x2 = torch.cat(x_so, dim=1)\n        x2 = self.deconv1(x2)\n        x2 = F.relu(self.norm1(x2), inplace=True)\n        heatmap = self.deconv2(x2)\n\n        # predicted heatmap with original features (applicable during training)\n        if self.training:\n            x1 = x\n            x1 = self.deconv1(x1)\n            x1 = F.relu(self.norm1(x1), inplace=True)\n            heatmap_unfused = self.deconv2(x1)\n        else:\n            heatmap_unfused = heatmap\n\n        return dict(fused=heatmap, unfused=heatmap_unfused)\n\n    def calc_sub_regions(self):\n        \"\"\"Compute point specific representation regions.\n\n        See Grid R-CNN Plus (https://arxiv.org/abs/1906.05688) for details.\n        \"\"\"\n        # to make it consistent with the original implementation, half_size\n        # is computed as 2 * quarter_size, which is smaller\n        half_size = self.whole_map_size // 4 * 2\n        sub_regions = []\n        for i in range(self.grid_points):\n            x_idx = i // self.grid_size\n            y_idx = i % self.grid_size\n            if x_idx == 0:\n                sub_x1 = 0\n            elif x_idx == self.grid_size - 1:\n                sub_x1 = half_size\n            else:\n                ratio = x_idx / (self.grid_size - 1) - 0.25\n                sub_x1 = max(int(ratio * self.whole_map_size), 0)\n\n            if y_idx == 0:\n                sub_y1 = 0\n            elif y_idx == self.grid_size - 1:\n                sub_y1 = half_size\n            else:\n                ratio = y_idx / (self.grid_size - 1) - 0.25\n                sub_y1 = max(int(ratio * self.whole_map_size), 0)\n            sub_regions.append(\n                (sub_x1, sub_y1, sub_x1 + half_size, sub_y1 + half_size))\n        return sub_regions\n\n    def get_targets(self, sampling_results, rcnn_train_cfg):\n        # mix all samples (across images) together.\n        pos_bboxes = torch.cat([res.pos_bboxes for res in sampling_results],\n                               dim=0).cpu()\n        pos_gt_bboxes = torch.cat(\n            [res.pos_gt_bboxes for res in sampling_results], dim=0).cpu()\n        assert pos_bboxes.shape == pos_gt_bboxes.shape\n\n        # expand pos_bboxes to 2x of original size\n        x1 = pos_bboxes[:, 0] - (pos_bboxes[:, 2] - pos_bboxes[:, 0]) / 2\n        y1 = pos_bboxes[:, 1] - (pos_bboxes[:, 3] - pos_bboxes[:, 1]) / 2\n        x2 = pos_bboxes[:, 2] + (pos_bboxes[:, 2] - pos_bboxes[:, 0]) / 2\n        y2 = pos_bboxes[:, 3] + (pos_bboxes[:, 3] - pos_bboxes[:, 1]) / 2\n        pos_bboxes = torch.stack([x1, y1, x2, y2], dim=-1)\n        pos_bbox_ws = (pos_bboxes[:, 2] - pos_bboxes[:, 0]).unsqueeze(-1)\n        pos_bbox_hs = (pos_bboxes[:, 3] - pos_bboxes[:, 1]).unsqueeze(-1)\n\n        num_rois = pos_bboxes.shape[0]\n        map_size = self.whole_map_size\n        # this is not the final target shape\n        targets = torch.zeros((num_rois, self.grid_points, map_size, map_size),\n                              dtype=torch.float)\n\n        # pre-compute interpolation factors for all grid points.\n        # the first item is the factor of x-dim, and the second is y-dim.\n        # for a 9-point grid, factors are like (1, 0), (0.5, 0.5), (0, 1)\n        factors = []\n        for j in 
range(self.grid_points):\n            x_idx = j // self.grid_size\n            y_idx = j % self.grid_size\n            factors.append((1 - x_idx / (self.grid_size - 1),\n                            1 - y_idx / (self.grid_size - 1)))\n\n        radius = rcnn_train_cfg.pos_radius\n        radius2 = radius**2\n        for i in range(num_rois):\n            # ignore small bboxes\n            if (pos_bbox_ws[i] <= self.grid_size\n                    or pos_bbox_hs[i] <= self.grid_size):\n                continue\n            # for each grid point, mark a small circle as positive\n            for j in range(self.grid_points):\n                factor_x, factor_y = factors[j]\n                gridpoint_x = factor_x * pos_gt_bboxes[i, 0] + (\n                    1 - factor_x) * pos_gt_bboxes[i, 2]\n                gridpoint_y = factor_y * pos_gt_bboxes[i, 1] + (\n                    1 - factor_y) * pos_gt_bboxes[i, 3]\n\n                cx = int((gridpoint_x - pos_bboxes[i, 0]) / pos_bbox_ws[i] *\n                         map_size)\n                cy = int((gridpoint_y - pos_bboxes[i, 1]) / pos_bbox_hs[i] *\n                         map_size)\n\n                for x in range(cx - radius, cx + radius + 1):\n                    for y in range(cy - radius, cy + radius + 1):\n                        if x >= 0 and x < map_size and y >= 0 and y < map_size:\n                            if (x - cx)**2 + (y - cy)**2 <= radius2:\n                                targets[i, j, y, x] = 1\n        # reduce the target heatmap size by a half\n        # proposed in Grid R-CNN Plus (https://arxiv.org/abs/1906.05688).\n        sub_targets = []\n        for i in range(self.grid_points):\n            sub_x1, sub_y1, sub_x2, sub_y2 = self.sub_regions[i]\n            sub_targets.append(targets[:, [i], sub_y1:sub_y2, sub_x1:sub_x2])\n        sub_targets = torch.cat(sub_targets, dim=1)\n        sub_targets = sub_targets.to(sampling_results[0].pos_bboxes.device)\n        return sub_targets\n\n    def loss(self, grid_pred, grid_targets):\n        loss_fused = self.loss_grid(grid_pred['fused'], grid_targets)\n        loss_unfused = self.loss_grid(grid_pred['unfused'], grid_targets)\n        loss_grid = loss_fused + loss_unfused\n        return dict(loss_grid=loss_grid)\n\n    def get_bboxes(self, det_bboxes, grid_pred, img_metas):\n        # TODO: refactoring\n        assert det_bboxes.shape[0] == grid_pred.shape[0]\n        det_bboxes = det_bboxes.cpu()\n        cls_scores = det_bboxes[:, [4]]\n        det_bboxes = det_bboxes[:, :4]\n        grid_pred = grid_pred.sigmoid().cpu()\n\n        R, c, h, w = grid_pred.shape\n        half_size = self.whole_map_size // 4 * 2\n        assert h == w == half_size\n        assert c == self.grid_points\n\n        # find the point with max scores in the half-sized heatmap\n        grid_pred = grid_pred.view(R * c, h * w)\n        pred_scores, pred_position = grid_pred.max(dim=1)\n        xs = pred_position % w\n        ys = pred_position // w\n\n        # get the position in the whole heatmap instead of half-sized heatmap\n        for i in range(self.grid_points):\n            xs[i::self.grid_points] += self.sub_regions[i][0]\n            ys[i::self.grid_points] += self.sub_regions[i][1]\n\n        # reshape to (num_rois, grid_points)\n        pred_scores, xs, ys = tuple(\n            map(lambda x: x.view(R, c), [pred_scores, xs, ys]))\n\n        # get expanded pos_bboxes\n        widths = (det_bboxes[:, 2] - det_bboxes[:, 0]).unsqueeze(-1)\n        heights = (det_bboxes[:, 3] - 
det_bboxes[:, 1]).unsqueeze(-1)\n        x1 = (det_bboxes[:, 0, None] - widths / 2)\n        y1 = (det_bboxes[:, 1, None] - heights / 2)\n        # map the grid point to the absolute coordinates\n        abs_xs = (xs.float() + 0.5) / w * widths + x1\n        abs_ys = (ys.float() + 0.5) / h * heights + y1\n\n        # get the grid points indices that fall on the bbox boundaries\n        x1_inds = [i for i in range(self.grid_size)]\n        y1_inds = [i * self.grid_size for i in range(self.grid_size)]\n        x2_inds = [\n            self.grid_points - self.grid_size + i\n            for i in range(self.grid_size)\n        ]\n        y2_inds = [(i + 1) * self.grid_size - 1 for i in range(self.grid_size)]\n\n        # voting of all grid points on some boundary\n        bboxes_x1 = (abs_xs[:, x1_inds] * pred_scores[:, x1_inds]).sum(\n            dim=1, keepdim=True) / (\n                pred_scores[:, x1_inds].sum(dim=1, keepdim=True))\n        bboxes_y1 = (abs_ys[:, y1_inds] * pred_scores[:, y1_inds]).sum(\n            dim=1, keepdim=True) / (\n                pred_scores[:, y1_inds].sum(dim=1, keepdim=True))\n        bboxes_x2 = (abs_xs[:, x2_inds] * pred_scores[:, x2_inds]).sum(\n            dim=1, keepdim=True) / (\n                pred_scores[:, x2_inds].sum(dim=1, keepdim=True))\n        bboxes_y2 = (abs_ys[:, y2_inds] * pred_scores[:, y2_inds]).sum(\n            dim=1, keepdim=True) / (\n                pred_scores[:, y2_inds].sum(dim=1, keepdim=True))\n\n        bbox_res = torch.cat(\n            [bboxes_x1, bboxes_y1, bboxes_x2, bboxes_y2, cls_scores], dim=1)\n        bbox_res[:, [0, 2]].clamp_(min=0, max=img_metas[0]['img_shape'][1])\n        bbox_res[:, [1, 3]].clamp_(min=0, max=img_metas[0]['img_shape'][0])\n\n        return bbox_res\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/htc_mask_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import ConvModule\n\nfrom mmdet.models.builder import HEADS\nfrom .fcn_mask_head import FCNMaskHead\n\n\n@HEADS.register_module()\nclass HTCMaskHead(FCNMaskHead):\n\n    def __init__(self, with_conv_res=True, *args, **kwargs):\n        super(HTCMaskHead, self).__init__(*args, **kwargs)\n        self.with_conv_res = with_conv_res\n        if self.with_conv_res:\n            self.conv_res = ConvModule(\n                self.conv_out_channels,\n                self.conv_out_channels,\n                1,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg)\n\n    def forward(self, x, res_feat=None, return_logits=True, return_feat=True):\n        if res_feat is not None:\n            assert self.with_conv_res\n            res_feat = self.conv_res(res_feat)\n            x = x + res_feat\n        for conv in self.convs:\n            x = conv(x)\n        res_feat = x\n        outs = []\n        if return_logits:\n            x = self.upsample(x)\n            if self.upsample_method == 'deconv':\n                x = self.relu(x)\n            mask_pred = self.conv_logits(x)\n            outs.append(mask_pred)\n        if return_feat:\n            outs.append(res_feat)\n        return outs if len(outs) > 1 else outs[0]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/mask_point_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend/point_head/point_head.py  # noqa\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.ops import point_sample, rel_roi_point_to_rel_img_point\nfrom mmcv.runner import BaseModule\n\nfrom mmdet.models.builder import HEADS, build_loss\nfrom mmdet.models.utils import (get_uncertain_point_coords_with_randomness,\n                                get_uncertainty)\n\n\n@HEADS.register_module()\nclass MaskPointHead(BaseModule):\n    \"\"\"A mask point head use in PointRend.\n\n    ``MaskPointHead`` use shared multi-layer perceptron (equivalent to\n    nn.Conv1d) to predict the logit of input points. The fine-grained feature\n    and coarse feature will be concatenate together for predication.\n\n    Args:\n        num_fcs (int): Number of fc layers in the head. Default: 3.\n        in_channels (int): Number of input channels. Default: 256.\n        fc_channels (int): Number of fc channels. Default: 256.\n        num_classes (int): Number of classes for logits. Default: 80.\n        class_agnostic (bool): Whether use class agnostic classification.\n            If so, the output channels of logits will be 1. Default: False.\n        coarse_pred_each_layer (bool): Whether concatenate coarse feature with\n            the output of each fc layer. Default: True.\n        conv_cfg (dict | None): Dictionary to construct and config conv layer.\n            Default: dict(type='Conv1d'))\n        norm_cfg (dict | None): Dictionary to construct and config norm layer.\n            Default: None.\n        loss_point (dict): Dictionary to construct and config loss layer of\n            point head. 
Default: dict(type='CrossEntropyLoss', use_mask=True,\n            loss_weight=1.0).\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 num_fcs=3,\n                 in_channels=256,\n                 fc_channels=256,\n                 class_agnostic=False,\n                 coarse_pred_each_layer=True,\n                 conv_cfg=dict(type='Conv1d'),\n                 norm_cfg=None,\n                 act_cfg=dict(type='ReLU'),\n                 loss_point=dict(\n                     type='CrossEntropyLoss', use_mask=True, loss_weight=1.0),\n                 init_cfg=dict(\n                     type='Normal', std=0.001,\n                     override=dict(name='fc_logits'))):\n        super().__init__(init_cfg)\n        self.num_fcs = num_fcs\n        self.in_channels = in_channels\n        self.fc_channels = fc_channels\n        self.num_classes = num_classes\n        self.class_agnostic = class_agnostic\n        self.coarse_pred_each_layer = coarse_pred_each_layer\n        self.conv_cfg = conv_cfg\n        self.norm_cfg = norm_cfg\n        self.loss_point = build_loss(loss_point)\n\n        fc_in_channels = in_channels + num_classes\n        self.fcs = nn.ModuleList()\n        for _ in range(num_fcs):\n            fc = ConvModule(\n                fc_in_channels,\n                fc_channels,\n                kernel_size=1,\n                stride=1,\n                padding=0,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg)\n            self.fcs.append(fc)\n            fc_in_channels = fc_channels\n            fc_in_channels += num_classes if self.coarse_pred_each_layer else 0\n\n        out_channels = 1 if self.class_agnostic else self.num_classes\n        self.fc_logits = nn.Conv1d(\n            fc_in_channels, out_channels, kernel_size=1, stride=1, padding=0)\n\n    def forward(self, fine_grained_feats, coarse_feats):\n        \"\"\"Classify each point base on fine grained and coarse feats.\n\n        Args:\n            fine_grained_feats (Tensor): Fine grained feature sampled from FPN,\n                shape (num_rois, in_channels, num_points).\n            coarse_feats (Tensor): Coarse feature sampled from CoarseMaskHead,\n                shape (num_rois, num_classes, num_points).\n\n        Returns:\n            Tensor: Point classification results,\n                shape (num_rois, num_class, num_points).\n        \"\"\"\n\n        x = torch.cat([fine_grained_feats, coarse_feats], dim=1)\n        for fc in self.fcs:\n            x = fc(x)\n            if self.coarse_pred_each_layer:\n                x = torch.cat((x, coarse_feats), dim=1)\n        return self.fc_logits(x)\n\n    def get_targets(self, rois, rel_roi_points, sampling_results, gt_masks,\n                    cfg):\n        \"\"\"Get training targets of MaskPointHead for all images.\n\n        Args:\n            rois (Tensor): Region of Interest, shape (num_rois, 5).\n            rel_roi_points: Points coordinates relative to RoI, shape\n                (num_rois, num_points, 2).\n            sampling_results (:obj:`SamplingResult`): Sampling result after\n                sampling and assignment.\n            gt_masks (Tensor) : Ground truth segmentation masks of\n                corresponding boxes, shape (num_rois, height, width).\n            cfg (dict): Training cfg.\n\n        Returns:\n            Tensor: Point target, shape (num_rois, 
num_points).\n        \"\"\"\n\n        num_imgs = len(sampling_results)\n        rois_list = []\n        rel_roi_points_list = []\n        for batch_ind in range(num_imgs):\n            inds = (rois[:, 0] == batch_ind)\n            rois_list.append(rois[inds])\n            rel_roi_points_list.append(rel_roi_points[inds])\n        pos_assigned_gt_inds_list = [\n            res.pos_assigned_gt_inds for res in sampling_results\n        ]\n        cfg_list = [cfg for _ in range(num_imgs)]\n\n        point_targets = map(self._get_target_single, rois_list,\n                            rel_roi_points_list, pos_assigned_gt_inds_list,\n                            gt_masks, cfg_list)\n        point_targets = list(point_targets)\n\n        if len(point_targets) > 0:\n            point_targets = torch.cat(point_targets)\n\n        return point_targets\n\n    def _get_target_single(self, rois, rel_roi_points, pos_assigned_gt_inds,\n                           gt_masks, cfg):\n        \"\"\"Get training target of MaskPointHead for each image.\"\"\"\n        num_pos = rois.size(0)\n        num_points = cfg.num_points\n        if num_pos > 0:\n            gt_masks_th = (\n                gt_masks.to_tensor(rois.dtype, rois.device).index_select(\n                    0, pos_assigned_gt_inds))\n            gt_masks_th = gt_masks_th.unsqueeze(1)\n            rel_img_points = rel_roi_point_to_rel_img_point(\n                rois, rel_roi_points, gt_masks_th)\n            point_targets = point_sample(gt_masks_th,\n                                         rel_img_points).squeeze(1)\n        else:\n            point_targets = rois.new_zeros((0, num_points))\n        return point_targets\n\n    def loss(self, point_pred, point_targets, labels):\n        \"\"\"Calculate loss for MaskPointHead.\n\n        Args:\n            point_pred (Tensor): Point predication result, shape\n                (num_rois, num_classes, num_points).\n            point_targets (Tensor): Point targets, shape (num_roi, num_points).\n            labels (Tensor): Class label of corresponding boxes,\n                shape (num_rois, )\n\n        Returns:\n            dict[str, Tensor]: a dictionary of point loss components\n        \"\"\"\n\n        loss = dict()\n        if self.class_agnostic:\n            loss_point = self.loss_point(point_pred, point_targets,\n                                         torch.zeros_like(labels))\n        else:\n            loss_point = self.loss_point(point_pred, point_targets, labels)\n        loss['loss_point'] = loss_point\n        return loss\n\n    def get_roi_rel_points_train(self, mask_pred, labels, cfg):\n        \"\"\"Get ``num_points`` most uncertain points with random points during\n        train.\n\n        Sample points in [0, 1] x [0, 1] coordinate space based on their\n        uncertainty. 
The uncertainties are calculated for each point using\n        '_get_uncertainty()' function that takes point's logit prediction as\n        input.\n\n        Args:\n            mask_pred (Tensor): A tensor of shape (num_rois, num_classes,\n                mask_height, mask_width) for class-specific or class-agnostic\n                prediction.\n            labels (list): The ground truth class for each instance.\n            cfg (dict): Training config of point head.\n\n        Returns:\n            point_coords (Tensor): A tensor of shape (num_rois, num_points, 2)\n                that contains the coordinates sampled points.\n        \"\"\"\n        point_coords = get_uncertain_point_coords_with_randomness(\n            mask_pred, labels, cfg.num_points, cfg.oversample_ratio,\n            cfg.importance_sample_ratio)\n        return point_coords\n\n    def get_roi_rel_points_test(self, mask_pred, pred_label, cfg):\n        \"\"\"Get ``num_points`` most uncertain points during test.\n\n        Args:\n            mask_pred (Tensor): A tensor of shape (num_rois, num_classes,\n                mask_height, mask_width) for class-specific or class-agnostic\n                prediction.\n            pred_label (list): The predication class for each instance.\n            cfg (dict): Testing config of point head.\n\n        Returns:\n            point_indices (Tensor): A tensor of shape (num_rois, num_points)\n                that contains indices from [0, mask_height x mask_width) of the\n                most uncertain points.\n            point_coords (Tensor): A tensor of shape (num_rois, num_points, 2)\n                that contains [0, 1] x [0, 1] normalized coordinates of the\n                most uncertain points from the [mask_height, mask_width] grid .\n        \"\"\"\n        num_points = cfg.subdivision_num_points\n        uncertainty_map = get_uncertainty(mask_pred, pred_label)\n        num_rois, _, mask_height, mask_width = uncertainty_map.shape\n\n        # During ONNX exporting, the type of each elements of 'shape' is\n        # `Tensor(float)`, while it is `float` during PyTorch inference.\n        if isinstance(mask_height, torch.Tensor):\n            h_step = 1.0 / mask_height.float()\n            w_step = 1.0 / mask_width.float()\n        else:\n            h_step = 1.0 / mask_height\n            w_step = 1.0 / mask_width\n        # cast to int to avoid dynamic K for TopK op in ONNX\n        mask_size = int(mask_height * mask_width)\n        uncertainty_map = uncertainty_map.view(num_rois, mask_size)\n        num_points = min(mask_size, num_points)\n        point_indices = uncertainty_map.topk(num_points, dim=1)[1]\n        xs = w_step / 2.0 + (point_indices % mask_width).float() * w_step\n        ys = h_step / 2.0 + (point_indices // mask_width).float() * h_step\n        point_coords = torch.stack([xs, ys], dim=2)\n        return point_indices, point_coords\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/maskiou_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import Conv2d, Linear, MaxPool2d\nfrom mmcv.runner import BaseModule, force_fp32\nfrom torch.nn.modules.utils import _pair\n\nfrom mmdet.models.builder import HEADS, build_loss\n\n\n@HEADS.register_module()\nclass MaskIoUHead(BaseModule):\n    \"\"\"Mask IoU Head.\n\n    This head predicts the IoU of predicted masks and corresponding gt masks.\n    \"\"\"\n\n    def __init__(self,\n                 num_convs=4,\n                 num_fcs=2,\n                 roi_feat_size=14,\n                 in_channels=256,\n                 conv_out_channels=256,\n                 fc_out_channels=1024,\n                 num_classes=80,\n                 loss_iou=dict(type='MSELoss', loss_weight=0.5),\n                 init_cfg=[\n                     dict(type='Kaiming', override=dict(name='convs')),\n                     dict(type='Caffe2Xavier', override=dict(name='fcs')),\n                     dict(\n                         type='Normal',\n                         std=0.01,\n                         override=dict(name='fc_mask_iou'))\n                 ]):\n        super(MaskIoUHead, self).__init__(init_cfg)\n        self.in_channels = in_channels\n        self.conv_out_channels = conv_out_channels\n        self.fc_out_channels = fc_out_channels\n        self.num_classes = num_classes\n        self.fp16_enabled = False\n\n        self.convs = nn.ModuleList()\n        for i in range(num_convs):\n            if i == 0:\n                # concatenation of mask feature and mask prediction\n                in_channels = self.in_channels + 1\n            else:\n                in_channels = self.conv_out_channels\n            stride = 2 if i == num_convs - 1 else 1\n            self.convs.append(\n                Conv2d(\n                    in_channels,\n                    self.conv_out_channels,\n                    3,\n                    stride=stride,\n                    padding=1))\n\n        roi_feat_size = _pair(roi_feat_size)\n        pooled_area = (roi_feat_size[0] // 2) * (roi_feat_size[1] // 2)\n        self.fcs = nn.ModuleList()\n        for i in range(num_fcs):\n            in_channels = (\n                self.conv_out_channels *\n                pooled_area if i == 0 else self.fc_out_channels)\n            self.fcs.append(Linear(in_channels, self.fc_out_channels))\n\n        self.fc_mask_iou = Linear(self.fc_out_channels, self.num_classes)\n        self.relu = nn.ReLU()\n        self.max_pool = MaxPool2d(2, 2)\n        self.loss_iou = build_loss(loss_iou)\n\n    def forward(self, mask_feat, mask_pred):\n        mask_pred = mask_pred.sigmoid()\n        mask_pred_pooled = self.max_pool(mask_pred.unsqueeze(1))\n\n        x = torch.cat((mask_feat, mask_pred_pooled), 1)\n\n        for conv in self.convs:\n            x = self.relu(conv(x))\n        x = x.flatten(1)\n        for fc in self.fcs:\n            x = self.relu(fc(x))\n        mask_iou = self.fc_mask_iou(x)\n        return mask_iou\n\n    @force_fp32(apply_to=('mask_iou_pred', ))\n    def loss(self, mask_iou_pred, mask_iou_targets):\n        pos_inds = mask_iou_targets > 0\n        if pos_inds.sum() > 0:\n            loss_mask_iou = self.loss_iou(mask_iou_pred[pos_inds],\n                                          mask_iou_targets[pos_inds])\n        else:\n            loss_mask_iou = mask_iou_pred.sum() * 0\n        return dict(loss_mask_iou=loss_mask_iou)\n\n    @force_fp32(apply_to=('mask_pred', 
))\n    def get_targets(self, sampling_results, gt_masks, mask_pred, mask_targets,\n                    rcnn_train_cfg):\n        \"\"\"Compute target of mask IoU.\n\n        Mask IoU target is the IoU of the predicted mask (inside a bbox) and\n        the gt mask of corresponding gt mask (the whole instance).\n        The intersection area is computed inside the bbox, and the gt mask area\n        is computed with two steps, firstly we compute the gt area inside the\n        bbox, then divide it by the area ratio of gt area inside the bbox and\n        the gt area of the whole instance.\n\n        Args:\n            sampling_results (list[:obj:`SamplingResult`]): sampling results.\n            gt_masks (BitmapMask | PolygonMask): Gt masks (the whole instance)\n                of each image, with the same shape of the input image.\n            mask_pred (Tensor): Predicted masks of each positive proposal,\n                shape (num_pos, h, w).\n            mask_targets (Tensor): Gt mask of each positive proposal,\n                binary map of the shape (num_pos, h, w).\n            rcnn_train_cfg (dict): Training config for R-CNN part.\n\n        Returns:\n            Tensor: mask iou target (length == num positive).\n        \"\"\"\n        pos_proposals = [res.pos_bboxes for res in sampling_results]\n        pos_assigned_gt_inds = [\n            res.pos_assigned_gt_inds for res in sampling_results\n        ]\n\n        # compute the area ratio of gt areas inside the proposals and\n        # the whole instance\n        area_ratios = map(self._get_area_ratio, pos_proposals,\n                          pos_assigned_gt_inds, gt_masks)\n        area_ratios = torch.cat(list(area_ratios))\n        assert mask_targets.size(0) == area_ratios.size(0)\n\n        mask_pred = (mask_pred > rcnn_train_cfg.mask_thr_binary).float()\n        mask_pred_areas = mask_pred.sum((-1, -2))\n\n        # mask_pred and mask_targets are binary maps\n        overlap_areas = (mask_pred * mask_targets).sum((-1, -2))\n\n        # compute the mask area of the whole instance\n        gt_full_areas = mask_targets.sum((-1, -2)) / (area_ratios + 1e-7)\n\n        mask_iou_targets = overlap_areas / (\n            mask_pred_areas + gt_full_areas - overlap_areas)\n        return mask_iou_targets\n\n    def _get_area_ratio(self, pos_proposals, pos_assigned_gt_inds, gt_masks):\n        \"\"\"Compute area ratio of the gt mask inside the proposal and the gt\n        mask of the corresponding instance.\"\"\"\n        num_pos = pos_proposals.size(0)\n        if num_pos > 0:\n            area_ratios = []\n            proposals_np = pos_proposals.cpu().numpy()\n            pos_assigned_gt_inds = pos_assigned_gt_inds.cpu().numpy()\n            # compute mask areas of gt instances (batch processing for speedup)\n            gt_instance_mask_area = gt_masks.areas\n            for i in range(num_pos):\n                gt_mask = gt_masks[pos_assigned_gt_inds[i]]\n\n                # crop the gt mask inside the proposal\n                bbox = proposals_np[i, :].astype(np.int32)\n                gt_mask_in_proposal = gt_mask.crop(bbox)\n\n                ratio = gt_mask_in_proposal.areas[0] / (\n                    gt_instance_mask_area[pos_assigned_gt_inds[i]] + 1e-7)\n                area_ratios.append(ratio)\n            area_ratios = torch.from_numpy(np.stack(area_ratios)).float().to(\n                pos_proposals.device)\n        else:\n            area_ratios = pos_proposals.new_zeros((0, ))\n        return area_ratios\n\n    
@force_fp32(apply_to=('mask_iou_pred', ))\n    def get_mask_scores(self, mask_iou_pred, det_bboxes, det_labels):\n        \"\"\"Get the mask scores.\n\n        mask_score = bbox_score * mask_iou\n        \"\"\"\n        inds = range(det_labels.size(0))\n        mask_scores = mask_iou_pred[inds, det_labels] * det_bboxes[inds, -1]\n        mask_scores = mask_scores.cpu().numpy()\n        det_labels = det_labels.cpu().numpy()\n        return [mask_scores[det_labels == i] for i in range(self.num_classes)]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/scnet_mask_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.builder import HEADS\nfrom mmdet.models.utils import ResLayer, SimplifiedBasicBlock\nfrom .fcn_mask_head import FCNMaskHead\n\n\n@HEADS.register_module()\nclass SCNetMaskHead(FCNMaskHead):\n    \"\"\"Mask head for `SCNet <https://arxiv.org/abs/2012.10150>`_.\n\n    Args:\n        conv_to_res (bool, optional): if True, change the conv layers to\n            ``SimplifiedBasicBlock``.\n    \"\"\"\n\n    def __init__(self, conv_to_res=True, **kwargs):\n        super(SCNetMaskHead, self).__init__(**kwargs)\n        self.conv_to_res = conv_to_res\n        if conv_to_res:\n            assert self.conv_kernel_size == 3\n            self.num_res_blocks = self.num_convs // 2\n            self.convs = ResLayer(\n                SimplifiedBasicBlock,\n                self.in_channels,\n                self.conv_out_channels,\n                self.num_res_blocks,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_heads/scnet_semantic_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.models.builder import HEADS\nfrom mmdet.models.utils import ResLayer, SimplifiedBasicBlock\nfrom .fused_semantic_head import FusedSemanticHead\n\n\n@HEADS.register_module()\nclass SCNetSemanticHead(FusedSemanticHead):\n    \"\"\"Mask head for `SCNet <https://arxiv.org/abs/2012.10150>`_.\n\n    Args:\n        conv_to_res (bool, optional): if True, change the conv layers to\n            ``SimplifiedBasicBlock``.\n    \"\"\"\n\n    def __init__(self, conv_to_res=True, **kwargs):\n        super(SCNetSemanticHead, self).__init__(**kwargs)\n        self.conv_to_res = conv_to_res\n        if self.conv_to_res:\n            num_res_blocks = self.num_convs // 2\n            self.convs = ResLayer(\n                SimplifiedBasicBlock,\n                self.in_channels,\n                self.conv_out_channels,\n                num_res_blocks,\n                conv_cfg=self.conv_cfg,\n                norm_cfg=self.norm_cfg)\n            self.num_convs = num_res_blocks\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/mask_scoring_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core import bbox2roi\nfrom ..builder import HEADS, build_head\nfrom .standard_roi_head import StandardRoIHead\n\n\n@HEADS.register_module()\nclass MaskScoringRoIHead(StandardRoIHead):\n    \"\"\"Mask Scoring RoIHead for Mask Scoring RCNN.\n\n    https://arxiv.org/abs/1903.00241\n    \"\"\"\n\n    def __init__(self, mask_iou_head, **kwargs):\n        assert mask_iou_head is not None\n        super(MaskScoringRoIHead, self).__init__(**kwargs)\n        self.mask_iou_head = build_head(mask_iou_head)\n\n    def _mask_forward_train(self, x, sampling_results, bbox_feats, gt_masks,\n                            img_metas):\n        \"\"\"Run forward function and calculate loss for Mask head in\n        training.\"\"\"\n        pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n        mask_results = super(MaskScoringRoIHead,\n                             self)._mask_forward_train(x, sampling_results,\n                                                       bbox_feats, gt_masks,\n                                                       img_metas)\n        if mask_results['loss_mask'] is None:\n            return mask_results\n\n        # mask iou head forward and loss\n        pos_mask_pred = mask_results['mask_pred'][\n            range(mask_results['mask_pred'].size(0)), pos_labels]\n        mask_iou_pred = self.mask_iou_head(mask_results['mask_feats'],\n                                           pos_mask_pred)\n        pos_mask_iou_pred = mask_iou_pred[range(mask_iou_pred.size(0)),\n                                          pos_labels]\n\n        mask_iou_targets = self.mask_iou_head.get_targets(\n            sampling_results, gt_masks, pos_mask_pred,\n            mask_results['mask_targets'], self.train_cfg)\n        loss_mask_iou = self.mask_iou_head.loss(pos_mask_iou_pred,\n                                                mask_iou_targets)\n        mask_results['loss_mask'].update(loss_mask_iou)\n        return mask_results\n\n    def simple_test_mask(self,\n                         x,\n                         img_metas,\n                         det_bboxes,\n                         det_labels,\n                         rescale=False):\n        \"\"\"Obtain mask prediction without augmentation.\"\"\"\n        # image shapes of images in the batch\n        ori_shapes = tuple(meta['ori_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        num_imgs = len(det_bboxes)\n        if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n            num_classes = self.mask_head.num_classes\n            segm_results = [[[] for _ in range(num_classes)]\n                            for _ in range(num_imgs)]\n            mask_scores = [[[] for _ in range(num_classes)]\n                           for _ in range(num_imgs)]\n        else:\n            # if det_bboxes is rescaled to the original image size, we need to\n            # rescale it back to the testing scale to obtain RoIs.\n            if rescale and not isinstance(scale_factors[0], float):\n                scale_factors = [\n                    torch.from_numpy(scale_factor).to(det_bboxes[0].device)\n                    for scale_factor in scale_factors\n                ]\n            _bboxes = [\n                det_bboxes[i][:, :4] *\n                scale_factors[i] if rescale else det_bboxes[i]\n                for i in range(num_imgs)\n            ]\n            
mask_rois = bbox2roi(_bboxes)\n            mask_results = self._mask_forward(x, mask_rois)\n            concat_det_labels = torch.cat(det_labels)\n            # get mask scores with mask iou head\n            mask_feats = mask_results['mask_feats']\n            mask_pred = mask_results['mask_pred']\n            mask_iou_pred = self.mask_iou_head(\n                mask_feats, mask_pred[range(concat_det_labels.size(0)),\n                                      concat_det_labels])\n            # split batch mask prediction back to each image\n            num_bboxes_per_img = tuple(len(_bbox) for _bbox in _bboxes)\n            mask_preds = mask_pred.split(num_bboxes_per_img, 0)\n            mask_iou_preds = mask_iou_pred.split(num_bboxes_per_img, 0)\n\n            # apply mask post-processing to each image individually\n            segm_results = []\n            mask_scores = []\n            for i in range(num_imgs):\n                if det_bboxes[i].shape[0] == 0:\n                    segm_results.append(\n                        [[] for _ in range(self.mask_head.num_classes)])\n                    mask_scores.append(\n                        [[] for _ in range(self.mask_head.num_classes)])\n                else:\n                    segm_result = self.mask_head.get_seg_masks(\n                        mask_preds[i], _bboxes[i], det_labels[i],\n                        self.test_cfg, ori_shapes[i], scale_factors[i],\n                        rescale)\n                    # get mask scores with mask iou head\n                    mask_score = self.mask_iou_head.get_mask_scores(\n                        mask_iou_preds[i], det_bboxes[i], det_labels[i])\n                    segm_results.append(segm_result)\n                    mask_scores.append(mask_score)\n        return list(zip(segm_results, mask_scores))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/pisa_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet.core import bbox2roi\nfrom ..builder import HEADS\nfrom ..losses.pisa_loss import carl_loss, isr_p\nfrom .standard_roi_head import StandardRoIHead\n\n\n@HEADS.register_module()\nclass PISARoIHead(StandardRoIHead):\n    r\"\"\"The RoI head for `Prime Sample Attention in Object Detection\n    <https://arxiv.org/abs/1904.04821>`_.\"\"\"\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None):\n        \"\"\"Forward function for training.\n\n        Args:\n            x (list[Tensor]): List of multi-level img features.\n            img_metas (list[dict]): List of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n            proposals (list[Tensors]): List of region proposals.\n            gt_bboxes (list[Tensor]): Each item are the truth boxes for each\n                image in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): Class indices corresponding to each box\n            gt_bboxes_ignore (list[Tensor], optional): Specify which bounding\n                boxes can be ignored when computing the loss.\n            gt_masks (None | Tensor) : True segmentation masks for each box\n                used if the architecture supports a segmentation task.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        # assign gts and sample proposals\n        if self.with_bbox or self.with_mask:\n            num_imgs = len(img_metas)\n            if gt_bboxes_ignore is None:\n                gt_bboxes_ignore = [None for _ in range(num_imgs)]\n            sampling_results = []\n            neg_label_weights = []\n            for i in range(num_imgs):\n                assign_result = self.bbox_assigner.assign(\n                    proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],\n                    gt_labels[i])\n                sampling_result = self.bbox_sampler.sample(\n                    assign_result,\n                    proposal_list[i],\n                    gt_bboxes[i],\n                    gt_labels[i],\n                    feats=[lvl_feat[i][None] for lvl_feat in x])\n                # neg label weight is obtained by sampling when using ISR-N\n                neg_label_weight = None\n                if isinstance(sampling_result, tuple):\n                    sampling_result, neg_label_weight = sampling_result\n                sampling_results.append(sampling_result)\n                neg_label_weights.append(neg_label_weight)\n\n        losses = dict()\n        # bbox head forward and loss\n        if self.with_bbox:\n            bbox_results = self._bbox_forward_train(\n                x,\n                sampling_results,\n                gt_bboxes,\n                gt_labels,\n                img_metas,\n                neg_label_weights=neg_label_weights)\n            losses.update(bbox_results['loss_bbox'])\n\n        # mask head forward and loss\n        if self.with_mask:\n            mask_results = self._mask_forward_train(x, sampling_results,\n         
                                           bbox_results['bbox_feats'],\n                                                    gt_masks, img_metas)\n            losses.update(mask_results['loss_mask'])\n\n        return losses\n\n    def _bbox_forward(self, x, rois):\n        \"\"\"Box forward function used in both training and testing.\"\"\"\n        # TODO: a more flexible way to decide which feature maps to use\n        bbox_feats = self.bbox_roi_extractor(\n            x[:self.bbox_roi_extractor.num_inputs], rois)\n        if self.with_shared_head:\n            bbox_feats = self.shared_head(bbox_feats)\n        cls_score, bbox_pred = self.bbox_head(bbox_feats)\n\n        bbox_results = dict(\n            cls_score=cls_score, bbox_pred=bbox_pred, bbox_feats=bbox_feats)\n        return bbox_results\n\n    def _bbox_forward_train(self,\n                            x,\n                            sampling_results,\n                            gt_bboxes,\n                            gt_labels,\n                            img_metas,\n                            neg_label_weights=None):\n        \"\"\"Run forward function and calculate loss for box head in training.\"\"\"\n        rois = bbox2roi([res.bboxes for res in sampling_results])\n\n        bbox_results = self._bbox_forward(x, rois)\n\n        bbox_targets = self.bbox_head.get_targets(sampling_results, gt_bboxes,\n                                                  gt_labels, self.train_cfg)\n\n        # neg_label_weights obtained by sampler is image-wise, mapping back to\n        # the corresponding location in label weights\n        if neg_label_weights[0] is not None:\n            label_weights = bbox_targets[1]\n            cur_num_rois = 0\n            for i in range(len(sampling_results)):\n                num_pos = sampling_results[i].pos_inds.size(0)\n                num_neg = sampling_results[i].neg_inds.size(0)\n                label_weights[cur_num_rois + num_pos:cur_num_rois + num_pos +\n                              num_neg] = neg_label_weights[i]\n                cur_num_rois += num_pos + num_neg\n\n        cls_score = bbox_results['cls_score']\n        bbox_pred = bbox_results['bbox_pred']\n\n        # Apply ISR-P\n        isr_cfg = self.train_cfg.get('isr', None)\n        if isr_cfg is not None:\n            bbox_targets = isr_p(\n                cls_score,\n                bbox_pred,\n                bbox_targets,\n                rois,\n                sampling_results,\n                self.bbox_head.loss_cls,\n                self.bbox_head.bbox_coder,\n                **isr_cfg,\n                num_class=self.bbox_head.num_classes)\n        loss_bbox = self.bbox_head.loss(cls_score, bbox_pred, rois,\n                                        *bbox_targets)\n\n        # Add CARL Loss\n        carl_cfg = self.train_cfg.get('carl', None)\n        if carl_cfg is not None:\n            loss_carl = carl_loss(\n                cls_score,\n                bbox_targets[0],\n                bbox_pred,\n                bbox_targets[2],\n                self.bbox_head.loss_bbox,\n                **carl_cfg,\n                num_class=self.bbox_head.num_classes)\n            loss_bbox.update(loss_carl)\n\n        bbox_results.update(loss_bbox=loss_bbox)\n        return bbox_results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/point_rend_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n# Modified from https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend  # noqa\nimport os\nimport warnings\n\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\nfrom mmcv.ops import point_sample, rel_roi_point_to_rel_img_point\n\nfrom mmdet.core import bbox2roi, bbox_mapping, merge_aug_masks\nfrom .. import builder\nfrom ..builder import HEADS\nfrom .standard_roi_head import StandardRoIHead\n\n\n@HEADS.register_module()\nclass PointRendRoIHead(StandardRoIHead):\n    \"\"\"`PointRend <https://arxiv.org/abs/1912.08193>`_.\"\"\"\n\n    def __init__(self, point_head, *args, **kwargs):\n        super().__init__(*args, **kwargs)\n        assert self.with_bbox and self.with_mask\n        self.init_point_head(point_head)\n\n    def init_point_head(self, point_head):\n        \"\"\"Initialize ``point_head``\"\"\"\n        self.point_head = builder.build_head(point_head)\n\n    def _mask_forward_train(self, x, sampling_results, bbox_feats, gt_masks,\n                            img_metas):\n        \"\"\"Run forward function and calculate loss for mask head and point head\n        in training.\"\"\"\n        mask_results = super()._mask_forward_train(x, sampling_results,\n                                                   bbox_feats, gt_masks,\n                                                   img_metas)\n        if mask_results['loss_mask'] is not None:\n            loss_point = self._mask_point_forward_train(\n                x, sampling_results, mask_results['mask_pred'], gt_masks,\n                img_metas)\n            mask_results['loss_mask'].update(loss_point)\n\n        return mask_results\n\n    def _mask_point_forward_train(self, x, sampling_results, mask_pred,\n                                  gt_masks, img_metas):\n        \"\"\"Run forward function and calculate loss for point head in\n        training.\"\"\"\n        pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n        rel_roi_points = self.point_head.get_roi_rel_points_train(\n            mask_pred, pos_labels, cfg=self.train_cfg)\n        rois = bbox2roi([res.pos_bboxes for res in sampling_results])\n\n        fine_grained_point_feats = self._get_fine_grained_point_feats(\n            x, rois, rel_roi_points, img_metas)\n        coarse_point_feats = point_sample(mask_pred, rel_roi_points)\n        mask_point_pred = self.point_head(fine_grained_point_feats,\n                                          coarse_point_feats)\n        mask_point_target = self.point_head.get_targets(\n            rois, rel_roi_points, sampling_results, gt_masks, self.train_cfg)\n        loss_mask_point = self.point_head.loss(mask_point_pred,\n                                               mask_point_target, pos_labels)\n\n        return loss_mask_point\n\n    def _get_fine_grained_point_feats(self, x, rois, rel_roi_points,\n                                      img_metas):\n        \"\"\"Sample fine grained feats from each level feature map and\n        concatenate them together.\n\n        Args:\n            x (tuple[Tensor]): Feature maps of all scale level.\n            rois (Tensor): shape (num_rois, 5).\n            rel_roi_points (Tensor): A tensor of shape (num_rois, num_points,\n                2) that contains [0, 1] x [0, 1] normalized coordinates of the\n                most uncertain points from the [mask_height, mask_width] grid.\n            img_metas (list[dict]): Image meta info.\n\n        Returns:\n          
  Tensor: The fine grained features for each points,\n                has shape (num_rois, feats_channels, num_points).\n        \"\"\"\n        num_imgs = len(img_metas)\n        fine_grained_feats = []\n        for idx in range(self.mask_roi_extractor.num_inputs):\n            feats = x[idx]\n            spatial_scale = 1. / float(\n                self.mask_roi_extractor.featmap_strides[idx])\n            point_feats = []\n            for batch_ind in range(num_imgs):\n                # unravel batch dim\n                feat = feats[batch_ind].unsqueeze(0)\n                inds = (rois[:, 0].long() == batch_ind)\n                if inds.any():\n                    rel_img_points = rel_roi_point_to_rel_img_point(\n                        rois[inds], rel_roi_points[inds], feat.shape[2:],\n                        spatial_scale).unsqueeze(0)\n                    point_feat = point_sample(feat, rel_img_points)\n                    point_feat = point_feat.squeeze(0).transpose(0, 1)\n                    point_feats.append(point_feat)\n            fine_grained_feats.append(torch.cat(point_feats, dim=0))\n        return torch.cat(fine_grained_feats, dim=1)\n\n    def _mask_point_forward_test(self, x, rois, label_pred, mask_pred,\n                                 img_metas):\n        \"\"\"Mask refining process with point head in testing.\n\n        Args:\n            x (tuple[Tensor]): Feature maps of all scale level.\n            rois (Tensor): shape (num_rois, 5).\n            label_pred (Tensor): The predication class for each rois.\n            mask_pred (Tensor): The predication coarse masks of\n                shape (num_rois, num_classes, small_size, small_size).\n            img_metas (list[dict]): Image meta info.\n\n        Returns:\n            Tensor: The refined masks of shape (num_rois, num_classes,\n                large_size, large_size).\n        \"\"\"\n        refined_mask_pred = mask_pred.clone()\n        for subdivision_step in range(self.test_cfg.subdivision_steps):\n            refined_mask_pred = F.interpolate(\n                refined_mask_pred,\n                scale_factor=self.test_cfg.scale_factor,\n                mode='bilinear',\n                align_corners=False)\n            # If `subdivision_num_points` is larger or equal to the\n            # resolution of the next step, then we can skip this step\n            num_rois, channels, mask_height, mask_width = \\\n                refined_mask_pred.shape\n            if (self.test_cfg.subdivision_num_points >=\n                    self.test_cfg.scale_factor**2 * mask_height * mask_width\n                    and\n                    subdivision_step < self.test_cfg.subdivision_steps - 1):\n                continue\n            point_indices, rel_roi_points = \\\n                self.point_head.get_roi_rel_points_test(\n                    refined_mask_pred, label_pred, cfg=self.test_cfg)\n            fine_grained_point_feats = self._get_fine_grained_point_feats(\n                x, rois, rel_roi_points, img_metas)\n            coarse_point_feats = point_sample(mask_pred, rel_roi_points)\n            mask_point_pred = self.point_head(fine_grained_point_feats,\n                                              coarse_point_feats)\n\n            point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1)\n            refined_mask_pred = refined_mask_pred.reshape(\n                num_rois, channels, mask_height * mask_width)\n            refined_mask_pred = refined_mask_pred.scatter_(\n                2, 
point_indices, mask_point_pred)\n            refined_mask_pred = refined_mask_pred.view(num_rois, channels,\n                                                       mask_height, mask_width)\n\n        return refined_mask_pred\n\n    def simple_test_mask(self,\n                         x,\n                         img_metas,\n                         det_bboxes,\n                         det_labels,\n                         rescale=False):\n        \"\"\"Obtain mask prediction without augmentation.\"\"\"\n        ori_shapes = tuple(meta['ori_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        if isinstance(scale_factors[0], float):\n            warnings.warn(\n                'Scale factor in img_metas should be a '\n                'ndarray with shape (4,) '\n                'arrange as (factor_w, factor_h, factor_w, factor_h), '\n                'The scale_factor with float type has been deprecated. ')\n            scale_factors = np.array([scale_factors] * 4, dtype=np.float32)\n\n        num_imgs = len(det_bboxes)\n        if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n            segm_results = [[[] for _ in range(self.mask_head.num_classes)]\n                            for _ in range(num_imgs)]\n        else:\n            # if det_bboxes is rescaled to the original image size, we need to\n            # rescale it back to the testing scale to obtain RoIs.\n            _bboxes = [det_bboxes[i][:, :4] for i in range(len(det_bboxes))]\n            if rescale:\n                scale_factors = [\n                    torch.from_numpy(scale_factor).to(det_bboxes[0].device)\n                    for scale_factor in scale_factors\n                ]\n                _bboxes = [\n                    _bboxes[i] * scale_factors[i] for i in range(len(_bboxes))\n                ]\n\n            mask_rois = bbox2roi(_bboxes)\n            mask_results = self._mask_forward(x, mask_rois)\n            # split batch mask prediction back to each image\n            mask_pred = mask_results['mask_pred']\n            num_mask_roi_per_img = [len(det_bbox) for det_bbox in det_bboxes]\n            mask_preds = mask_pred.split(num_mask_roi_per_img, 0)\n            mask_rois = mask_rois.split(num_mask_roi_per_img, 0)\n\n            # apply mask post-processing to each image individually\n            segm_results = []\n            for i in range(num_imgs):\n                if det_bboxes[i].shape[0] == 0:\n                    segm_results.append(\n                        [[] for _ in range(self.mask_head.num_classes)])\n                else:\n                    x_i = [xx[[i]] for xx in x]\n                    mask_rois_i = mask_rois[i]\n                    mask_rois_i[:, 0] = 0  # TODO: remove this hack\n                    mask_pred_i = self._mask_point_forward_test(\n                        x_i, mask_rois_i, det_labels[i], mask_preds[i],\n                        [img_metas])\n                    segm_result = self.mask_head.get_seg_masks(\n                        mask_pred_i, _bboxes[i], det_labels[i], self.test_cfg,\n                        ori_shapes[i], scale_factors[i], rescale)\n                    segm_results.append(segm_result)\n        return segm_results\n\n    def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):\n        \"\"\"Test for mask head with test time augmentation.\"\"\"\n        if det_bboxes.shape[0] == 0:\n            segm_result = [[] for _ in range(self.mask_head.num_classes)]\n        else:\n        
    aug_masks = []\n            for x, img_meta in zip(feats, img_metas):\n                img_shape = img_meta[0]['img_shape']\n                scale_factor = img_meta[0]['scale_factor']\n                flip = img_meta[0]['flip']\n                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,\n                                       scale_factor, flip)\n                mask_rois = bbox2roi([_bboxes])\n                mask_results = self._mask_forward(x, mask_rois)\n                mask_results['mask_pred'] = self._mask_point_forward_test(\n                    x, mask_rois, det_labels, mask_results['mask_pred'],\n                    img_meta)\n                # convert to numpy array to save memory\n                aug_masks.append(\n                    mask_results['mask_pred'].sigmoid().cpu().numpy())\n            merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg)\n\n            ori_shape = img_metas[0][0]['ori_shape']\n            segm_result = self.mask_head.get_seg_masks(\n                merged_masks,\n                det_bboxes,\n                det_labels,\n                self.test_cfg,\n                ori_shape,\n                scale_factor=1.0,\n                rescale=False)\n        return segm_result\n\n    def _onnx_get_fine_grained_point_feats(self, x, rois, rel_roi_points):\n        \"\"\"Export the process of sampling fine grained feats to onnx.\n\n        Args:\n            x (tuple[Tensor]): Feature maps of all scale level.\n            rois (Tensor): shape (num_rois, 5).\n            rel_roi_points (Tensor): A tensor of shape (num_rois, num_points,\n                2) that contains [0, 1] x [0, 1] normalized coordinates of the\n                most uncertain points from the [mask_height, mask_width] grid.\n\n        Returns:\n            Tensor: The fine grained features for each points,\n                has shape (num_rois, feats_channels, num_points).\n        \"\"\"\n        batch_size = x[0].shape[0]\n        num_rois = rois.shape[0]\n        fine_grained_feats = []\n        for idx in range(self.mask_roi_extractor.num_inputs):\n            feats = x[idx]\n            spatial_scale = 1. 
/ float(\n                self.mask_roi_extractor.featmap_strides[idx])\n\n            rel_img_points = rel_roi_point_to_rel_img_point(\n                rois, rel_roi_points, feats, spatial_scale)\n            channels = feats.shape[1]\n            num_points = rel_img_points.shape[1]\n            rel_img_points = rel_img_points.reshape(batch_size, -1, num_points,\n                                                    2)\n            point_feats = point_sample(feats, rel_img_points)\n            point_feats = point_feats.transpose(1, 2).reshape(\n                num_rois, channels, num_points)\n            fine_grained_feats.append(point_feats)\n        return torch.cat(fine_grained_feats, dim=1)\n\n    def _mask_point_onnx_export(self, x, rois, label_pred, mask_pred):\n        \"\"\"Export mask refining process with point head to onnx.\n\n        Args:\n            x (tuple[Tensor]): Feature maps of all scale level.\n            rois (Tensor): shape (num_rois, 5).\n            label_pred (Tensor): The predication class for each rois.\n            mask_pred (Tensor): The predication coarse masks of\n                shape (num_rois, num_classes, small_size, small_size).\n\n        Returns:\n            Tensor: The refined masks of shape (num_rois, num_classes,\n                large_size, large_size).\n        \"\"\"\n        refined_mask_pred = mask_pred.clone()\n        for subdivision_step in range(self.test_cfg.subdivision_steps):\n            refined_mask_pred = F.interpolate(\n                refined_mask_pred,\n                scale_factor=self.test_cfg.scale_factor,\n                mode='bilinear',\n                align_corners=False)\n            # If `subdivision_num_points` is larger or equal to the\n            # resolution of the next step, then we can skip this step\n            num_rois, channels, mask_height, mask_width = \\\n                refined_mask_pred.shape\n            if (self.test_cfg.subdivision_num_points >=\n                    self.test_cfg.scale_factor**2 * mask_height * mask_width\n                    and\n                    subdivision_step < self.test_cfg.subdivision_steps - 1):\n                continue\n            point_indices, rel_roi_points = \\\n                self.point_head.get_roi_rel_points_test(\n                    refined_mask_pred, label_pred, cfg=self.test_cfg)\n            fine_grained_point_feats = self._onnx_get_fine_grained_point_feats(\n                x, rois, rel_roi_points)\n            coarse_point_feats = point_sample(mask_pred, rel_roi_points)\n            mask_point_pred = self.point_head(fine_grained_point_feats,\n                                              coarse_point_feats)\n\n            point_indices = point_indices.unsqueeze(1).expand(-1, channels, -1)\n            refined_mask_pred = refined_mask_pred.reshape(\n                num_rois, channels, mask_height * mask_width)\n\n            is_trt_backend = os.environ.get('ONNX_BACKEND') == 'MMCVTensorRT'\n            # avoid ScatterElements op in ONNX for TensorRT\n            if is_trt_backend:\n                mask_shape = refined_mask_pred.shape\n                point_shape = point_indices.shape\n                inds_dim0 = torch.arange(point_shape[0]).reshape(\n                    point_shape[0], 1, 1).expand_as(point_indices)\n                inds_dim1 = torch.arange(point_shape[1]).reshape(\n                    1, point_shape[1], 1).expand_as(point_indices)\n                inds_1d = inds_dim0.reshape(\n                    -1) * mask_shape[1] * mask_shape[2] + 
inds_dim1.reshape(\n                        -1) * mask_shape[2] + point_indices.reshape(-1)\n                refined_mask_pred = refined_mask_pred.reshape(-1)\n                refined_mask_pred[inds_1d] = mask_point_pred.reshape(-1)\n                refined_mask_pred = refined_mask_pred.reshape(*mask_shape)\n            else:\n                refined_mask_pred = refined_mask_pred.scatter_(\n                    2, point_indices, mask_point_pred)\n\n            refined_mask_pred = refined_mask_pred.view(num_rois, channels,\n                                                       mask_height, mask_width)\n\n        return refined_mask_pred\n\n    def mask_onnx_export(self, x, img_metas, det_bboxes, det_labels, **kwargs):\n        \"\"\"Export mask branch to onnx which supports batch inference.\n\n        Args:\n            x (tuple[Tensor]): Feature maps of all scale level.\n            img_metas (list[dict]): Image meta info.\n            det_bboxes (Tensor): Bboxes and corresponding scores.\n                has shape [N, num_bboxes, 5].\n            det_labels (Tensor): class labels of\n                shape [N, num_bboxes].\n\n        Returns:\n            Tensor: The segmentation results of shape [N, num_bboxes,\n                image_height, image_width].\n        \"\"\"\n        if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n            raise RuntimeError('[ONNX Error] Can not record MaskHead '\n                               'as it has not been executed this time')\n        batch_size = det_bboxes.size(0)\n        # if det_bboxes is rescaled to the original image size, we need to\n        # rescale it back to the testing scale to obtain RoIs.\n        det_bboxes = det_bboxes[..., :4]\n        batch_index = torch.arange(\n            det_bboxes.size(0), device=det_bboxes.device).float().view(\n                -1, 1, 1).expand(det_bboxes.size(0), det_bboxes.size(1), 1)\n        mask_rois = torch.cat([batch_index, det_bboxes], dim=-1)\n        mask_rois = mask_rois.view(-1, 5)\n        mask_results = self._mask_forward(x, mask_rois)\n        mask_pred = mask_results['mask_pred']\n        max_shape = img_metas[0]['img_shape_for_onnx']\n        num_det = det_bboxes.shape[1]\n        det_bboxes = det_bboxes.reshape(-1, 4)\n        det_labels = det_labels.reshape(-1)\n\n        mask_pred = self._mask_point_onnx_export(x, mask_rois, det_labels,\n                                                 mask_pred)\n\n        segm_results = self.mask_head.onnx_export(mask_pred, det_bboxes,\n                                                  det_labels, self.test_cfg,\n                                                  max_shape)\n        segm_results = segm_results.reshape(batch_size, num_det, max_shape[0],\n                                            max_shape[1])\n        return segm_results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/roi_extractors/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_roi_extractor import BaseRoIExtractor\nfrom .generic_roi_extractor import GenericRoIExtractor\nfrom .single_level_roi_extractor import SingleRoIExtractor\n\n__all__ = ['BaseRoIExtractor', 'SingleRoIExtractor', 'GenericRoIExtractor']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/roi_extractors/base_roi_extractor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nimport torch\nimport torch.nn as nn\nfrom mmcv import ops\nfrom mmcv.runner import BaseModule\n\n\nclass BaseRoIExtractor(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for RoI extractor.\n\n    Args:\n        roi_layer (dict): Specify RoI layer type and arguments.\n        out_channels (int): Output channels of RoI layers.\n        featmap_strides (int): Strides of input feature maps.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 roi_layer,\n                 out_channels,\n                 featmap_strides,\n                 init_cfg=None):\n        super(BaseRoIExtractor, self).__init__(init_cfg)\n        self.roi_layers = self.build_roi_layers(roi_layer, featmap_strides)\n        self.out_channels = out_channels\n        self.featmap_strides = featmap_strides\n        self.fp16_enabled = False\n\n    @property\n    def num_inputs(self):\n        \"\"\"int: Number of input feature maps.\"\"\"\n        return len(self.featmap_strides)\n\n    def build_roi_layers(self, layer_cfg, featmap_strides):\n        \"\"\"Build RoI operator to extract feature from each level feature map.\n\n        Args:\n            layer_cfg (dict): Dictionary to construct and config RoI layer\n                operation. Options are modules under ``mmcv/ops`` such as\n                ``RoIAlign``.\n            featmap_strides (List[int]): The stride of input feature map w.r.t\n                to the original image size, which would be used to scale RoI\n                coordinate (original image coordinate system) to feature\n                coordinate system.\n\n        Returns:\n            nn.ModuleList: The RoI extractor modules for each level feature\n                map.\n        \"\"\"\n\n        cfg = layer_cfg.copy()\n        layer_type = cfg.pop('type')\n        assert hasattr(ops, layer_type)\n        layer_cls = getattr(ops, layer_type)\n        roi_layers = nn.ModuleList(\n            [layer_cls(spatial_scale=1 / s, **cfg) for s in featmap_strides])\n        return roi_layers\n\n    def roi_rescale(self, rois, scale_factor):\n        \"\"\"Scale RoI coordinates by scale factor.\n\n        Args:\n            rois (torch.Tensor): RoI (Region of Interest), shape (n, 5)\n            scale_factor (float): Scale factor that RoI will be multiplied by.\n\n        Returns:\n            torch.Tensor: Scaled RoI.\n        \"\"\"\n\n        cx = (rois[:, 1] + rois[:, 3]) * 0.5\n        cy = (rois[:, 2] + rois[:, 4]) * 0.5\n        w = rois[:, 3] - rois[:, 1]\n        h = rois[:, 4] - rois[:, 2]\n        new_w = w * scale_factor\n        new_h = h * scale_factor\n        x1 = cx - new_w * 0.5\n        x2 = cx + new_w * 0.5\n        y1 = cy - new_h * 0.5\n        y2 = cy + new_h * 0.5\n        new_rois = torch.stack((rois[:, 0], x1, y1, x2, y2), dim=-1)\n        return new_rois\n\n    @abstractmethod\n    def forward(self, feats, rois, roi_scale_factor=None):\n        pass\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/roi_extractors/generic_roi_extractor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn.bricks import build_plugin_layer\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.models.builder import ROI_EXTRACTORS\nfrom .base_roi_extractor import BaseRoIExtractor\n\n\n@ROI_EXTRACTORS.register_module()\nclass GenericRoIExtractor(BaseRoIExtractor):\n    \"\"\"Extract RoI features from all level feature maps levels.\n\n    This is the implementation of `A novel Region of Interest Extraction Layer\n    for Instance Segmentation <https://arxiv.org/abs/2004.13665>`_.\n\n    Args:\n        aggregation (str): The method to aggregate multiple feature maps.\n            Options are 'sum', 'concat'. Default: 'sum'.\n        pre_cfg (dict | None): Specify pre-processing modules. Default: None.\n        post_cfg (dict | None): Specify post-processing modules. Default: None.\n        kwargs (keyword arguments): Arguments that are the same\n            as :class:`BaseRoIExtractor`.\n    \"\"\"\n\n    def __init__(self,\n                 aggregation='sum',\n                 pre_cfg=None,\n                 post_cfg=None,\n                 **kwargs):\n        super(GenericRoIExtractor, self).__init__(**kwargs)\n\n        assert aggregation in ['sum', 'concat']\n\n        self.aggregation = aggregation\n        self.with_post = post_cfg is not None\n        self.with_pre = pre_cfg is not None\n        # build pre/post processing modules\n        if self.with_post:\n            self.post_module = build_plugin_layer(post_cfg, '_post_module')[1]\n        if self.with_pre:\n            self.pre_module = build_plugin_layer(pre_cfg, '_pre_module')[1]\n\n    @force_fp32(apply_to=('feats', ), out_fp16=True)\n    def forward(self, feats, rois, roi_scale_factor=None):\n        \"\"\"Forward function.\"\"\"\n        if len(feats) == 1:\n            return self.roi_layers[0](feats[0], rois)\n\n        out_size = self.roi_layers[0].output_size\n        num_levels = len(feats)\n        roi_feats = feats[0].new_zeros(\n            rois.size(0), self.out_channels, *out_size)\n\n        # some times rois is an empty tensor\n        if roi_feats.shape[0] == 0:\n            return roi_feats\n\n        if roi_scale_factor is not None:\n            rois = self.roi_rescale(rois, roi_scale_factor)\n\n        # mark the starting channels for concat mode\n        start_channels = 0\n        for i in range(num_levels):\n            roi_feats_t = self.roi_layers[i](feats[i], rois)\n            end_channels = start_channels + roi_feats_t.size(1)\n            if self.with_pre:\n                # apply pre-processing to a RoI extracted from each layer\n                roi_feats_t = self.pre_module(roi_feats_t)\n            if self.aggregation == 'sum':\n                # and sum them all\n                roi_feats = roi_feats + roi_feats_t\n            else:\n                # and concat them along channel dimension\n                roi_feats[:, start_channels:end_channels] = roi_feats_t\n            # update channels starting position\n            start_channels = end_channels\n        # check if concat channels match at the end\n        if self.aggregation == 'concat':\n            assert start_channels == self.out_channels\n\n        if self.with_post:\n            # apply post-processing before return the result\n            roi_feats = self.post_module(roi_feats)\n        return roi_feats\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/roi_extractors/single_level_roi_extractor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.runner import force_fp32\n\nfrom mmdet.models.builder import ROI_EXTRACTORS\nfrom .base_roi_extractor import BaseRoIExtractor\n\n\n@ROI_EXTRACTORS.register_module()\nclass SingleRoIExtractor(BaseRoIExtractor):\n    \"\"\"Extract RoI features from a single level feature map.\n\n    If there are multiple input feature levels, each RoI is mapped to a level\n    according to its scale. The mapping rule is proposed in\n    `FPN <https://arxiv.org/abs/1612.03144>`_.\n\n    Args:\n        roi_layer (dict): Specify RoI layer type and arguments.\n        out_channels (int): Output channels of RoI layers.\n        featmap_strides (List[int]): Strides of input feature maps.\n        finest_scale (int): Scale threshold of mapping to level 0. Default: 56.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 roi_layer,\n                 out_channels,\n                 featmap_strides,\n                 finest_scale=56,\n                 init_cfg=None):\n        super(SingleRoIExtractor, self).__init__(roi_layer, out_channels,\n                                                 featmap_strides, init_cfg)\n        self.finest_scale = finest_scale\n\n    def map_roi_levels(self, rois, num_levels):\n        \"\"\"Map rois to corresponding feature levels by scales.\n\n        - scale < finest_scale * 2: level 0\n        - finest_scale * 2 <= scale < finest_scale * 4: level 1\n        - finest_scale * 4 <= scale < finest_scale * 8: level 2\n        - scale >= finest_scale * 8: level 3\n\n        Args:\n            rois (Tensor): Input RoIs, shape (k, 5).\n            num_levels (int): Total level number.\n\n        Returns:\n            Tensor: Level index (0-based) of each RoI, shape (k, )\n        \"\"\"\n        scale = torch.sqrt(\n            (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))\n        target_lvls = torch.floor(torch.log2(scale / self.finest_scale + 1e-6))\n        target_lvls = target_lvls.clamp(min=0, max=num_levels - 1).long()\n        return target_lvls\n\n    @force_fp32(apply_to=('feats', ), out_fp16=True)\n    def forward(self, feats, rois, roi_scale_factor=None):\n        \"\"\"Forward function.\"\"\"\n        out_size = self.roi_layers[0].output_size\n        num_levels = len(feats)\n        expand_dims = (-1, self.out_channels * out_size[0] * out_size[1])\n        if torch.onnx.is_in_onnx_export():\n            # Work around to export mask-rcnn to onnx\n            roi_feats = rois[:, :1].clone().detach()\n            roi_feats = roi_feats.expand(*expand_dims)\n            roi_feats = roi_feats.reshape(-1, self.out_channels, *out_size)\n            roi_feats = roi_feats * 0\n        else:\n            roi_feats = feats[0].new_zeros(\n                rois.size(0), self.out_channels, *out_size)\n        # TODO: remove this when parrots supports\n        if torch.__version__ == 'parrots':\n            roi_feats.requires_grad = True\n\n        if num_levels == 1:\n            if len(rois) == 0:\n                return roi_feats\n            return self.roi_layers[0](feats[0], rois)\n\n        target_lvls = self.map_roi_levels(rois, num_levels)\n\n        if roi_scale_factor is not None:\n            rois = self.roi_rescale(rois, roi_scale_factor)\n\n        for i in range(num_levels):\n            mask = target_lvls == i\n            if torch.onnx.is_in_onnx_export():\n                # 
To keep all roi_align nodes exported to onnx\n                # and skip nonzero op\n                mask = mask.float().unsqueeze(-1)\n                # select target level rois and reset the rest rois to zero.\n                rois_i = rois.clone().detach()\n                rois_i = rois_i * mask\n                mask_exp = mask.expand(*expand_dims).reshape(roi_feats.shape)\n                roi_feats_t = self.roi_layers[i](feats[i], rois_i)\n                roi_feats_t = roi_feats_t * mask_exp\n                roi_feats = roi_feats + roi_feats_t\n                continue\n            inds = mask.nonzero(as_tuple=False).squeeze(1)\n            if inds.numel() > 0:\n                rois_ = rois[inds]\n                roi_feats_t = self.roi_layers[i](feats[i], rois_)\n                roi_feats[inds] = roi_feats_t\n            else:\n                # Sometimes some pyramid levels will not be used for RoI\n                # feature extraction and this will cause an incomplete\n                # computation graph in one GPU, which is different from those\n                # in other GPUs and will cause a hanging error.\n                # Therefore, we add it to ensure each feature pyramid is\n                # included in the computation graph to avoid runtime bugs.\n                roi_feats = roi_feats + sum(\n                    x.view(-1)[0]\n                    for x in self.parameters()) * 0. + feats[i].sum() * 0.\n        return roi_feats\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/scnet_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nimport torch.nn.functional as F\n\nfrom mmdet.core import (bbox2result, bbox2roi, bbox_mapping, merge_aug_bboxes,\n                        merge_aug_masks, multiclass_nms)\nfrom ..builder import HEADS, build_head, build_roi_extractor\nfrom ..utils.brick_wrappers import adaptive_avg_pool2d\nfrom .cascade_roi_head import CascadeRoIHead\n\n\n@HEADS.register_module()\nclass SCNetRoIHead(CascadeRoIHead):\n    \"\"\"RoIHead for `SCNet <https://arxiv.org/abs/2012.10150>`_.\n\n    Args:\n        num_stages (int): number of cascade stages.\n        stage_loss_weights (list): loss weight of cascade stages.\n        semantic_roi_extractor (dict): config to init semantic roi extractor.\n        semantic_head (dict): config to init semantic head.\n        feat_relay_head (dict): config to init feature_relay_head.\n        glbctx_head (dict): config to init global context head.\n    \"\"\"\n\n    def __init__(self,\n                 num_stages,\n                 stage_loss_weights,\n                 semantic_roi_extractor=None,\n                 semantic_head=None,\n                 feat_relay_head=None,\n                 glbctx_head=None,\n                 **kwargs):\n        super(SCNetRoIHead, self).__init__(num_stages, stage_loss_weights,\n                                           **kwargs)\n        assert self.with_bbox and self.with_mask\n        assert not self.with_shared_head  # shared head is not supported\n\n        if semantic_head is not None:\n            self.semantic_roi_extractor = build_roi_extractor(\n                semantic_roi_extractor)\n            self.semantic_head = build_head(semantic_head)\n\n        if feat_relay_head is not None:\n            self.feat_relay_head = build_head(feat_relay_head)\n\n        if glbctx_head is not None:\n            self.glbctx_head = build_head(glbctx_head)\n\n    def init_mask_head(self, mask_roi_extractor, mask_head):\n        \"\"\"Initialize ``mask_head``\"\"\"\n        if mask_roi_extractor is not None:\n            self.mask_roi_extractor = build_roi_extractor(mask_roi_extractor)\n            self.mask_head = build_head(mask_head)\n\n    @property\n    def with_semantic(self):\n        \"\"\"bool: whether the head has semantic head\"\"\"\n        return hasattr(self,\n                       'semantic_head') and self.semantic_head is not None\n\n    @property\n    def with_feat_relay(self):\n        \"\"\"bool: whether the head has feature relay head\"\"\"\n        return (hasattr(self, 'feat_relay_head')\n                and self.feat_relay_head is not None)\n\n    @property\n    def with_glbctx(self):\n        \"\"\"bool: whether the head has global context head\"\"\"\n        return hasattr(self, 'glbctx_head') and self.glbctx_head is not None\n\n    def _fuse_glbctx(self, roi_feats, glbctx_feat, rois):\n        \"\"\"Fuse global context feats with roi feats.\"\"\"\n        assert roi_feats.size(0) == rois.size(0)\n        img_inds = torch.unique(rois[:, 0].cpu(), sorted=True).long()\n        fused_feats = torch.zeros_like(roi_feats)\n        for img_id in img_inds:\n            inds = (rois[:, 0] == img_id.item())\n            fused_feats[inds] = roi_feats[inds] + glbctx_feat[img_id]\n        return fused_feats\n\n    def _slice_pos_feats(self, feats, sampling_results):\n        \"\"\"Get features from pos rois.\"\"\"\n        num_rois = [res.bboxes.size(0) for res in sampling_results]\n        num_pos_rois = [res.pos_bboxes.size(0) for 
res in sampling_results]\n        inds = torch.zeros(sum(num_rois), dtype=torch.bool)\n        start = 0\n        for i in range(len(num_rois)):\n            start = 0 if i == 0 else start + num_rois[i - 1]\n            stop = start + num_pos_rois[i]\n            inds[start:stop] = 1\n        sliced_feats = feats[inds]\n        return sliced_feats\n\n    def _bbox_forward(self,\n                      stage,\n                      x,\n                      rois,\n                      semantic_feat=None,\n                      glbctx_feat=None):\n        \"\"\"Box head forward function used in both training and testing.\"\"\"\n        bbox_roi_extractor = self.bbox_roi_extractor[stage]\n        bbox_head = self.bbox_head[stage]\n        bbox_feats = bbox_roi_extractor(\n            x[:len(bbox_roi_extractor.featmap_strides)], rois)\n        if self.with_semantic and semantic_feat is not None:\n            bbox_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n                                                             rois)\n            if bbox_semantic_feat.shape[-2:] != bbox_feats.shape[-2:]:\n                bbox_semantic_feat = adaptive_avg_pool2d(\n                    bbox_semantic_feat, bbox_feats.shape[-2:])\n            bbox_feats = bbox_feats + bbox_semantic_feat\n        if self.with_glbctx and glbctx_feat is not None:\n            bbox_feats = self._fuse_glbctx(bbox_feats, glbctx_feat, rois)\n        cls_score, bbox_pred, relayed_feat = bbox_head(\n            bbox_feats, return_shared_feat=True)\n\n        bbox_results = dict(\n            cls_score=cls_score,\n            bbox_pred=bbox_pred,\n            relayed_feat=relayed_feat)\n        return bbox_results\n\n    def _mask_forward(self,\n                      x,\n                      rois,\n                      semantic_feat=None,\n                      glbctx_feat=None,\n                      relayed_feat=None):\n        \"\"\"Mask head forward function used in both training and testing.\"\"\"\n        mask_feats = self.mask_roi_extractor(\n            x[:self.mask_roi_extractor.num_inputs], rois)\n        if self.with_semantic and semantic_feat is not None:\n            mask_semantic_feat = self.semantic_roi_extractor([semantic_feat],\n                                                             rois)\n            if mask_semantic_feat.shape[-2:] != mask_feats.shape[-2:]:\n                mask_semantic_feat = F.adaptive_avg_pool2d(\n                    mask_semantic_feat, mask_feats.shape[-2:])\n            mask_feats = mask_feats + mask_semantic_feat\n        if self.with_glbctx and glbctx_feat is not None:\n            mask_feats = self._fuse_glbctx(mask_feats, glbctx_feat, rois)\n        if self.with_feat_relay and relayed_feat is not None:\n            mask_feats = mask_feats + relayed_feat\n        mask_pred = self.mask_head(mask_feats)\n        mask_results = dict(mask_pred=mask_pred)\n\n        return mask_results\n\n    def _bbox_forward_train(self,\n                            stage,\n                            x,\n                            sampling_results,\n                            gt_bboxes,\n                            gt_labels,\n                            rcnn_train_cfg,\n                            semantic_feat=None,\n                            glbctx_feat=None):\n        \"\"\"Run forward function and calculate loss for box head in training.\"\"\"\n        bbox_head = self.bbox_head[stage]\n        rois = bbox2roi([res.bboxes for res in sampling_results])\n        bbox_results = 
self._bbox_forward(\n            stage,\n            x,\n            rois,\n            semantic_feat=semantic_feat,\n            glbctx_feat=glbctx_feat)\n\n        bbox_targets = bbox_head.get_targets(sampling_results, gt_bboxes,\n                                             gt_labels, rcnn_train_cfg)\n        loss_bbox = bbox_head.loss(bbox_results['cls_score'],\n                                   bbox_results['bbox_pred'], rois,\n                                   *bbox_targets)\n\n        bbox_results.update(\n            loss_bbox=loss_bbox, rois=rois, bbox_targets=bbox_targets)\n        return bbox_results\n\n    def _mask_forward_train(self,\n                            x,\n                            sampling_results,\n                            gt_masks,\n                            rcnn_train_cfg,\n                            semantic_feat=None,\n                            glbctx_feat=None,\n                            relayed_feat=None):\n        \"\"\"Run forward function and calculate loss for mask head in\n        training.\"\"\"\n        pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])\n        mask_results = self._mask_forward(\n            x,\n            pos_rois,\n            semantic_feat=semantic_feat,\n            glbctx_feat=glbctx_feat,\n            relayed_feat=relayed_feat)\n\n        mask_targets = self.mask_head.get_targets(sampling_results, gt_masks,\n                                                  rcnn_train_cfg)\n        pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n        loss_mask = self.mask_head.loss(mask_results['mask_pred'],\n                                        mask_targets, pos_labels)\n\n        mask_results = loss_mask\n        return mask_results\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      gt_semantic_seg=None):\n        \"\"\"\n        Args:\n            x (list[Tensor]): list of multi-level img features.\n            img_metas (list[dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n            proposal_list (list[Tensors]): list of region proposals.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_ignore (None, list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n            gt_masks (None, Tensor) : true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n            gt_semantic_seg (None, list[Tensor]): semantic segmentation masks\n                used if the architecture supports semantic segmentation task.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components\n        \"\"\"\n        losses = dict()\n\n        # semantic segmentation branch\n        if self.with_semantic:\n            semantic_pred, semantic_feat = 
self.semantic_head(x)\n            loss_seg = self.semantic_head.loss(semantic_pred, gt_semantic_seg)\n            losses['loss_semantic_seg'] = loss_seg\n        else:\n            semantic_feat = None\n\n        # global context branch\n        if self.with_glbctx:\n            mc_pred, glbctx_feat = self.glbctx_head(x)\n            loss_glbctx = self.glbctx_head.loss(mc_pred, gt_labels)\n            losses['loss_glbctx'] = loss_glbctx\n        else:\n            glbctx_feat = None\n\n        for i in range(self.num_stages):\n            self.current_stage = i\n            rcnn_train_cfg = self.train_cfg[i]\n            lw = self.stage_loss_weights[i]\n\n            # assign gts and sample proposals\n            sampling_results = []\n            bbox_assigner = self.bbox_assigner[i]\n            bbox_sampler = self.bbox_sampler[i]\n            num_imgs = len(img_metas)\n            if gt_bboxes_ignore is None:\n                gt_bboxes_ignore = [None for _ in range(num_imgs)]\n\n            for j in range(num_imgs):\n                assign_result = bbox_assigner.assign(proposal_list[j],\n                                                     gt_bboxes[j],\n                                                     gt_bboxes_ignore[j],\n                                                     gt_labels[j])\n                sampling_result = bbox_sampler.sample(\n                    assign_result,\n                    proposal_list[j],\n                    gt_bboxes[j],\n                    gt_labels[j],\n                    feats=[lvl_feat[j][None] for lvl_feat in x])\n                sampling_results.append(sampling_result)\n\n            bbox_results = \\\n                self._bbox_forward_train(\n                    i, x, sampling_results, gt_bboxes, gt_labels,\n                    rcnn_train_cfg, semantic_feat, glbctx_feat)\n            roi_labels = bbox_results['bbox_targets'][0]\n\n            for name, value in bbox_results['loss_bbox'].items():\n                losses[f's{i}.{name}'] = (\n                    value * lw if 'loss' in name else value)\n\n            # refine boxes\n            if i < self.num_stages - 1:\n                pos_is_gts = [res.pos_is_gt for res in sampling_results]\n                with torch.no_grad():\n                    proposal_list = self.bbox_head[i].refine_bboxes(\n                        bbox_results['rois'], roi_labels,\n                        bbox_results['bbox_pred'], pos_is_gts, img_metas)\n\n        if self.with_feat_relay:\n            relayed_feat = self._slice_pos_feats(bbox_results['relayed_feat'],\n                                                 sampling_results)\n            relayed_feat = self.feat_relay_head(relayed_feat)\n        else:\n            relayed_feat = None\n\n        mask_results = self._mask_forward_train(x, sampling_results, gt_masks,\n                                                rcnn_train_cfg, semantic_feat,\n                                                glbctx_feat, relayed_feat)\n        mask_lw = sum(self.stage_loss_weights)\n        losses['loss_mask'] = mask_lw * mask_results['loss_mask']\n\n        return losses\n\n    def simple_test(self, x, proposal_list, img_metas, rescale=False):\n        \"\"\"Test without augmentation.\n\n        Args:\n            x (tuple[Tensor]): Features from upstream network. 
Each\n                has shape (batch_size, c, h, w).\n            proposal_list (list(Tensor)): Proposals from rpn head.\n                Each has shape (num_proposals, 5), last dimension\n                5 represent (x1, y1, x2, y2, score).\n            img_metas (list[dict]): Meta information of images.\n            rescale (bool): Whether to rescale the results to\n                the original image. Default: True.\n\n        Returns:\n            list[list[np.ndarray]] or list[tuple]: When no mask branch,\n            it is bbox results of each image and classes with type\n            `list[list[np.ndarray]]`. The outer list\n            corresponds to each image. The inner list\n            corresponds to each class. When the model has mask branch,\n            it contains bbox results and mask results.\n            The outer list corresponds to each image, and first element\n            of tuple is bbox results, second element is mask results.\n        \"\"\"\n        if self.with_semantic:\n            _, semantic_feat = self.semantic_head(x)\n        else:\n            semantic_feat = None\n\n        if self.with_glbctx:\n            mc_pred, glbctx_feat = self.glbctx_head(x)\n        else:\n            glbctx_feat = None\n\n        num_imgs = len(proposal_list)\n        img_shapes = tuple(meta['img_shape'] for meta in img_metas)\n        ori_shapes = tuple(meta['ori_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        # \"ms\" in variable names means multi-stage\n        ms_scores = []\n        rcnn_test_cfg = self.test_cfg\n\n        rois = bbox2roi(proposal_list)\n\n        if rois.shape[0] == 0:\n            # There is no proposal in the whole batch\n            bbox_results = [[\n                np.zeros((0, 5), dtype=np.float32)\n                for _ in range(self.bbox_head[-1].num_classes)\n            ]] * num_imgs\n\n            if self.with_mask:\n                mask_classes = self.mask_head.num_classes\n                segm_results = [[[] for _ in range(mask_classes)]\n                                for _ in range(num_imgs)]\n                results = list(zip(bbox_results, segm_results))\n            else:\n                results = bbox_results\n\n            return results\n\n        for i in range(self.num_stages):\n            bbox_head = self.bbox_head[i]\n            bbox_results = self._bbox_forward(\n                i,\n                x,\n                rois,\n                semantic_feat=semantic_feat,\n                glbctx_feat=glbctx_feat)\n            # split batch bbox prediction back to each image\n            cls_score = bbox_results['cls_score']\n            bbox_pred = bbox_results['bbox_pred']\n            num_proposals_per_img = tuple(len(p) for p in proposal_list)\n            rois = rois.split(num_proposals_per_img, 0)\n            cls_score = cls_score.split(num_proposals_per_img, 0)\n            bbox_pred = bbox_pred.split(num_proposals_per_img, 0)\n            ms_scores.append(cls_score)\n\n            if i < self.num_stages - 1:\n                refine_rois_list = []\n                for j in range(num_imgs):\n                    if rois[j].shape[0] > 0:\n                        bbox_label = cls_score[j][:, :-1].argmax(dim=1)\n                        refine_rois = bbox_head.regress_by_class(\n                            rois[j], bbox_label, bbox_pred[j], img_metas[j])\n                        refine_rois_list.append(refine_rois)\n                rois = 
torch.cat(refine_rois_list)\n\n        # average scores of each image by stages\n        cls_score = [\n            sum([score[i] for score in ms_scores]) / float(len(ms_scores))\n            for i in range(num_imgs)\n        ]\n\n        # apply bbox post-processing to each image individually\n        det_bboxes = []\n        det_labels = []\n        for i in range(num_imgs):\n            det_bbox, det_label = self.bbox_head[-1].get_bboxes(\n                rois[i],\n                cls_score[i],\n                bbox_pred[i],\n                img_shapes[i],\n                scale_factors[i],\n                rescale=rescale,\n                cfg=rcnn_test_cfg)\n            det_bboxes.append(det_bbox)\n            det_labels.append(det_label)\n        det_bbox_results = [\n            bbox2result(det_bboxes[i], det_labels[i],\n                        self.bbox_head[-1].num_classes)\n            for i in range(num_imgs)\n        ]\n\n        if self.with_mask:\n            if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n                mask_classes = self.mask_head.num_classes\n                det_segm_results = [[[] for _ in range(mask_classes)]\n                                    for _ in range(num_imgs)]\n            else:\n                if rescale and not isinstance(scale_factors[0], float):\n                    scale_factors = [\n                        torch.from_numpy(scale_factor).to(det_bboxes[0].device)\n                        for scale_factor in scale_factors\n                    ]\n                _bboxes = [\n                    det_bboxes[i][:, :4] *\n                    scale_factors[i] if rescale else det_bboxes[i]\n                    for i in range(num_imgs)\n                ]\n                mask_rois = bbox2roi(_bboxes)\n\n                # get relay feature on mask_rois\n                bbox_results = self._bbox_forward(\n                    -1,\n                    x,\n                    mask_rois,\n                    semantic_feat=semantic_feat,\n                    glbctx_feat=glbctx_feat)\n                relayed_feat = bbox_results['relayed_feat']\n                relayed_feat = self.feat_relay_head(relayed_feat)\n\n                mask_results = self._mask_forward(\n                    x,\n                    mask_rois,\n                    semantic_feat=semantic_feat,\n                    glbctx_feat=glbctx_feat,\n                    relayed_feat=relayed_feat)\n                mask_pred = mask_results['mask_pred']\n\n                # split batch mask prediction back to each image\n                num_bbox_per_img = tuple(len(_bbox) for _bbox in _bboxes)\n                mask_preds = mask_pred.split(num_bbox_per_img, 0)\n\n                # apply mask post-processing to each image individually\n                det_segm_results = []\n                for i in range(num_imgs):\n                    if det_bboxes[i].shape[0] == 0:\n                        det_segm_results.append(\n                            [[] for _ in range(self.mask_head.num_classes)])\n                    else:\n                        segm_result = self.mask_head.get_seg_masks(\n                            mask_preds[i], _bboxes[i], det_labels[i],\n                            self.test_cfg, ori_shapes[i], scale_factors[i],\n                            rescale)\n                        det_segm_results.append(segm_result)\n\n        # return results\n        if self.with_mask:\n            return list(zip(det_bbox_results, det_segm_results))\n        else:\n            return 
det_bbox_results\n\n    def aug_test(self, img_feats, proposal_list, img_metas, rescale=False):\n        if self.with_semantic:\n            semantic_feats = [\n                self.semantic_head(feat)[1] for feat in img_feats\n            ]\n        else:\n            semantic_feats = [None] * len(img_metas)\n\n        if self.with_glbctx:\n            glbctx_feats = [self.glbctx_head(feat)[1] for feat in img_feats]\n        else:\n            glbctx_feats = [None] * len(img_metas)\n\n        rcnn_test_cfg = self.test_cfg\n        aug_bboxes = []\n        aug_scores = []\n        for x, img_meta, semantic_feat, glbctx_feat in zip(\n                img_feats, img_metas, semantic_feats, glbctx_feats):\n            # only one image in the batch\n            img_shape = img_meta[0]['img_shape']\n            scale_factor = img_meta[0]['scale_factor']\n            flip = img_meta[0]['flip']\n\n            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,\n                                     scale_factor, flip)\n            # \"ms\" in variable names means multi-stage\n            ms_scores = []\n\n            rois = bbox2roi([proposals])\n\n            if rois.shape[0] == 0:\n                # There is no proposal in the single image\n                aug_bboxes.append(rois.new_zeros(0, 4))\n                aug_scores.append(rois.new_zeros(0, 1))\n                continue\n\n            for i in range(self.num_stages):\n                bbox_head = self.bbox_head[i]\n                bbox_results = self._bbox_forward(\n                    i,\n                    x,\n                    rois,\n                    semantic_feat=semantic_feat,\n                    glbctx_feat=glbctx_feat)\n                ms_scores.append(bbox_results['cls_score'])\n                if i < self.num_stages - 1:\n                    bbox_label = bbox_results['cls_score'].argmax(dim=1)\n                    rois = bbox_head.regress_by_class(\n                        rois, bbox_label, bbox_results['bbox_pred'],\n                        img_meta[0])\n\n            cls_score = sum(ms_scores) / float(len(ms_scores))\n            bboxes, scores = self.bbox_head[-1].get_bboxes(\n                rois,\n                cls_score,\n                bbox_results['bbox_pred'],\n                img_shape,\n                scale_factor,\n                rescale=False,\n                cfg=None)\n            aug_bboxes.append(bboxes)\n            aug_scores.append(scores)\n\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = merge_aug_bboxes(\n            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)\n        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,\n                                                rcnn_test_cfg.score_thr,\n                                                rcnn_test_cfg.nms,\n                                                rcnn_test_cfg.max_per_img)\n\n        det_bbox_results = bbox2result(det_bboxes, det_labels,\n                                       self.bbox_head[-1].num_classes)\n\n        if self.with_mask:\n            if det_bboxes.shape[0] == 0:\n                det_segm_results = [[]\n                                    for _ in range(self.mask_head.num_classes)]\n            else:\n                aug_masks = []\n                for x, img_meta, semantic_feat, glbctx_feat in zip(\n                        img_feats, img_metas, semantic_feats, glbctx_feats):\n                    img_shape = 
img_meta[0]['img_shape']\n                    scale_factor = img_meta[0]['scale_factor']\n                    flip = img_meta[0]['flip']\n                    _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,\n                                           scale_factor, flip)\n                    mask_rois = bbox2roi([_bboxes])\n                    # get relay feature on mask_rois\n                    bbox_results = self._bbox_forward(\n                        -1,\n                        x,\n                        mask_rois,\n                        semantic_feat=semantic_feat,\n                        glbctx_feat=glbctx_feat)\n                    relayed_feat = bbox_results['relayed_feat']\n                    relayed_feat = self.feat_relay_head(relayed_feat)\n                    mask_results = self._mask_forward(\n                        x,\n                        mask_rois,\n                        semantic_feat=semantic_feat,\n                        glbctx_feat=glbctx_feat,\n                        relayed_feat=relayed_feat)\n                    mask_pred = mask_results['mask_pred']\n                    aug_masks.append(mask_pred.sigmoid().cpu().numpy())\n                merged_masks = merge_aug_masks(aug_masks, img_metas,\n                                               self.test_cfg)\n                ori_shape = img_metas[0][0]['ori_shape']\n                det_segm_results = self.mask_head.get_seg_masks(\n                    merged_masks,\n                    det_bboxes,\n                    det_labels,\n                    rcnn_test_cfg,\n                    ori_shape,\n                    scale_factor=1.0,\n                    rescale=False)\n            return [(det_bbox_results, det_segm_results)]\n        else:\n            return [det_bbox_results]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/shared_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .res_layer import ResLayer\n\n__all__ = ['ResLayer']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/shared_heads/res_layer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch.nn as nn\nfrom mmcv.runner import BaseModule, auto_fp16\n\nfrom mmdet.models.backbones import ResNet\nfrom mmdet.models.builder import SHARED_HEADS\nfrom mmdet.models.utils import ResLayer as _ResLayer\n\n\n@SHARED_HEADS.register_module()\nclass ResLayer(BaseModule):\n\n    def __init__(self,\n                 depth,\n                 stage=3,\n                 stride=2,\n                 dilation=1,\n                 style='pytorch',\n                 norm_cfg=dict(type='BN', requires_grad=True),\n                 norm_eval=True,\n                 with_cp=False,\n                 dcn=None,\n                 pretrained=None,\n                 init_cfg=None):\n        super(ResLayer, self).__init__(init_cfg)\n\n        self.norm_eval = norm_eval\n        self.norm_cfg = norm_cfg\n        self.stage = stage\n        self.fp16_enabled = False\n        block, stage_blocks = ResNet.arch_settings[depth]\n        stage_block = stage_blocks[stage]\n        planes = 64 * 2**stage\n        inplanes = 64 * 2**(stage - 1) * block.expansion\n\n        res_layer = _ResLayer(\n            block,\n            inplanes,\n            planes,\n            stage_block,\n            stride=stride,\n            dilation=dilation,\n            style=style,\n            with_cp=with_cp,\n            norm_cfg=self.norm_cfg,\n            dcn=dcn)\n        self.add_module(f'layer{stage + 1}', res_layer)\n\n        assert not (init_cfg and pretrained), \\\n            'init_cfg and pretrained cannot be specified at the same time'\n        if isinstance(pretrained, str):\n            warnings.warn('DeprecationWarning: pretrained is a deprecated, '\n                          'please use \"init_cfg\" instead')\n            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)\n        elif pretrained is None:\n            if init_cfg is None:\n                self.init_cfg = [\n                    dict(type='Kaiming', layer='Conv2d'),\n                    dict(\n                        type='Constant',\n                        val=1,\n                        layer=['_BatchNorm', 'GroupNorm'])\n                ]\n        else:\n            raise TypeError('pretrained must be a str or None')\n\n    @auto_fp16()\n    def forward(self, x):\n        res_layer = getattr(self, f'layer{self.stage + 1}')\n        out = res_layer(x)\n        return out\n\n    def train(self, mode=True):\n        super(ResLayer, self).train(mode)\n        if self.norm_eval:\n            for m in self.modules():\n                if isinstance(m, nn.BatchNorm2d):\n                    m.eval()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/sparse_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\n\nfrom mmdet.core import bbox2result, bbox2roi, bbox_xyxy_to_cxcywh\nfrom mmdet.core.bbox.samplers import PseudoSampler\nfrom ..builder import HEADS\nfrom .cascade_roi_head import CascadeRoIHead\n\n\n@HEADS.register_module()\nclass SparseRoIHead(CascadeRoIHead):\n    r\"\"\"The RoIHead for `Sparse R-CNN: End-to-End Object Detection with\n    Learnable Proposals <https://arxiv.org/abs/2011.12450>`_\n    and `Instances as Queries <http://arxiv.org/abs/2105.01928>`_\n\n    Args:\n        num_stages (int): Number of stage whole iterative process.\n            Defaults to 6.\n        stage_loss_weights (Tuple[float]): The loss\n            weight of each stage. By default all stages have\n            the same weight 1.\n        bbox_roi_extractor (dict): Config of box roi extractor.\n        mask_roi_extractor (dict): Config of mask roi extractor.\n        bbox_head (dict): Config of box head.\n        mask_head (dict): Config of mask head.\n        train_cfg (dict, optional): Configuration information in train stage.\n            Defaults to None.\n        test_cfg (dict, optional): Configuration information in test stage.\n            Defaults to None.\n        pretrained (str, optional): model pretrained path. Default: None\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n\n    \"\"\"\n\n    def __init__(self,\n                 num_stages=6,\n                 stage_loss_weights=(1, 1, 1, 1, 1, 1),\n                 proposal_feature_channel=256,\n                 bbox_roi_extractor=dict(\n                     type='SingleRoIExtractor',\n                     roi_layer=dict(\n                         type='RoIAlign', output_size=7, sampling_ratio=2),\n                     out_channels=256,\n                     featmap_strides=[4, 8, 16, 32]),\n                 mask_roi_extractor=None,\n                 bbox_head=dict(\n                     type='DIIHead',\n                     num_classes=80,\n                     num_fcs=2,\n                     num_heads=8,\n                     num_cls_fcs=1,\n                     num_reg_fcs=3,\n                     feedforward_channels=2048,\n                     hidden_channels=256,\n                     dropout=0.0,\n                     roi_feat_size=7,\n                     ffn_act_cfg=dict(type='ReLU', inplace=True)),\n                 mask_head=None,\n                 train_cfg=None,\n                 test_cfg=None,\n                 pretrained=None,\n                 init_cfg=None):\n        assert bbox_roi_extractor is not None\n        assert bbox_head is not None\n        assert len(stage_loss_weights) == num_stages\n        self.num_stages = num_stages\n        self.stage_loss_weights = stage_loss_weights\n        self.proposal_feature_channel = proposal_feature_channel\n        super(SparseRoIHead, self).__init__(\n            num_stages,\n            stage_loss_weights,\n            bbox_roi_extractor=bbox_roi_extractor,\n            mask_roi_extractor=mask_roi_extractor,\n            bbox_head=bbox_head,\n            mask_head=mask_head,\n            train_cfg=train_cfg,\n            test_cfg=test_cfg,\n            pretrained=pretrained,\n            init_cfg=init_cfg)\n        # train_cfg would be None when run the test.py\n        if train_cfg is not None:\n            for stage in range(num_stages):\n                assert isinstance(self.bbox_sampler[stage], PseudoSampler), \\\n       
             'Sparse R-CNN and QueryInst only support `PseudoSampler`'\n\n    def _bbox_forward(self, stage, x, rois, object_feats, img_metas):\n        \"\"\"Box head forward function used in both training and testing. Returns\n        all regression, classification results and a intermediate feature.\n\n        Args:\n            stage (int): The index of current stage in\n                iterative process.\n            x (List[Tensor]): List of FPN features\n            rois (Tensor): Rois in total batch. With shape (num_proposal, 5).\n                the last dimension 5 represents (img_index, x1, y1, x2, y2).\n            object_feats (Tensor): The object feature extracted from\n                the previous stage.\n            img_metas (dict): meta information of images.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of bbox head outputs,\n                Containing the following results:\n\n                    - cls_score (Tensor): The score of each class, has\n                      shape (batch_size, num_proposals, num_classes)\n                      when use focal loss or\n                      (batch_size, num_proposals, num_classes+1)\n                      otherwise.\n                    - decode_bbox_pred (Tensor): The regression results\n                      with shape (batch_size, num_proposal, 4).\n                      The last dimension 4 represents\n                      [tl_x, tl_y, br_x, br_y].\n                    - object_feats (Tensor): The object feature extracted\n                      from current stage\n                    - detach_cls_score_list (list[Tensor]): The detached\n                      classification results, length is batch_size, and\n                      each tensor has shape (num_proposal, num_classes).\n                    - detach_proposal_list (list[tensor]): The detached\n                      regression results, length is batch_size, and each\n                      tensor has shape (num_proposal, 4). 
The last\n                      dimension 4 represents [tl_x, tl_y, br_x, br_y].\n        \"\"\"\n        num_imgs = len(img_metas)\n        bbox_roi_extractor = self.bbox_roi_extractor[stage]\n        bbox_head = self.bbox_head[stage]\n        bbox_feats = bbox_roi_extractor(x[:bbox_roi_extractor.num_inputs],\n                                        rois)\n        cls_score, bbox_pred, object_feats, attn_feats = bbox_head(\n            bbox_feats, object_feats)\n        proposal_list = self.bbox_head[stage].refine_bboxes(\n            rois,\n            rois.new_zeros(len(rois)),  # dummy arg\n            bbox_pred.view(-1, bbox_pred.size(-1)),\n            [rois.new_zeros(object_feats.size(1)) for _ in range(num_imgs)],\n            img_metas)\n        bbox_results = dict(\n            cls_score=cls_score,\n            decode_bbox_pred=torch.cat(proposal_list),\n            object_feats=object_feats,\n            attn_feats=attn_feats,\n            # detach then use it in label assign\n            detach_cls_score_list=[\n                cls_score[i].detach() for i in range(num_imgs)\n            ],\n            detach_proposal_list=[item.detach() for item in proposal_list])\n\n        return bbox_results\n\n    def _mask_forward(self, stage, x, rois, attn_feats):\n        \"\"\"Mask head forward function used in both training and testing.\"\"\"\n        mask_roi_extractor = self.mask_roi_extractor[stage]\n        mask_head = self.mask_head[stage]\n        mask_feats = mask_roi_extractor(x[:mask_roi_extractor.num_inputs],\n                                        rois)\n        # do not support caffe_c4 model anymore\n        mask_pred = mask_head(mask_feats, attn_feats)\n\n        mask_results = dict(mask_pred=mask_pred)\n        return mask_results\n\n    def _mask_forward_train(self, stage, x, attn_feats, sampling_results,\n                            gt_masks, rcnn_train_cfg):\n        \"\"\"Run forward function and calculate loss for mask head in\n        training.\"\"\"\n        pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])\n        attn_feats = torch.cat([\n            feats[res.pos_inds]\n            for (feats, res) in zip(attn_feats, sampling_results)\n        ])\n        mask_results = self._mask_forward(stage, x, pos_rois, attn_feats)\n\n        mask_targets = self.mask_head[stage].get_targets(\n            sampling_results, gt_masks, rcnn_train_cfg)\n\n        pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n\n        loss_mask = self.mask_head[stage].loss(mask_results['mask_pred'],\n                                               mask_targets, pos_labels)\n        mask_results.update(loss_mask)\n        return mask_results\n\n    def forward_train(self,\n                      x,\n                      proposal_boxes,\n                      proposal_features,\n                      img_metas,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      imgs_whwh=None,\n                      gt_masks=None):\n        \"\"\"Forward function in training stage.\n\n        Args:\n            x (list[Tensor]): list of multi-level img features.\n            proposals (Tensor): Decoded proposal bboxes, has shape\n                (batch_size, num_proposals, 4)\n            proposal_features (Tensor): Expanded proposal\n                features, has shape\n                (batch_size, num_proposals, proposal_feature_channel)\n            img_metas (list[dict]): list of 
image info dict where\n                each dict has: 'img_shape', 'scale_factor', 'flip',\n                and may also contain 'filename', 'ori_shape',\n                'pad_shape', and 'img_norm_cfg'. For details on the\n                values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n            imgs_whwh (Tensor): Tensor with shape (batch_size, 4),\n                    the dimension means\n                    [img_width,img_height, img_width, img_height].\n            gt_masks (None | Tensor) : true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of loss components of all stage.\n        \"\"\"\n\n        num_imgs = len(img_metas)\n        num_proposals = proposal_boxes.size(1)\n        imgs_whwh = imgs_whwh.repeat(1, num_proposals, 1)\n        all_stage_bbox_results = []\n        proposal_list = [proposal_boxes[i] for i in range(len(proposal_boxes))]\n        object_feats = proposal_features\n        all_stage_loss = {}\n        for stage in range(self.num_stages):\n            rois = bbox2roi(proposal_list)\n            bbox_results = self._bbox_forward(stage, x, rois, object_feats,\n                                              img_metas)\n            all_stage_bbox_results.append(bbox_results)\n            if gt_bboxes_ignore is None:\n                # TODO support ignore\n                gt_bboxes_ignore = [None for _ in range(num_imgs)]\n            sampling_results = []\n            cls_pred_list = bbox_results['detach_cls_score_list']\n            proposal_list = bbox_results['detach_proposal_list']\n            for i in range(num_imgs):\n                normalize_bbox_ccwh = bbox_xyxy_to_cxcywh(proposal_list[i] /\n                                                          imgs_whwh[i])\n                assign_result = self.bbox_assigner[stage].assign(\n                    normalize_bbox_ccwh, cls_pred_list[i], gt_bboxes[i],\n                    gt_labels[i], img_metas[i])\n                sampling_result = self.bbox_sampler[stage].sample(\n                    assign_result, proposal_list[i], gt_bboxes[i])\n                sampling_results.append(sampling_result)\n            bbox_targets = self.bbox_head[stage].get_targets(\n                sampling_results, gt_bboxes, gt_labels, self.train_cfg[stage],\n                True)\n            cls_score = bbox_results['cls_score']\n            decode_bbox_pred = bbox_results['decode_bbox_pred']\n\n            single_stage_loss = self.bbox_head[stage].loss(\n                cls_score.view(-1, cls_score.size(-1)),\n                decode_bbox_pred.view(-1, 4),\n                *bbox_targets,\n                imgs_whwh=imgs_whwh)\n\n            if self.with_mask:\n                mask_results = self._mask_forward_train(\n                    stage, x, bbox_results['attn_feats'], sampling_results,\n                    gt_masks, self.train_cfg[stage])\n                single_stage_loss['loss_mask'] = mask_results['loss_mask']\n\n            for key, value in single_stage_loss.items():\n                
all_stage_loss[f'stage{stage}_{key}'] = value * \\\n                                    self.stage_loss_weights[stage]\n            object_feats = bbox_results['object_feats']\n\n        return all_stage_loss\n\n    def simple_test(self,\n                    x,\n                    proposal_boxes,\n                    proposal_features,\n                    img_metas,\n                    imgs_whwh,\n                    rescale=False):\n        \"\"\"Test without augmentation.\n\n        Args:\n            x (list[Tensor]): list of multi-level img features.\n            proposal_boxes (Tensor): Decoded proposal bboxes, has shape\n                (batch_size, num_proposals, 4)\n            proposal_features (Tensor): Expanded proposal\n                features, has shape\n                (batch_size, num_proposals, proposal_feature_channel)\n            img_metas (dict): meta information of images.\n            imgs_whwh (Tensor): Tensor with shape (batch_size, 4),\n                    the dimension means\n                    [img_width,img_height, img_width, img_height].\n            rescale (bool): If True, return boxes in original image\n                space. Defaults to False.\n\n        Returns:\n            list[list[np.ndarray]] or list[tuple]: When no mask branch,\n            it is bbox results of each image and classes with type\n            `list[list[np.ndarray]]`. The outer list\n            corresponds to each image. The inner list\n            corresponds to each class. When the model has a mask branch,\n            it is a list[tuple] that contains bbox results and mask results.\n            The outer list corresponds to each image, and first element\n            of tuple is bbox results, second element is mask results.\n        \"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        # Decode initial proposals\n        num_imgs = len(img_metas)\n        proposal_list = [proposal_boxes[i] for i in range(num_imgs)]\n        ori_shapes = tuple(meta['ori_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        object_feats = proposal_features\n        if all([proposal.shape[0] == 0 for proposal in proposal_list]):\n            # There is no proposal in the whole batch\n            bbox_results = [[\n                np.zeros((0, 5), dtype=np.float32)\n                for i in range(self.bbox_head[-1].num_classes)\n            ]] * num_imgs\n            return bbox_results\n\n        for stage in range(self.num_stages):\n            rois = bbox2roi(proposal_list)\n            bbox_results = self._bbox_forward(stage, x, rois, object_feats,\n                                              img_metas)\n            object_feats = bbox_results['object_feats']\n            cls_score = bbox_results['cls_score']\n            proposal_list = bbox_results['detach_proposal_list']\n\n        if self.with_mask:\n            rois = bbox2roi(proposal_list)\n            mask_results = self._mask_forward(stage, x, rois,\n                                              bbox_results['attn_feats'])\n            mask_results['mask_pred'] = mask_results['mask_pred'].reshape(\n                num_imgs, -1, *mask_results['mask_pred'].size()[1:])\n\n        num_classes = self.bbox_head[-1].num_classes\n        det_bboxes = []\n        det_labels = []\n\n        if self.bbox_head[-1].loss_cls.use_sigmoid:\n            cls_score = cls_score.sigmoid()\n        else:\n            cls_score = cls_score.softmax(-1)[..., :-1]\n\n    
    for img_id in range(num_imgs):\n            cls_score_per_img = cls_score[img_id]\n            scores_per_img, topk_indices = cls_score_per_img.flatten(\n                0, 1).topk(\n                    self.test_cfg.max_per_img, sorted=False)\n            labels_per_img = topk_indices % num_classes\n            bbox_pred_per_img = proposal_list[img_id][topk_indices //\n                                                      num_classes]\n            if rescale:\n                scale_factor = img_metas[img_id]['scale_factor']\n                bbox_pred_per_img /= bbox_pred_per_img.new_tensor(scale_factor)\n            det_bboxes.append(\n                torch.cat([bbox_pred_per_img, scores_per_img[:, None]], dim=1))\n            det_labels.append(labels_per_img)\n\n        bbox_results = [\n            bbox2result(det_bboxes[i], det_labels[i], num_classes)\n            for i in range(num_imgs)\n        ]\n\n        if self.with_mask:\n            if rescale and not isinstance(scale_factors[0], float):\n                scale_factors = [\n                    torch.from_numpy(scale_factor).to(det_bboxes[0].device)\n                    for scale_factor in scale_factors\n                ]\n            _bboxes = [\n                det_bboxes[i][:, :4] *\n                scale_factors[i] if rescale else det_bboxes[i][:, :4]\n                for i in range(len(det_bboxes))\n            ]\n            segm_results = []\n            mask_pred = mask_results['mask_pred']\n            for img_id in range(num_imgs):\n                mask_pred_per_img = mask_pred[img_id].flatten(0,\n                                                              1)[topk_indices]\n                mask_pred_per_img = mask_pred_per_img[:, None, ...].repeat(\n                    1, num_classes, 1, 1)\n                segm_result = self.mask_head[-1].get_seg_masks(\n                    mask_pred_per_img, _bboxes[img_id], det_labels[img_id],\n                    self.test_cfg, ori_shapes[img_id], scale_factors[img_id],\n                    rescale)\n                segm_results.append(segm_result)\n\n        if self.with_mask:\n            results = list(zip(bbox_results, segm_results))\n        else:\n            results = bbox_results\n\n        return results\n\n    def aug_test(self, features, proposal_list, img_metas, rescale=False):\n        raise NotImplementedError(\n            'Sparse R-CNN and QueryInst does not support `aug_test`')\n\n    def forward_dummy(self, x, proposal_boxes, proposal_features, img_metas):\n        \"\"\"Dummy forward function when do the flops computing.\"\"\"\n        all_stage_bbox_results = []\n        proposal_list = [proposal_boxes[i] for i in range(len(proposal_boxes))]\n        object_feats = proposal_features\n        if self.with_bbox:\n            for stage in range(self.num_stages):\n                rois = bbox2roi(proposal_list)\n                bbox_results = self._bbox_forward(stage, x, rois, object_feats,\n                                                  img_metas)\n\n                all_stage_bbox_results.append((bbox_results, ))\n                proposal_list = bbox_results['detach_proposal_list']\n                object_feats = bbox_results['object_feats']\n\n                if self.with_mask:\n                    rois = bbox2roi(proposal_list)\n                    mask_results = self._mask_forward(\n                        stage, x, rois, bbox_results['attn_feats'])\n                    all_stage_bbox_results[-1] += (mask_results, )\n        return 
all_stage_bbox_results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/standard_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core import bbox2result, bbox2roi, build_assigner, build_sampler\nfrom ..builder import HEADS, build_head, build_roi_extractor\nfrom .base_roi_head import BaseRoIHead\nfrom .test_mixins import BBoxTestMixin, MaskTestMixin\n\n\n@HEADS.register_module()\nclass StandardRoIHead(BaseRoIHead, BBoxTestMixin, MaskTestMixin):\n    \"\"\"Simplest base roi head including one bbox head and one mask head.\"\"\"\n\n    def init_assigner_sampler(self):\n        \"\"\"Initialize assigner and sampler.\"\"\"\n        self.bbox_assigner = None\n        self.bbox_sampler = None\n        if self.train_cfg:\n            self.bbox_assigner = build_assigner(self.train_cfg.assigner)\n            self.bbox_sampler = build_sampler(\n                self.train_cfg.sampler, context=self)\n\n    def init_bbox_head(self, bbox_roi_extractor, bbox_head):\n        \"\"\"Initialize ``bbox_head``\"\"\"\n        self.bbox_roi_extractor = build_roi_extractor(bbox_roi_extractor)\n        self.bbox_head = build_head(bbox_head)\n\n    def init_mask_head(self, mask_roi_extractor, mask_head):\n        \"\"\"Initialize ``mask_head``\"\"\"\n        if mask_roi_extractor is not None:\n            self.mask_roi_extractor = build_roi_extractor(mask_roi_extractor)\n            self.share_roi_extractor = False\n        else:\n            self.share_roi_extractor = True\n            self.mask_roi_extractor = self.bbox_roi_extractor\n        self.mask_head = build_head(mask_head)\n\n    def forward_dummy(self, x, proposals):\n        \"\"\"Dummy forward function.\"\"\"\n        # bbox head\n        outs = ()\n        rois = bbox2roi([proposals])\n        if self.with_bbox:\n            bbox_results = self._bbox_forward(x, rois)\n            outs = outs + (bbox_results['cls_score'],\n                           bbox_results['bbox_pred'])\n        # mask head\n        if self.with_mask:\n            mask_rois = rois[:100]\n            mask_results = self._mask_forward(x, mask_rois)\n            outs = outs + (mask_results['mask_pred'], )\n        return outs\n\n    def forward_train(self,\n                      x,\n                      img_metas,\n                      proposal_list,\n                      gt_bboxes,\n                      gt_labels,\n                      gt_bboxes_ignore=None,\n                      gt_masks=None,\n                      **kwargs):\n        \"\"\"\n        Args:\n            x (list[Tensor]): list of multi-level img features.\n            img_metas (list[dict]): list of image info dict where each dict\n                has: 'img_shape', 'scale_factor', 'flip', and may also contain\n                'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n                For details on the values of these keys see\n                `mmdet/datasets/pipelines/formatting.py:Collect`.\n            proposals (list[Tensors]): list of region proposals.\n            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with\n                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.\n            gt_labels (list[Tensor]): class indices corresponding to each box\n            gt_bboxes_ignore (None | list[Tensor]): specify which bounding\n                boxes can be ignored when computing the loss.\n            gt_masks (None | Tensor) : true segmentation masks for each box\n                used if the architecture supports a segmentation task.\n\n        Returns:\n            dict[str, Tensor]: a dictionary of 
loss components\n        \"\"\"\n        # assign gts and sample proposals\n        if self.with_bbox or self.with_mask:\n            num_imgs = len(img_metas)\n            if gt_bboxes_ignore is None:\n                gt_bboxes_ignore = [None for _ in range(num_imgs)]\n            sampling_results = []\n            for i in range(num_imgs):\n                assign_result = self.bbox_assigner.assign(\n                    proposal_list[i], gt_bboxes[i], gt_bboxes_ignore[i],\n                    gt_labels[i])\n                sampling_result = self.bbox_sampler.sample(\n                    assign_result,\n                    proposal_list[i],\n                    gt_bboxes[i],\n                    gt_labels[i],\n                    feats=[lvl_feat[i][None] for lvl_feat in x])\n                sampling_results.append(sampling_result)\n\n        losses = dict()\n        # bbox head forward and loss\n        if self.with_bbox:\n            bbox_results = self._bbox_forward_train(x, sampling_results,\n                                                    gt_bboxes, gt_labels,\n                                                    img_metas)\n            losses.update(bbox_results['loss_bbox'])\n\n        # mask head forward and loss\n        if self.with_mask:\n            mask_results = self._mask_forward_train(x, sampling_results,\n                                                    bbox_results['bbox_feats'],\n                                                    gt_masks, img_metas)\n            losses.update(mask_results['loss_mask'])\n\n        return losses\n\n    def _bbox_forward(self, x, rois):\n        \"\"\"Box head forward function used in both training and testing.\"\"\"\n        # TODO: a more flexible way to decide which feature maps to use\n        bbox_feats = self.bbox_roi_extractor(\n            x[:self.bbox_roi_extractor.num_inputs], rois)\n        if self.with_shared_head:\n            bbox_feats = self.shared_head(bbox_feats)\n        cls_score, bbox_pred = self.bbox_head(bbox_feats)\n\n        bbox_results = dict(\n            cls_score=cls_score, bbox_pred=bbox_pred, bbox_feats=bbox_feats)\n        return bbox_results\n\n    def _bbox_forward_train(self, x, sampling_results, gt_bboxes, gt_labels,\n                            img_metas):\n        \"\"\"Run forward function and calculate loss for box head in training.\"\"\"\n        rois = bbox2roi([res.bboxes for res in sampling_results])\n        bbox_results = self._bbox_forward(x, rois)\n\n        bbox_targets = self.bbox_head.get_targets(sampling_results, gt_bboxes,\n                                                  gt_labels, self.train_cfg)\n        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],\n                                        bbox_results['bbox_pred'], rois,\n                                        *bbox_targets)\n\n        bbox_results.update(loss_bbox=loss_bbox)\n        return bbox_results\n\n    def _mask_forward_train(self, x, sampling_results, bbox_feats, gt_masks,\n                            img_metas):\n        \"\"\"Run forward function and calculate loss for mask head in\n        training.\"\"\"\n        if not self.share_roi_extractor:\n            pos_rois = bbox2roi([res.pos_bboxes for res in sampling_results])\n            mask_results = self._mask_forward(x, pos_rois)\n        else:\n            pos_inds = []\n            device = bbox_feats.device\n            for res in sampling_results:\n                pos_inds.append(\n                    torch.ones(\n                        
res.pos_bboxes.shape[0],\n                        device=device,\n                        dtype=torch.uint8))\n                pos_inds.append(\n                    torch.zeros(\n                        res.neg_bboxes.shape[0],\n                        device=device,\n                        dtype=torch.uint8))\n            pos_inds = torch.cat(pos_inds)\n\n            mask_results = self._mask_forward(\n                x, pos_inds=pos_inds, bbox_feats=bbox_feats)\n\n        mask_targets = self.mask_head.get_targets(sampling_results, gt_masks,\n                                                  self.train_cfg)\n        pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n        loss_mask = self.mask_head.loss(mask_results['mask_pred'],\n                                        mask_targets, pos_labels)\n\n        mask_results.update(loss_mask=loss_mask, mask_targets=mask_targets)\n        return mask_results\n\n    def _mask_forward(self, x, rois=None, pos_inds=None, bbox_feats=None):\n        \"\"\"Mask head forward function used in both training and testing.\"\"\"\n        assert ((rois is not None) ^\n                (pos_inds is not None and bbox_feats is not None))\n        if rois is not None:\n            mask_feats = self.mask_roi_extractor(\n                x[:self.mask_roi_extractor.num_inputs], rois)\n            if self.with_shared_head:\n                mask_feats = self.shared_head(mask_feats)\n        else:\n            assert bbox_feats is not None\n            mask_feats = bbox_feats[pos_inds]\n\n        mask_pred = self.mask_head(mask_feats)\n        mask_results = dict(mask_pred=mask_pred, mask_feats=mask_feats)\n        return mask_results\n\n    async def async_simple_test(self,\n                                x,\n                                proposal_list,\n                                img_metas,\n                                proposals=None,\n                                rescale=False):\n        \"\"\"Async test without augmentation.\"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n\n        det_bboxes, det_labels = await self.async_test_bboxes(\n            x, img_metas, proposal_list, self.test_cfg, rescale=rescale)\n        bbox_results = bbox2result(det_bboxes, det_labels,\n                                   self.bbox_head.num_classes)\n        if not self.with_mask:\n            return bbox_results\n        else:\n            segm_results = await self.async_test_mask(\n                x,\n                img_metas,\n                det_bboxes,\n                det_labels,\n                rescale=rescale,\n                mask_test_cfg=self.test_cfg.get('mask'))\n            return bbox_results, segm_results\n\n    def simple_test(self,\n                    x,\n                    proposal_list,\n                    img_metas,\n                    proposals=None,\n                    rescale=False):\n        \"\"\"Test without augmentation.\n\n        Args:\n            x (tuple[Tensor]): Features from upstream network. Each\n                has shape (batch_size, c, h, w).\n            proposal_list (list(Tensor)): Proposals from rpn head.\n                Each has shape (num_proposals, 5), last dimension\n                5 represent (x1, y1, x2, y2, score).\n            img_metas (list[dict]): Meta information of images.\n            rescale (bool): Whether to rescale the results to\n                the original image. 
Default: True.\n\n        Returns:\n            list[list[np.ndarray]] or list[tuple]: When no mask branch,\n            it is bbox results of each image and classes with type\n            `list[list[np.ndarray]]`. The outer list\n            corresponds to each image. The inner list\n            corresponds to each class. When the model has mask branch,\n            it contains bbox results and mask results.\n            The outer list corresponds to each image, and first element\n            of tuple is bbox results, second element is mask results.\n        \"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n\n        det_bboxes, det_labels = self.simple_test_bboxes(\n            x, img_metas, proposal_list, self.test_cfg, rescale=rescale)\n\n        bbox_results = [\n            bbox2result(det_bboxes[i], det_labels[i],\n                        self.bbox_head.num_classes)\n            for i in range(len(det_bboxes))\n        ]\n\n        if not self.with_mask:\n            return bbox_results\n        else:\n            segm_results = self.simple_test_mask(\n                x, img_metas, det_bboxes, det_labels, rescale=rescale)\n            return list(zip(bbox_results, segm_results))\n\n    def aug_test(self, x, proposal_list, img_metas, rescale=False):\n        \"\"\"Test with augmentations.\n\n        If rescale is False, then returned bboxes and masks will fit the scale\n        of imgs[0].\n        \"\"\"\n        det_bboxes, det_labels = self.aug_test_bboxes(x, img_metas,\n                                                      proposal_list,\n                                                      self.test_cfg)\n        if rescale:\n            _det_bboxes = det_bboxes\n        else:\n            _det_bboxes = det_bboxes.clone()\n            _det_bboxes[:, :4] *= det_bboxes.new_tensor(\n                img_metas[0][0]['scale_factor'])\n        bbox_results = bbox2result(_det_bboxes, det_labels,\n                                   self.bbox_head.num_classes)\n\n        # det_bboxes always keep the original scale\n        if self.with_mask:\n            segm_results = self.aug_test_mask(x, img_metas, det_bboxes,\n                                              det_labels)\n            return [(bbox_results, segm_results)]\n        else:\n            return [bbox_results]\n\n    def onnx_export(self, x, proposals, img_metas, rescale=False):\n        \"\"\"Test without augmentation.\"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        det_bboxes, det_labels = self.bbox_onnx_export(\n            x, img_metas, proposals, self.test_cfg, rescale=rescale)\n\n        if not self.with_mask:\n            return det_bboxes, det_labels\n        else:\n            segm_results = self.mask_onnx_export(\n                x, img_metas, det_bboxes, det_labels, rescale=rescale)\n            return det_bboxes, det_labels, segm_results\n\n    def mask_onnx_export(self, x, img_metas, det_bboxes, det_labels, **kwargs):\n        \"\"\"Export mask branch to onnx which supports batch inference.\n\n        Args:\n            x (tuple[Tensor]): Feature maps of all scale level.\n            img_metas (list[dict]): Image meta info.\n            det_bboxes (Tensor): Bboxes and corresponding scores.\n                has shape [N, num_bboxes, 5].\n            det_labels (Tensor): class labels of\n                shape [N, num_bboxes].\n\n        Returns:\n            Tensor: The segmentation results of shape [N, num_bboxes,\n                image_height, image_width].\n   
     \"\"\"\n        # image shapes of images in the batch\n\n        if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n            raise RuntimeError('[ONNX Error] Can not record MaskHead '\n                               'as it has not been executed this time')\n        batch_size = det_bboxes.size(0)\n        # if det_bboxes is rescaled to the original image size, we need to\n        # rescale it back to the testing scale to obtain RoIs.\n        det_bboxes = det_bboxes[..., :4]\n        batch_index = torch.arange(\n            det_bboxes.size(0), device=det_bboxes.device).float().view(\n                -1, 1, 1).expand(det_bboxes.size(0), det_bboxes.size(1), 1)\n        mask_rois = torch.cat([batch_index, det_bboxes], dim=-1)\n        mask_rois = mask_rois.view(-1, 5)\n        mask_results = self._mask_forward(x, mask_rois)\n        mask_pred = mask_results['mask_pred']\n        max_shape = img_metas[0]['img_shape_for_onnx']\n        num_det = det_bboxes.shape[1]\n        det_bboxes = det_bboxes.reshape(-1, 4)\n        det_labels = det_labels.reshape(-1)\n        segm_results = self.mask_head.onnx_export(mask_pred, det_bboxes,\n                                                  det_labels, self.test_cfg,\n                                                  max_shape)\n        segm_results = segm_results.reshape(batch_size, num_det, max_shape[0],\n                                            max_shape[1])\n        return segm_results\n\n    def bbox_onnx_export(self, x, img_metas, proposals, rcnn_test_cfg,\n                         **kwargs):\n        \"\"\"Export bbox branch to onnx which supports batch inference.\n\n        Args:\n            x (tuple[Tensor]): Feature maps of all scale level.\n            img_metas (list[dict]): Image meta info.\n            proposals (Tensor): Region proposals with\n                batch dimension, has shape [N, num_bboxes, 5].\n            rcnn_test_cfg (obj:`ConfigDict`): `test_cfg` of R-CNN.\n\n        Returns:\n            tuple[Tensor, Tensor]: bboxes of shape [N, num_bboxes, 5]\n                and class labels of shape [N, num_bboxes].\n        \"\"\"\n        # get origin input shape to support onnx dynamic input shape\n        assert len(\n            img_metas\n        ) == 1, 'Only support one input image while in exporting to ONNX'\n        img_shapes = img_metas[0]['img_shape_for_onnx']\n\n        rois = proposals\n\n        batch_index = torch.arange(\n            rois.size(0), device=rois.device).float().view(-1, 1, 1).expand(\n                rois.size(0), rois.size(1), 1)\n\n        rois = torch.cat([batch_index, rois[..., :4]], dim=-1)\n        batch_size = rois.shape[0]\n        num_proposals_per_img = rois.shape[1]\n\n        # Eliminate the batch dimension\n        rois = rois.view(-1, 5)\n        bbox_results = self._bbox_forward(x, rois)\n        cls_score = bbox_results['cls_score']\n        bbox_pred = bbox_results['bbox_pred']\n\n        # Recover the batch dimension\n        rois = rois.reshape(batch_size, num_proposals_per_img, rois.size(-1))\n        cls_score = cls_score.reshape(batch_size, num_proposals_per_img,\n                                      cls_score.size(-1))\n\n        bbox_pred = bbox_pred.reshape(batch_size, num_proposals_per_img,\n                                      bbox_pred.size(-1))\n        det_bboxes, det_labels = self.bbox_head.onnx_export(\n            rois, cls_score, bbox_pred, img_shapes, cfg=rcnn_test_cfg)\n\n        return det_bboxes, det_labels\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/test_mixins.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport sys\nimport warnings\n\nimport numpy as np\nimport torch\n\nfrom mmdet.core import (bbox2roi, bbox_mapping, merge_aug_bboxes,\n                        merge_aug_masks, multiclass_nms)\n\nif sys.version_info >= (3, 7):\n    from mmdet.utils.contextmanagers import completed\n\n\nclass BBoxTestMixin:\n\n    if sys.version_info >= (3, 7):\n\n        async def async_test_bboxes(self,\n                                    x,\n                                    img_metas,\n                                    proposals,\n                                    rcnn_test_cfg,\n                                    rescale=False,\n                                    **kwargs):\n            \"\"\"Asynchronized test for box head without augmentation.\"\"\"\n            rois = bbox2roi(proposals)\n            roi_feats = self.bbox_roi_extractor(\n                x[:len(self.bbox_roi_extractor.featmap_strides)], rois)\n            if self.with_shared_head:\n                roi_feats = self.shared_head(roi_feats)\n            sleep_interval = rcnn_test_cfg.get('async_sleep_interval', 0.017)\n\n            async with completed(\n                    __name__, 'bbox_head_forward',\n                    sleep_interval=sleep_interval):\n                cls_score, bbox_pred = self.bbox_head(roi_feats)\n\n            img_shape = img_metas[0]['img_shape']\n            scale_factor = img_metas[0]['scale_factor']\n            det_bboxes, det_labels = self.bbox_head.get_bboxes(\n                rois,\n                cls_score,\n                bbox_pred,\n                img_shape,\n                scale_factor,\n                rescale=rescale,\n                cfg=rcnn_test_cfg)\n            return det_bboxes, det_labels\n\n    def simple_test_bboxes(self,\n                           x,\n                           img_metas,\n                           proposals,\n                           rcnn_test_cfg,\n                           rescale=False):\n        \"\"\"Test only det bboxes without augmentation.\n\n        Args:\n            x (tuple[Tensor]): Feature maps of all scale level.\n            img_metas (list[dict]): Image meta info.\n            proposals (List[Tensor]): Region proposals.\n            rcnn_test_cfg (obj:`ConfigDict`): `test_cfg` of R-CNN.\n            rescale (bool): If True, return boxes in original image space.\n                Default: False.\n\n        Returns:\n            tuple[list[Tensor], list[Tensor]]: The first list contains\n                the boxes of the corresponding image in a batch, each\n                tensor has the shape (num_boxes, 5) and last dimension\n                5 represent (tl_x, tl_y, br_x, br_y, score). 
Each Tensor\n                in the second list is the labels with shape (num_boxes, ).\n                The length of both lists should be equal to batch_size.\n        \"\"\"\n\n        rois = bbox2roi(proposals)\n\n        if rois.shape[0] == 0:\n            batch_size = len(proposals)\n            det_bbox = rois.new_zeros(0, 5)\n            det_label = rois.new_zeros((0, ), dtype=torch.long)\n            if rcnn_test_cfg is None:\n                det_bbox = det_bbox[:, :4]\n                det_label = rois.new_zeros(\n                    (0, self.bbox_head.fc_cls.out_features))\n            # There is no proposal in the whole batch\n            return [det_bbox] * batch_size, [det_label] * batch_size\n\n        bbox_results = self._bbox_forward(x, rois)\n        img_shapes = tuple(meta['img_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        # split batch bbox prediction back to each image\n        cls_score = bbox_results['cls_score']\n        bbox_pred = bbox_results['bbox_pred']\n        num_proposals_per_img = tuple(len(p) for p in proposals)\n        rois = rois.split(num_proposals_per_img, 0)\n        cls_score = cls_score.split(num_proposals_per_img, 0)\n\n        # some detector with_reg is False, bbox_pred will be None\n        if bbox_pred is not None:\n            # TODO move this to a sabl_roi_head\n            # the bbox prediction of some detectors like SABL is not Tensor\n            if isinstance(bbox_pred, torch.Tensor):\n                bbox_pred = bbox_pred.split(num_proposals_per_img, 0)\n            else:\n                bbox_pred = self.bbox_head.bbox_pred_split(\n                    bbox_pred, num_proposals_per_img)\n        else:\n            bbox_pred = (None, ) * len(proposals)\n\n        # apply bbox post-processing to each image individually\n        det_bboxes = []\n        det_labels = []\n        for i in range(len(proposals)):\n            if rois[i].shape[0] == 0:\n                # There is no proposal in the single image\n                det_bbox = rois[i].new_zeros(0, 5)\n                det_label = rois[i].new_zeros((0, ), dtype=torch.long)\n                if rcnn_test_cfg is None:\n                    det_bbox = det_bbox[:, :4]\n                    det_label = rois[i].new_zeros(\n                        (0, self.bbox_head.fc_cls.out_features))\n\n            else:\n                det_bbox, det_label = self.bbox_head.get_bboxes(\n                    rois[i],\n                    cls_score[i],\n                    bbox_pred[i],\n                    img_shapes[i],\n                    scale_factors[i],\n                    rescale=rescale,\n                    cfg=rcnn_test_cfg)\n            det_bboxes.append(det_bbox)\n            det_labels.append(det_label)\n        return det_bboxes, det_labels\n\n    def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):\n        \"\"\"Test det bboxes with test time augmentation.\"\"\"\n        aug_bboxes = []\n        aug_scores = []\n        for x, img_meta in zip(feats, img_metas):\n            # only one image in the batch\n            img_shape = img_meta[0]['img_shape']\n            scale_factor = img_meta[0]['scale_factor']\n            flip = img_meta[0]['flip']\n            flip_direction = img_meta[0]['flip_direction']\n            # TODO more flexible\n            proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,\n                                     scale_factor, flip, flip_direction)\n            rois 
= bbox2roi([proposals])\n            bbox_results = self._bbox_forward(x, rois)\n            bboxes, scores = self.bbox_head.get_bboxes(\n                rois,\n                bbox_results['cls_score'],\n                bbox_results['bbox_pred'],\n                img_shape,\n                scale_factor,\n                rescale=False,\n                cfg=None)\n            aug_bboxes.append(bboxes)\n            aug_scores.append(scores)\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = merge_aug_bboxes(\n            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)\n        if merged_bboxes.shape[0] == 0:\n            # There is no proposal in the single image\n            det_bboxes = merged_bboxes.new_zeros(0, 5)\n            det_labels = merged_bboxes.new_zeros((0, ), dtype=torch.long)\n        else:\n            det_bboxes, det_labels = multiclass_nms(merged_bboxes,\n                                                    merged_scores,\n                                                    rcnn_test_cfg.score_thr,\n                                                    rcnn_test_cfg.nms,\n                                                    rcnn_test_cfg.max_per_img)\n        return det_bboxes, det_labels\n\n\nclass MaskTestMixin:\n\n    if sys.version_info >= (3, 7):\n\n        async def async_test_mask(self,\n                                  x,\n                                  img_metas,\n                                  det_bboxes,\n                                  det_labels,\n                                  rescale=False,\n                                  mask_test_cfg=None):\n            \"\"\"Asynchronized test for mask head without augmentation.\"\"\"\n            # image shape of the first image in the batch (only one)\n            ori_shape = img_metas[0]['ori_shape']\n            scale_factor = img_metas[0]['scale_factor']\n            if det_bboxes.shape[0] == 0:\n                segm_result = [[] for _ in range(self.mask_head.num_classes)]\n            else:\n                if rescale and not isinstance(scale_factor,\n                                              (float, torch.Tensor)):\n                    scale_factor = det_bboxes.new_tensor(scale_factor)\n                _bboxes = (\n                    det_bboxes[:, :4] *\n                    scale_factor if rescale else det_bboxes)\n                mask_rois = bbox2roi([_bboxes])\n                mask_feats = self.mask_roi_extractor(\n                    x[:len(self.mask_roi_extractor.featmap_strides)],\n                    mask_rois)\n\n                if self.with_shared_head:\n                    mask_feats = self.shared_head(mask_feats)\n                if mask_test_cfg and mask_test_cfg.get('async_sleep_interval'):\n                    sleep_interval = mask_test_cfg['async_sleep_interval']\n                else:\n                    sleep_interval = 0.035\n                async with completed(\n                        __name__,\n                        'mask_head_forward',\n                        sleep_interval=sleep_interval):\n                    mask_pred = self.mask_head(mask_feats)\n                segm_result = self.mask_head.get_seg_masks(\n                    mask_pred, _bboxes, det_labels, self.test_cfg, ori_shape,\n                    scale_factor, rescale)\n            return segm_result\n\n    def simple_test_mask(self,\n                         x,\n                         img_metas,\n                         det_bboxes,\n         
                det_labels,\n                         rescale=False):\n        \"\"\"Simple test for mask head without augmentation.\"\"\"\n        # image shapes of images in the batch\n        ori_shapes = tuple(meta['ori_shape'] for meta in img_metas)\n        scale_factors = tuple(meta['scale_factor'] for meta in img_metas)\n\n        if isinstance(scale_factors[0], float):\n            warnings.warn(\n                'Scale factor in img_metas should be a '\n                'ndarray with shape (4,) '\n                'arrange as (factor_w, factor_h, factor_w, factor_h), '\n                'The scale_factor with float type has been deprecated. ')\n            scale_factors = np.array([scale_factors] * 4, dtype=np.float32)\n\n        num_imgs = len(det_bboxes)\n        if all(det_bbox.shape[0] == 0 for det_bbox in det_bboxes):\n            segm_results = [[[] for _ in range(self.mask_head.num_classes)]\n                            for _ in range(num_imgs)]\n        else:\n            # if det_bboxes is rescaled to the original image size, we need to\n            # rescale it back to the testing scale to obtain RoIs.\n            if rescale:\n                scale_factors = [\n                    torch.from_numpy(scale_factor).to(det_bboxes[0].device)\n                    for scale_factor in scale_factors\n                ]\n            _bboxes = [\n                det_bboxes[i][:, :4] *\n                scale_factors[i] if rescale else det_bboxes[i][:, :4]\n                for i in range(len(det_bboxes))\n            ]\n            mask_rois = bbox2roi(_bboxes)\n            mask_results = self._mask_forward(x, mask_rois)\n            mask_pred = mask_results['mask_pred']\n            # split batch mask prediction back to each image\n            num_mask_roi_per_img = [len(det_bbox) for det_bbox in det_bboxes]\n            mask_preds = mask_pred.split(num_mask_roi_per_img, 0)\n\n            # apply mask post-processing to each image individually\n            segm_results = []\n            for i in range(num_imgs):\n                if det_bboxes[i].shape[0] == 0:\n                    segm_results.append(\n                        [[] for _ in range(self.mask_head.num_classes)])\n                else:\n                    segm_result = self.mask_head.get_seg_masks(\n                        mask_preds[i], _bboxes[i], det_labels[i],\n                        self.test_cfg, ori_shapes[i], scale_factors[i],\n                        rescale)\n                    segm_results.append(segm_result)\n        return segm_results\n\n    def aug_test_mask(self, feats, img_metas, det_bboxes, det_labels):\n        \"\"\"Test for mask head with test time augmentation.\"\"\"\n        if det_bboxes.shape[0] == 0:\n            segm_result = [[] for _ in range(self.mask_head.num_classes)]\n        else:\n            aug_masks = []\n            for x, img_meta in zip(feats, img_metas):\n                img_shape = img_meta[0]['img_shape']\n                scale_factor = img_meta[0]['scale_factor']\n                flip = img_meta[0]['flip']\n                flip_direction = img_meta[0]['flip_direction']\n                _bboxes = bbox_mapping(det_bboxes[:, :4], img_shape,\n                                       scale_factor, flip, flip_direction)\n                mask_rois = bbox2roi([_bboxes])\n                mask_results = self._mask_forward(x, mask_rois)\n                # convert to numpy array to save memory\n                aug_masks.append(\n                    
mask_results['mask_pred'].sigmoid().cpu().numpy())\n            merged_masks = merge_aug_masks(aug_masks, img_metas, self.test_cfg)\n\n            ori_shape = img_metas[0][0]['ori_shape']\n            scale_factor = det_bboxes.new_ones(4)\n            segm_result = self.mask_head.get_seg_masks(\n                merged_masks,\n                det_bboxes,\n                det_labels,\n                self.test_cfg,\n                ori_shape,\n                scale_factor=scale_factor,\n                rescale=False)\n        return segm_result\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/roi_heads/trident_roi_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops import batched_nms\n\nfrom mmdet.core import (bbox2result, bbox2roi, bbox_mapping, merge_aug_bboxes,\n                        multiclass_nms)\nfrom mmdet.models.roi_heads.standard_roi_head import StandardRoIHead\nfrom ..builder import HEADS\n\n\n@HEADS.register_module()\nclass TridentRoIHead(StandardRoIHead):\n    \"\"\"Trident roi head.\n\n    Args:\n        num_branch (int): Number of branches in TridentNet.\n        test_branch_idx (int): In inference, all 3 branches will be used\n            if `test_branch_idx==-1`, otherwise only branch with index\n            `test_branch_idx` will be used.\n    \"\"\"\n\n    def __init__(self, num_branch, test_branch_idx, **kwargs):\n        self.num_branch = num_branch\n        self.test_branch_idx = test_branch_idx\n        super(TridentRoIHead, self).__init__(**kwargs)\n\n    def merge_trident_bboxes(self, trident_det_bboxes, trident_det_labels):\n        \"\"\"Merge bbox predictions of each branch.\"\"\"\n        if trident_det_bboxes.numel() == 0:\n            det_bboxes = trident_det_bboxes.new_zeros((0, 5))\n            det_labels = trident_det_bboxes.new_zeros((0, ), dtype=torch.long)\n        else:\n            nms_bboxes = trident_det_bboxes[:, :4]\n            nms_scores = trident_det_bboxes[:, 4].contiguous()\n            nms_inds = trident_det_labels\n            nms_cfg = self.test_cfg['nms']\n            det_bboxes, keep = batched_nms(nms_bboxes, nms_scores, nms_inds,\n                                           nms_cfg)\n            det_labels = trident_det_labels[keep]\n            if self.test_cfg['max_per_img'] > 0:\n                det_labels = det_labels[:self.test_cfg['max_per_img']]\n                det_bboxes = det_bboxes[:self.test_cfg['max_per_img']]\n\n        return det_bboxes, det_labels\n\n    def simple_test(self,\n                    x,\n                    proposal_list,\n                    img_metas,\n                    proposals=None,\n                    rescale=False):\n        \"\"\"Test without augmentation as follows:\n\n        1. Compute prediction bbox and label per branch.\n        2. 
Merge predictions of each branch according to scores of\n           bboxes, i.e., bboxes with higher score are kept to give\n           top-k prediction.\n        \"\"\"\n        assert self.with_bbox, 'Bbox head must be implemented.'\n        det_bboxes_list, det_labels_list = self.simple_test_bboxes(\n            x, img_metas, proposal_list, self.test_cfg, rescale=rescale)\n        num_branch = self.num_branch if self.test_branch_idx == -1 else 1\n        for _ in range(len(det_bboxes_list)):\n            if det_bboxes_list[_].shape[0] == 0:\n                det_bboxes_list[_] = det_bboxes_list[_].new_empty((0, 5))\n        det_bboxes, det_labels = [], []\n        for i in range(len(img_metas) // num_branch):\n            det_result = self.merge_trident_bboxes(\n                torch.cat(det_bboxes_list[i * num_branch:(i + 1) *\n                                          num_branch]),\n                torch.cat(det_labels_list[i * num_branch:(i + 1) *\n                                          num_branch]))\n            det_bboxes.append(det_result[0])\n            det_labels.append(det_result[1])\n\n        bbox_results = [\n            bbox2result(det_bboxes[i], det_labels[i],\n                        self.bbox_head.num_classes)\n            for i in range(len(det_bboxes))\n        ]\n        return bbox_results\n\n    def aug_test_bboxes(self, feats, img_metas, proposal_list, rcnn_test_cfg):\n        \"\"\"Test det bboxes with test time augmentation.\"\"\"\n        aug_bboxes = []\n        aug_scores = []\n        for x, img_meta in zip(feats, img_metas):\n            # only one image in the batch\n            img_shape = img_meta[0]['img_shape']\n            scale_factor = img_meta[0]['scale_factor']\n            flip = img_meta[0]['flip']\n            flip_direction = img_meta[0]['flip_direction']\n\n            trident_bboxes, trident_scores = [], []\n            for branch_idx in range(len(proposal_list)):\n                proposals = bbox_mapping(proposal_list[0][:, :4], img_shape,\n                                         scale_factor, flip, flip_direction)\n                rois = bbox2roi([proposals])\n                bbox_results = self._bbox_forward(x, rois)\n                bboxes, scores = self.bbox_head.get_bboxes(\n                    rois,\n                    bbox_results['cls_score'],\n                    bbox_results['bbox_pred'],\n                    img_shape,\n                    scale_factor,\n                    rescale=False,\n                    cfg=None)\n                trident_bboxes.append(bboxes)\n                trident_scores.append(scores)\n\n            aug_bboxes.append(torch.cat(trident_bboxes, 0))\n            aug_scores.append(torch.cat(trident_scores, 0))\n        # after merging, bboxes will be rescaled to the original image size\n        merged_bboxes, merged_scores = merge_aug_bboxes(\n            aug_bboxes, aug_scores, img_metas, rcnn_test_cfg)\n        det_bboxes, det_labels = multiclass_nms(merged_bboxes, merged_scores,\n                                                rcnn_test_cfg.score_thr,\n                                                rcnn_test_cfg.nms,\n                                                rcnn_test_cfg.max_per_img)\n        return det_bboxes, det_labels\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/seg_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .panoptic_fpn_head import PanopticFPNHead  # noqa: F401,F403\nfrom .panoptic_fusion_heads import *  # noqa: F401,F403\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/seg_heads/base_semantic_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nimport torch.nn.functional as F\nfrom mmcv.runner import BaseModule, force_fp32\n\nfrom ..builder import build_loss\nfrom ..utils import interpolate_as\n\n\nclass BaseSemanticHead(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base module of Semantic Head.\n\n    Args:\n        num_classes (int): the number of classes.\n        init_cfg (dict): the initialization config.\n        loss_seg (dict): the loss of the semantic head.\n    \"\"\"\n\n    def __init__(self,\n                 num_classes,\n                 init_cfg=None,\n                 loss_seg=dict(\n                     type='CrossEntropyLoss',\n                     ignore_index=255,\n                     loss_weight=1.0)):\n        super(BaseSemanticHead, self).__init__(init_cfg)\n        self.loss_seg = build_loss(loss_seg)\n        self.num_classes = num_classes\n\n    @force_fp32(apply_to=('seg_preds', ))\n    def loss(self, seg_preds, gt_semantic_seg):\n        \"\"\"Get the loss of semantic head.\n\n        Args:\n            seg_preds (Tensor): The input logits with the shape (N, C, H, W).\n            gt_semantic_seg: The ground truth of semantic segmentation with\n                the shape (N, H, W).\n            label_bias: The starting number of the semantic label.\n                Default: 1.\n\n        Returns:\n            dict: the loss of semantic head.\n        \"\"\"\n        if seg_preds.shape[-2:] != gt_semantic_seg.shape[-2:]:\n            seg_preds = interpolate_as(seg_preds, gt_semantic_seg)\n        seg_preds = seg_preds.permute((0, 2, 3, 1))\n\n        loss_seg = self.loss_seg(\n            seg_preds.reshape(-1, self.num_classes),  # => [NxHxW, C]\n            gt_semantic_seg.reshape(-1).long())\n        return dict(loss_seg=loss_seg)\n\n    @abstractmethod\n    def forward(self, x):\n        \"\"\"Placeholder of forward function.\n\n        Returns:\n            dict[str, Tensor]: A dictionary, including features\n                and predicted scores. Required keys: 'seg_preds'\n                and 'feats'.\n        \"\"\"\n        pass\n\n    def forward_train(self, x, gt_semantic_seg):\n        output = self.forward(x)\n        seg_preds = output['seg_preds']\n        return self.loss(seg_preds, gt_semantic_seg)\n\n    def simple_test(self, x, img_metas, rescale=False):\n        output = self.forward(x)\n        seg_preds = output['seg_preds']\n        seg_preds = F.interpolate(\n            seg_preds,\n            size=img_metas[0]['pad_shape'][:2],\n            mode='bilinear',\n            align_corners=False)\n\n        if rescale:\n            h, w, _ = img_metas[0]['img_shape']\n            seg_preds = seg_preds[:, :, :h, :w]\n\n            h, w, _ = img_metas[0]['ori_shape']\n            seg_preds = F.interpolate(\n                seg_preds, size=(h, w), mode='bilinear', align_corners=False)\n        return seg_preds\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/seg_heads/panoptic_fpn_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import ModuleList\n\nfrom ..builder import HEADS\nfrom ..utils import ConvUpsample\nfrom .base_semantic_head import BaseSemanticHead\n\n\n@HEADS.register_module()\nclass PanopticFPNHead(BaseSemanticHead):\n    \"\"\"PanopticFPNHead used in Panoptic FPN.\n\n    In this head, the number of output channels is ``num_stuff_classes\n    + 1``, including all stuff classes and one thing class. The stuff\n    classes will be reset from ``0`` to ``num_stuff_classes - 1``, the\n    thing classes will be merged to ``num_stuff_classes``-th channel.\n\n    Arg:\n        num_things_classes (int): Number of thing classes. Default: 80.\n        num_stuff_classes (int): Number of stuff classes. Default: 53.\n        num_classes (int): Number of classes, including all stuff\n            classes and one thing class. This argument is deprecated,\n            please use ``num_things_classes`` and ``num_stuff_classes``.\n            The module will automatically infer the num_classes by\n            ``num_stuff_classes + 1``.\n        in_channels (int): Number of channels in the input feature\n            map.\n        inner_channels (int): Number of channels in inner features.\n        start_level (int): The start level of the input features\n            used in PanopticFPN.\n        end_level (int): The end level of the used features, the\n            ``end_level``-th layer will not be used.\n        fg_range (tuple): Range of the foreground classes. It starts\n            from ``0`` to ``num_things_classes-1``. Deprecated, please use\n             ``num_things_classes`` directly.\n        bg_range (tuple): Range of the background classes. It starts\n            from ``num_things_classes`` to ``num_things_classes +\n            num_stuff_classes - 1``. Deprecated, please use\n            ``num_stuff_classes`` and ``num_things_classes`` directly.\n        conv_cfg (dict): Dictionary to construct and config\n            conv layer. 
Default: None.\n        norm_cfg (dict): Dictionary to construct and config norm layer.\n            Use ``GN`` by default.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n        loss_seg (dict): the loss of the semantic head.\n    \"\"\"\n\n    def __init__(self,\n                 num_things_classes=80,\n                 num_stuff_classes=53,\n                 num_classes=None,\n                 in_channels=256,\n                 inner_channels=128,\n                 start_level=0,\n                 end_level=4,\n                 fg_range=None,\n                 bg_range=None,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n                 init_cfg=None,\n                 loss_seg=dict(\n                     type='CrossEntropyLoss', ignore_index=-1,\n                     loss_weight=1.0)):\n        if num_classes is not None:\n            warnings.warn(\n                '`num_classes` is deprecated now, please set '\n                '`num_stuff_classes` directly, the `num_classes` will be '\n                'set to `num_stuff_classes + 1`')\n            # num_classes = num_stuff_classes + 1 for PanopticFPN.\n            assert num_classes == num_stuff_classes + 1\n        super(PanopticFPNHead, self).__init__(num_stuff_classes + 1, init_cfg,\n                                              loss_seg)\n        self.num_things_classes = num_things_classes\n        self.num_stuff_classes = num_stuff_classes\n        if fg_range is not None and bg_range is not None:\n            self.fg_range = fg_range\n            self.bg_range = bg_range\n            self.num_things_classes = fg_range[1] - fg_range[0] + 1\n            self.num_stuff_classes = bg_range[1] - bg_range[0] + 1\n            warnings.warn(\n                '`fg_range` and `bg_range` are deprecated now, '\n                f'please use `num_things_classes`={self.num_things_classes} '\n                f'and `num_stuff_classes`={self.num_stuff_classes} instead.')\n\n        # Used feature layers are [start_level, end_level)\n        self.start_level = start_level\n        self.end_level = end_level\n        self.num_stages = end_level - start_level\n        self.inner_channels = inner_channels\n\n        self.conv_upsample_layers = ModuleList()\n        for i in range(start_level, end_level):\n            self.conv_upsample_layers.append(\n                ConvUpsample(\n                    in_channels,\n                    inner_channels,\n                    num_layers=i if i > 0 else 1,\n                    num_upsample=i if i > 0 else 0,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                ))\n        self.conv_logits = nn.Conv2d(inner_channels, self.num_classes, 1)\n\n    def _set_things_to_void(self, gt_semantic_seg):\n        \"\"\"Merge thing classes to one class.\n\n        In PanopticFPN, the background labels will be reset from `0` to\n        `self.num_stuff_classes-1`, the foreground labels will be merged to\n        `self.num_stuff_classes`-th channel.\n        \"\"\"\n        gt_semantic_seg = gt_semantic_seg.int()\n        fg_mask = gt_semantic_seg < self.num_things_classes\n        bg_mask = (gt_semantic_seg >= self.num_things_classes) * (\n            gt_semantic_seg < self.num_things_classes + self.num_stuff_classes)\n\n        new_gt_seg = torch.clone(gt_semantic_seg)\n        new_gt_seg = torch.where(bg_mask,\n                                 gt_semantic_seg - 
self.num_things_classes,\n                                 new_gt_seg)\n        new_gt_seg = torch.where(fg_mask,\n                                 fg_mask.int() * self.num_stuff_classes,\n                                 new_gt_seg)\n        return new_gt_seg\n\n    def loss(self, seg_preds, gt_semantic_seg):\n        \"\"\"The loss of PanopticFPN head.\n\n        Things classes will be merged to one class in PanopticFPN.\n        \"\"\"\n        gt_semantic_seg = self._set_things_to_void(gt_semantic_seg)\n        return super().loss(seg_preds, gt_semantic_seg)\n\n    def init_weights(self):\n        super().init_weights()\n        nn.init.normal_(self.conv_logits.weight.data, 0, 0.01)\n        self.conv_logits.bias.data.zero_()\n\n    def forward(self, x):\n        # the number of subnets must be not more than\n        # the length of features.\n        assert self.num_stages <= len(x)\n\n        feats = []\n        for i, layer in enumerate(self.conv_upsample_layers):\n            f = layer(x[self.start_level + i])\n            feats.append(f)\n\n        feats = torch.sum(torch.stack(feats, dim=0), dim=0)\n        seg_preds = self.conv_logits(feats)\n        out = dict(seg_preds=seg_preds, feats=feats)\n        return out\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/seg_heads/panoptic_fusion_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .base_panoptic_fusion_head import \\\n    BasePanopticFusionHead  # noqa: F401,F403\nfrom .heuristic_fusion_head import HeuristicFusionHead  # noqa: F401,F403\nfrom .maskformer_fusion_head import MaskFormerFusionHead  # noqa: F401,F403\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/seg_heads/panoptic_fusion_heads/base_panoptic_fusion_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom abc import ABCMeta, abstractmethod\n\nfrom mmcv.runner import BaseModule\n\nfrom ...builder import build_loss\n\n\nclass BasePanopticFusionHead(BaseModule, metaclass=ABCMeta):\n    \"\"\"Base class for panoptic heads.\"\"\"\n\n    def __init__(self,\n                 num_things_classes=80,\n                 num_stuff_classes=53,\n                 test_cfg=None,\n                 loss_panoptic=None,\n                 init_cfg=None,\n                 **kwargs):\n        super(BasePanopticFusionHead, self).__init__(init_cfg)\n        self.num_things_classes = num_things_classes\n        self.num_stuff_classes = num_stuff_classes\n        self.num_classes = num_things_classes + num_stuff_classes\n        self.test_cfg = test_cfg\n\n        if loss_panoptic:\n            self.loss_panoptic = build_loss(loss_panoptic)\n        else:\n            self.loss_panoptic = None\n\n    @property\n    def with_loss(self):\n        \"\"\"bool: whether the panoptic head contains loss function.\"\"\"\n        return self.loss_panoptic is not None\n\n    @abstractmethod\n    def forward_train(self, gt_masks=None, gt_semantic_seg=None, **kwargs):\n        \"\"\"Forward function during training.\"\"\"\n\n    @abstractmethod\n    def simple_test(self,\n                    img_metas,\n                    det_labels,\n                    mask_preds,\n                    seg_preds,\n                    det_bboxes,\n                    cfg=None,\n                    **kwargs):\n        \"\"\"Test without augmentation.\"\"\"\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/seg_heads/panoptic_fusion_heads/heuristic_fusion_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core.evaluation.panoptic_utils import INSTANCE_OFFSET\nfrom mmdet.models.builder import HEADS\nfrom .base_panoptic_fusion_head import BasePanopticFusionHead\n\n\n@HEADS.register_module()\nclass HeuristicFusionHead(BasePanopticFusionHead):\n    \"\"\"Fusion Head with Heuristic method.\"\"\"\n\n    def __init__(self,\n                 num_things_classes=80,\n                 num_stuff_classes=53,\n                 test_cfg=None,\n                 init_cfg=None,\n                 **kwargs):\n        super(HeuristicFusionHead,\n              self).__init__(num_things_classes, num_stuff_classes, test_cfg,\n                             None, init_cfg, **kwargs)\n\n    def forward_train(self, gt_masks=None, gt_semantic_seg=None, **kwargs):\n        \"\"\"HeuristicFusionHead has no training loss.\"\"\"\n        return dict()\n\n    def _lay_masks(self, bboxes, labels, masks, overlap_thr=0.5):\n        \"\"\"Lay instance masks to a result map.\n\n        Args:\n            bboxes: The bboxes results, (K, 4).\n            labels: The labels of bboxes, (K, ).\n            masks: The instance masks, (K, H, W).\n            overlap_thr: Threshold to determine whether two masks overlap.\n                default: 0.5.\n\n        Returns:\n            Tensor: The result map, (H, W).\n        \"\"\"\n        num_insts = bboxes.shape[0]\n        id_map = torch.zeros(\n            masks.shape[-2:], device=bboxes.device, dtype=torch.long)\n        if num_insts == 0:\n            return id_map, labels\n\n        scores, bboxes = bboxes[:, -1], bboxes[:, :4]\n\n        # Sort by score to use heuristic fusion\n        order = torch.argsort(-scores)\n        bboxes = bboxes[order]\n        labels = labels[order]\n        segm_masks = masks[order]\n\n        instance_id = 1\n        left_labels = []\n        for idx in range(bboxes.shape[0]):\n            _cls = labels[idx]\n            _mask = segm_masks[idx]\n            instance_id_map = torch.ones_like(\n                _mask, dtype=torch.long) * instance_id\n            area = _mask.sum()\n            if area == 0:\n                continue\n\n            pasted = id_map > 0\n            intersect = (_mask * pasted).sum()\n            if (intersect / (area + 1e-5)) > overlap_thr:\n                continue\n\n            _part = _mask * (~pasted)\n            id_map = torch.where(_part, instance_id_map, id_map)\n            left_labels.append(_cls)\n            instance_id += 1\n\n        if len(left_labels) > 0:\n            instance_labels = torch.stack(left_labels)\n        else:\n            instance_labels = bboxes.new_zeros((0, ), dtype=torch.long)\n        assert instance_id == (len(instance_labels) + 1)\n        return id_map, instance_labels\n\n    def simple_test(self, det_bboxes, det_labels, mask_preds, seg_preds,\n                    **kwargs):\n        \"\"\"Fuse the results of instance and semantic segmentations.\n\n        Args:\n            det_bboxes: The bboxes results, (K, 4).\n            det_labels: The labels of bboxes, (K,).\n            mask_preds: The masks results, (K, H, W).\n            seg_preds: The semantic segmentation results,\n                (K, num_stuff + 1, H, W).\n\n        Returns:\n            Tensor : The panoptic segmentation result, (H, W).\n        \"\"\"\n        mask_preds = mask_preds >= self.test_cfg.mask_thr_binary\n        id_map, labels = self._lay_masks(det_bboxes, det_labels, mask_preds,\n                             
            self.test_cfg.mask_overlap)\n\n        seg_results = seg_preds.argmax(dim=0)\n        seg_results = seg_results + self.num_things_classes\n\n        pan_results = seg_results\n        instance_id = 1\n        for idx in range(det_labels.shape[0]):\n            _mask = id_map == (idx + 1)\n            if _mask.sum() == 0:\n                continue\n            _cls = labels[idx]\n            # simply trust detection\n            segment_id = _cls + instance_id * INSTANCE_OFFSET\n            pan_results[_mask] = segment_id\n            instance_id += 1\n\n        ids, counts = torch.unique(\n            pan_results % INSTANCE_OFFSET, return_counts=True)\n        stuff_ids = ids[ids >= self.num_things_classes]\n        stuff_counts = counts[ids >= self.num_things_classes]\n        ignore_stuff_ids = stuff_ids[\n            stuff_counts < self.test_cfg.stuff_area_limit]\n\n        assert pan_results.ndim == 2\n        pan_results[(pan_results.unsqueeze(2) == ignore_stuff_ids.reshape(\n            1, 1, -1)).any(dim=2)] = self.num_classes\n\n        return pan_results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/seg_heads/panoptic_fusion_heads/maskformer_fusion_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn.functional as F\n\nfrom mmdet.core.evaluation.panoptic_utils import INSTANCE_OFFSET\nfrom mmdet.core.mask import mask2bbox\nfrom mmdet.models.builder import HEADS\nfrom .base_panoptic_fusion_head import BasePanopticFusionHead\n\n\n@HEADS.register_module()\nclass MaskFormerFusionHead(BasePanopticFusionHead):\n\n    def __init__(self,\n                 num_things_classes=80,\n                 num_stuff_classes=53,\n                 test_cfg=None,\n                 loss_panoptic=None,\n                 init_cfg=None,\n                 **kwargs):\n        super().__init__(num_things_classes, num_stuff_classes, test_cfg,\n                         loss_panoptic, init_cfg, **kwargs)\n\n    def forward_train(self, **kwargs):\n        \"\"\"MaskFormerFusionHead has no training loss.\"\"\"\n        return dict()\n\n    def panoptic_postprocess(self, mask_cls, mask_pred):\n        \"\"\"Panoptic segmengation inference.\n\n        Args:\n            mask_cls (Tensor): Classfication outputs of shape\n                (num_queries, cls_out_channels) for a image.\n                Note `cls_out_channels` should includes\n                background.\n            mask_pred (Tensor): Mask outputs of shape\n                (num_queries, h, w) for a image.\n\n        Returns:\n            Tensor: Panoptic segment result of shape \\\n                (h, w), each element in Tensor means: \\\n                ``segment_id = _cls + instance_id * INSTANCE_OFFSET``.\n        \"\"\"\n        object_mask_thr = self.test_cfg.get('object_mask_thr', 0.8)\n        iou_thr = self.test_cfg.get('iou_thr', 0.8)\n        filter_low_score = self.test_cfg.get('filter_low_score', False)\n\n        scores, labels = F.softmax(mask_cls, dim=-1).max(-1)\n        mask_pred = mask_pred.sigmoid()\n\n        keep = labels.ne(self.num_classes) & (scores > object_mask_thr)\n        cur_scores = scores[keep]\n        cur_classes = labels[keep]\n        cur_masks = mask_pred[keep]\n\n        cur_prob_masks = cur_scores.view(-1, 1, 1) * cur_masks\n\n        h, w = cur_masks.shape[-2:]\n        panoptic_seg = torch.full((h, w),\n                                  self.num_classes,\n                                  dtype=torch.int32,\n                                  device=cur_masks.device)\n        if cur_masks.shape[0] == 0:\n            # We didn't detect any mask :(\n            pass\n        else:\n            cur_mask_ids = cur_prob_masks.argmax(0)\n            instance_id = 1\n            for k in range(cur_classes.shape[0]):\n                pred_class = int(cur_classes[k].item())\n                isthing = pred_class < self.num_things_classes\n                mask = cur_mask_ids == k\n                mask_area = mask.sum().item()\n                original_area = (cur_masks[k] >= 0.5).sum().item()\n\n                if filter_low_score:\n                    mask = mask & (cur_masks[k] >= 0.5)\n\n                if mask_area > 0 and original_area > 0:\n                    if mask_area / original_area < iou_thr:\n                        continue\n\n                    if not isthing:\n                        # different stuff regions of same class will be\n                        # merged here, and stuff share the instance_id 0.\n                        panoptic_seg[mask] = pred_class\n                    else:\n                        panoptic_seg[mask] = (\n                            pred_class + instance_id * INSTANCE_OFFSET)\n               
         instance_id += 1\n\n        return panoptic_seg\n\n    def semantic_postprocess(self, mask_cls, mask_pred):\n        \"\"\"Semantic segmengation postprocess.\n\n        Args:\n            mask_cls (Tensor): Classfication outputs of shape\n                (num_queries, cls_out_channels) for a image.\n                Note `cls_out_channels` should includes\n                background.\n            mask_pred (Tensor): Mask outputs of shape\n                (num_queries, h, w) for a image.\n\n        Returns:\n            Tensor: Semantic segment result of shape \\\n                (cls_out_channels, h, w).\n        \"\"\"\n        # TODO add semantic segmentation result\n        raise NotImplementedError\n\n    def instance_postprocess(self, mask_cls, mask_pred):\n        \"\"\"Instance segmengation postprocess.\n\n        Args:\n            mask_cls (Tensor): Classfication outputs of shape\n                (num_queries, cls_out_channels) for a image.\n                Note `cls_out_channels` should includes\n                background.\n            mask_pred (Tensor): Mask outputs of shape\n                (num_queries, h, w) for a image.\n\n        Returns:\n            tuple[Tensor]: Instance segmentation results.\n\n            - labels_per_image (Tensor): Predicted labels,\\\n                shape (n, ).\n            - bboxes (Tensor): Bboxes and scores with shape (n, 5) of \\\n                positive region in binary mask, the last column is scores.\n            - mask_pred_binary (Tensor): Instance masks of \\\n                shape (n, h, w).\n        \"\"\"\n        max_per_image = self.test_cfg.get('max_per_image', 100)\n        num_queries = mask_cls.shape[0]\n        # shape (num_queries, num_class)\n        scores = F.softmax(mask_cls, dim=-1)[:, :-1]\n        # shape (num_queries * num_class, )\n        labels = torch.arange(self.num_classes, device=mask_cls.device).\\\n            unsqueeze(0).repeat(num_queries, 1).flatten(0, 1)\n        scores_per_image, top_indices = scores.flatten(0, 1).topk(\n            max_per_image, sorted=False)\n        labels_per_image = labels[top_indices]\n\n        query_indices = top_indices // self.num_classes\n        mask_pred = mask_pred[query_indices]\n\n        # extract things\n        is_thing = labels_per_image < self.num_things_classes\n        scores_per_image = scores_per_image[is_thing]\n        labels_per_image = labels_per_image[is_thing]\n        mask_pred = mask_pred[is_thing]\n\n        mask_pred_binary = (mask_pred > 0).float()\n        mask_scores_per_image = (mask_pred.sigmoid() *\n                                 mask_pred_binary).flatten(1).sum(1) / (\n                                     mask_pred_binary.flatten(1).sum(1) + 1e-6)\n        det_scores = scores_per_image * mask_scores_per_image\n        mask_pred_binary = mask_pred_binary.bool()\n        bboxes = mask2bbox(mask_pred_binary)\n        bboxes = torch.cat([bboxes, det_scores[:, None]], dim=-1)\n\n        return labels_per_image, bboxes, mask_pred_binary\n\n    def simple_test(self,\n                    mask_cls_results,\n                    mask_pred_results,\n                    img_metas,\n                    rescale=False,\n                    **kwargs):\n        \"\"\"Test segment without test-time aumengtation.\n\n        Only the output of last decoder layers was used.\n\n        Args:\n            mask_cls_results (Tensor): Mask classification logits,\n                shape (batch_size, num_queries, cls_out_channels).\n                Note 
`cls_out_channels` should includes background.\n            mask_pred_results (Tensor): Mask logits, shape\n                (batch_size, num_queries, h, w).\n            img_metas (list[dict]): List of image information.\n            rescale (bool, optional): If True, return boxes in\n                original image space. Default False.\n\n        Returns:\n            list[dict[str, Tensor | tuple[Tensor]]]: Semantic segmentation \\\n                results and panoptic segmentation results for each \\\n                image.\n\n            .. code-block:: none\n\n                [\n                    {\n                        'pan_results': Tensor, # shape = [h, w]\n                        'ins_results': tuple[Tensor],\n                        # semantic segmentation results are not supported yet\n                        'sem_results': Tensor\n                    },\n                    ...\n                ]\n        \"\"\"\n        panoptic_on = self.test_cfg.get('panoptic_on', True)\n        semantic_on = self.test_cfg.get('semantic_on', False)\n        instance_on = self.test_cfg.get('instance_on', False)\n        assert not semantic_on, 'segmantic segmentation '\\\n            'results are not supported yet.'\n\n        results = []\n        for mask_cls_result, mask_pred_result, meta in zip(\n                mask_cls_results, mask_pred_results, img_metas):\n            # remove padding\n            img_height, img_width = meta['img_shape'][:2]\n            mask_pred_result = mask_pred_result[:, :img_height, :img_width]\n\n            if rescale:\n                # return result in original resolution\n                ori_height, ori_width = meta['ori_shape'][:2]\n                mask_pred_result = F.interpolate(\n                    mask_pred_result[:, None],\n                    size=(ori_height, ori_width),\n                    mode='bilinear',\n                    align_corners=False)[:, 0]\n\n            result = dict()\n            if panoptic_on:\n                pan_results = self.panoptic_postprocess(\n                    mask_cls_result, mask_pred_result)\n                result['pan_results'] = pan_results\n\n            if instance_on:\n                ins_results = self.instance_postprocess(\n                    mask_cls_result, mask_pred_result)\n                result['ins_results'] = ins_results\n\n            if semantic_on:\n                sem_results = self.semantic_postprocess(\n                    mask_cls_result, mask_pred_result)\n                result['sem_results'] = sem_results\n\n            results.append(result)\n\n        return results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .brick_wrappers import AdaptiveAvgPool2d, adaptive_avg_pool2d\nfrom .builder import build_linear_layer, build_transformer\nfrom .ckpt_convert import pvt_convert\nfrom .conv_upsample import ConvUpsample\nfrom .csp_layer import CSPLayer\nfrom .gaussian_target import gaussian_radius, gen_gaussian_target\nfrom .inverted_residual import InvertedResidual\nfrom .make_divisible import make_divisible\nfrom .misc import interpolate_as, sigmoid_geometric_mean\nfrom .normed_predictor import NormedConv2d, NormedLinear\nfrom .panoptic_gt_processing import preprocess_panoptic_gt\nfrom .point_sample import (get_uncertain_point_coords_with_randomness,\n                           get_uncertainty)\nfrom .positional_encoding import (LearnedPositionalEncoding,\n                                  SinePositionalEncoding)\nfrom .res_layer import ResLayer, SimplifiedBasicBlock\nfrom .se_layer import DyReLU, SELayer\nfrom .transformer import (DetrTransformerDecoder, DetrTransformerDecoderLayer,\n                          DynamicConv, PatchEmbed, Transformer, nchw_to_nlc,\n                          nlc_to_nchw)\n\n__all__ = [\n    'ResLayer', 'gaussian_radius', 'gen_gaussian_target',\n    'DetrTransformerDecoderLayer', 'DetrTransformerDecoder', 'Transformer',\n    'build_transformer', 'build_linear_layer', 'SinePositionalEncoding',\n    'LearnedPositionalEncoding', 'DynamicConv', 'SimplifiedBasicBlock',\n    'NormedLinear', 'NormedConv2d', 'make_divisible', 'InvertedResidual',\n    'SELayer', 'interpolate_as', 'ConvUpsample', 'CSPLayer',\n    'adaptive_avg_pool2d', 'AdaptiveAvgPool2d', 'PatchEmbed', 'nchw_to_nlc',\n    'nlc_to_nchw', 'pvt_convert', 'sigmoid_geometric_mean',\n    'preprocess_panoptic_gt', 'DyReLU',\n    'get_uncertain_point_coords_with_randomness', 'get_uncertainty'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/brick_wrappers.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn.bricks.wrappers import NewEmptyTensorOp, obsolete_torch_version\n\nif torch.__version__ == 'parrots':\n    TORCH_VERSION = torch.__version__\nelse:\n    # torch.__version__ could be 1.3.1+cu92, we only need the first two\n    # for comparison\n    TORCH_VERSION = tuple(int(x) for x in torch.__version__.split('.')[:2])\n\n\ndef adaptive_avg_pool2d(input, output_size):\n    \"\"\"Handle empty batch dimension to adaptive_avg_pool2d.\n\n    Args:\n        input (tensor): 4D tensor.\n        output_size (int, tuple[int,int]): the target output size.\n    \"\"\"\n    if input.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):\n        if isinstance(output_size, int):\n            output_size = [output_size, output_size]\n        output_size = [*input.shape[:2], *output_size]\n        empty = NewEmptyTensorOp.apply(input, output_size)\n        return empty\n    else:\n        return F.adaptive_avg_pool2d(input, output_size)\n\n\nclass AdaptiveAvgPool2d(nn.AdaptiveAvgPool2d):\n    \"\"\"Handle empty batch dimension to AdaptiveAvgPool2d.\"\"\"\n\n    def forward(self, x):\n        # PyTorch 1.9 does not support empty tensor inference yet\n        if x.numel() == 0 and obsolete_torch_version(TORCH_VERSION, (1, 9)):\n            output_size = self.output_size\n            if isinstance(output_size, int):\n                output_size = [output_size, output_size]\n            else:\n                output_size = [\n                    v if v is not None else d\n                    for v, d in zip(output_size,\n                                    x.size()[-2:])\n                ]\n            output_size = [*x.shape[:2], *output_size]\n            empty = NewEmptyTensorOp.apply(x, output_size)\n            return empty\n\n        return super().forward(x)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/builder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nfrom mmcv.utils import Registry, build_from_cfg\n\nTRANSFORMER = Registry('Transformer')\nLINEAR_LAYERS = Registry('linear layers')\n\n\ndef build_transformer(cfg, default_args=None):\n    \"\"\"Builder for Transformer.\"\"\"\n    return build_from_cfg(cfg, TRANSFORMER, default_args)\n\n\nLINEAR_LAYERS.register_module('Linear', module=nn.Linear)\n\n\ndef build_linear_layer(cfg, *args, **kwargs):\n    \"\"\"Build linear layer.\n    Args:\n        cfg (None or dict): The linear layer config, which should contain:\n            - type (str): Layer type.\n            - layer args: Args needed to instantiate an linear layer.\n        args (argument list): Arguments passed to the `__init__`\n            method of the corresponding linear layer.\n        kwargs (keyword arguments): Keyword arguments passed to the `__init__`\n            method of the corresponding linear layer.\n    Returns:\n        nn.Module: Created linear layer.\n    \"\"\"\n    if cfg is None:\n        cfg_ = dict(type='Linear')\n    else:\n        if not isinstance(cfg, dict):\n            raise TypeError('cfg must be a dict')\n        if 'type' not in cfg:\n            raise KeyError('the cfg dict must contain the key \"type\"')\n        cfg_ = cfg.copy()\n\n    layer_type = cfg_.pop('type')\n    if layer_type not in LINEAR_LAYERS:\n        raise KeyError(f'Unrecognized linear type {layer_type}')\n    else:\n        linear_layer = LINEAR_LAYERS.get(layer_type)\n\n    layer = linear_layer(*args, **kwargs, **cfg_)\n\n    return layer\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/ckpt_convert.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\n# This script consists of several convert functions which\n# can modify the weights of model in original repo to be\n# pre-trained weights.\n\nfrom collections import OrderedDict\n\nimport torch\n\n\ndef pvt_convert(ckpt):\n    new_ckpt = OrderedDict()\n    # Process the concat between q linear weights and kv linear weights\n    use_abs_pos_embed = False\n    use_conv_ffn = False\n    for k in ckpt.keys():\n        if k.startswith('pos_embed'):\n            use_abs_pos_embed = True\n        if k.find('dwconv') >= 0:\n            use_conv_ffn = True\n    for k, v in ckpt.items():\n        if k.startswith('head'):\n            continue\n        if k.startswith('norm.'):\n            continue\n        if k.startswith('cls_token'):\n            continue\n        if k.startswith('pos_embed'):\n            stage_i = int(k.replace('pos_embed', ''))\n            new_k = k.replace(f'pos_embed{stage_i}',\n                              f'layers.{stage_i - 1}.1.0.pos_embed')\n            if stage_i == 4 and v.size(1) == 50:  # 1 (cls token) + 7 * 7\n                new_v = v[:, 1:, :]  # remove cls token\n            else:\n                new_v = v\n        elif k.startswith('patch_embed'):\n            stage_i = int(k.split('.')[0].replace('patch_embed', ''))\n            new_k = k.replace(f'patch_embed{stage_i}',\n                              f'layers.{stage_i - 1}.0')\n            new_v = v\n            if 'proj.' in new_k:\n                new_k = new_k.replace('proj.', 'projection.')\n        elif k.startswith('block'):\n            stage_i = int(k.split('.')[0].replace('block', ''))\n            layer_i = int(k.split('.')[1])\n            new_layer_i = layer_i + use_abs_pos_embed\n            new_k = k.replace(f'block{stage_i}.{layer_i}',\n                              f'layers.{stage_i - 1}.1.{new_layer_i}')\n            new_v = v\n            if 'attn.q.' in new_k:\n                sub_item_k = k.replace('q.', 'kv.')\n                new_k = new_k.replace('q.', 'attn.in_proj_')\n                new_v = torch.cat([v, ckpt[sub_item_k]], dim=0)\n            elif 'attn.kv.' in new_k:\n                continue\n            elif 'attn.proj.' in new_k:\n                new_k = new_k.replace('proj.', 'attn.out_proj.')\n            elif 'attn.sr.' in new_k:\n                new_k = new_k.replace('sr.', 'sr.')\n            elif 'mlp.' 
in new_k:\n                string = f'{new_k}-'\n                new_k = new_k.replace('mlp.', 'ffn.layers.')\n                if 'fc1.weight' in new_k or 'fc2.weight' in new_k:\n                    new_v = v.reshape((*v.shape, 1, 1))\n                new_k = new_k.replace('fc1.', '0.')\n                new_k = new_k.replace('dwconv.dwconv.', '1.')\n                if use_conv_ffn:\n                    new_k = new_k.replace('fc2.', '4.')\n                else:\n                    new_k = new_k.replace('fc2.', '3.')\n                string += f'{new_k} {v.shape}-{new_v.shape}'\n        elif k.startswith('norm'):\n            stage_i = int(k[4])\n            new_k = k.replace(f'norm{stage_i}', f'layers.{stage_i - 1}.2')\n            new_v = v\n        else:\n            new_k = k\n            new_v = v\n        new_ckpt[new_k] = new_v\n\n    return new_ckpt\n\n\ndef swin_converter(ckpt):\n\n    new_ckpt = OrderedDict()\n\n    def correct_unfold_reduction_order(x):\n        out_channel, in_channel = x.shape\n        x = x.reshape(out_channel, 4, in_channel // 4)\n        x = x[:, [0, 2, 1, 3], :].transpose(1,\n                                            2).reshape(out_channel, in_channel)\n        return x\n\n    def correct_unfold_norm_order(x):\n        in_channel = x.shape[0]\n        x = x.reshape(4, in_channel // 4)\n        x = x[[0, 2, 1, 3], :].transpose(0, 1).reshape(in_channel)\n        return x\n\n    for k, v in ckpt.items():\n        if k.startswith('head'):\n            continue\n        elif k.startswith('layers'):\n            new_v = v\n            if 'attn.' in k:\n                new_k = k.replace('attn.', 'attn.w_msa.')\n            elif 'mlp.' in k:\n                if 'mlp.fc1.' in k:\n                    new_k = k.replace('mlp.fc1.', 'ffn.layers.0.0.')\n                elif 'mlp.fc2.' in k:\n                    new_k = k.replace('mlp.fc2.', 'ffn.layers.1.')\n                else:\n                    new_k = k.replace('mlp.', 'ffn.')\n            elif 'downsample' in k:\n                new_k = k\n                if 'reduction.' in k:\n                    new_v = correct_unfold_reduction_order(v)\n                elif 'norm.' in k:\n                    new_v = correct_unfold_norm_order(v)\n            else:\n                new_k = k\n            new_k = new_k.replace('layers', 'stages', 1)\n        elif k.startswith('patch_embed'):\n            new_v = v\n            if 'proj' in k:\n                new_k = k.replace('proj', 'projection')\n            else:\n                new_k = k\n        else:\n            new_v = v\n            new_k = k\n\n        new_ckpt['backbone.' + new_k] = new_v\n\n    return new_ckpt\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/conv_upsample.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn.functional as F\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule, ModuleList\n\n\nclass ConvUpsample(BaseModule):\n    \"\"\"ConvUpsample performs 2x upsampling after Conv.\n\n    There are several `ConvModule` layers. In the first few layers, upsampling\n    will be applied after each layer of convolution. The number of upsampling\n    must be no more than the number of ConvModule layers.\n\n    Args:\n        in_channels (int): Number of channels in the input feature map.\n        inner_channels (int): Number of channels produced by the convolution.\n        num_layers (int): Number of convolution layers.\n        num_upsample (int | optional): Number of upsampling layer. Must be no\n            more than num_layers. Upsampling will be applied after the first\n            ``num_upsample`` layers of convolution. Default: ``num_layers``.\n        conv_cfg (dict): Config dict for convolution layer. Default: None,\n            which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer. Default: None.\n        init_cfg (dict): Config dict for initialization. Default: None.\n        kwargs (key word augments): Other augments used in ConvModule.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 inner_channels,\n                 num_layers=1,\n                 num_upsample=None,\n                 conv_cfg=None,\n                 norm_cfg=None,\n                 init_cfg=None,\n                 **kwargs):\n        super(ConvUpsample, self).__init__(init_cfg)\n        if num_upsample is None:\n            num_upsample = num_layers\n        assert num_upsample <= num_layers, \\\n            f'num_upsample({num_upsample})must be no more than ' \\\n            f'num_layers({num_layers})'\n        self.num_layers = num_layers\n        self.num_upsample = num_upsample\n        self.conv = ModuleList()\n        for i in range(num_layers):\n            self.conv.append(\n                ConvModule(\n                    in_channels,\n                    inner_channels,\n                    3,\n                    padding=1,\n                    stride=1,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    **kwargs))\n            in_channels = inner_channels\n\n    def forward(self, x):\n        num_upsample = self.num_upsample\n        for i in range(self.num_layers):\n            x = self.conv[i](x)\n            if num_upsample > 0:\n                num_upsample -= 1\n                x = F.interpolate(\n                    x, scale_factor=2, mode='bilinear', align_corners=False)\n        return x\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/csp_layer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule, DepthwiseSeparableConvModule\nfrom mmcv.runner import BaseModule\n\n\nclass DarknetBottleneck(BaseModule):\n    \"\"\"The basic bottleneck block used in Darknet.\n\n    Each ResBlock consists of two ConvModules and the input is added to the\n    final output. Each ConvModule is composed of Conv, BN, and LeakyReLU.\n    The first convLayer has filter size of 1x1 and the second one has the\n    filter size of 3x3.\n\n    Args:\n        in_channels (int): The input channels of this Module.\n        out_channels (int): The output channels of this Module.\n        expansion (int): The kernel size of the convolution. Default: 0.5\n        add_identity (bool): Whether to add identity to the out.\n            Default: True\n        use_depthwise (bool): Whether to use depthwise separable convolution.\n            Default: False\n        conv_cfg (dict): Config dict for convolution layer. Default: None,\n            which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='BN').\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='Swish').\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 expansion=0.5,\n                 add_identity=True,\n                 use_depthwise=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),\n                 act_cfg=dict(type='Swish'),\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        hidden_channels = int(out_channels * expansion)\n        conv = DepthwiseSeparableConvModule if use_depthwise else ConvModule\n        self.conv1 = ConvModule(\n            in_channels,\n            hidden_channels,\n            1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n        self.conv2 = conv(\n            hidden_channels,\n            out_channels,\n            3,\n            stride=1,\n            padding=1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n        self.add_identity = \\\n            add_identity and in_channels == out_channels\n\n    def forward(self, x):\n        identity = x\n        out = self.conv1(x)\n        out = self.conv2(out)\n\n        if self.add_identity:\n            return out + identity\n        else:\n            return out\n\n\nclass CSPLayer(BaseModule):\n    \"\"\"Cross Stage Partial Layer.\n\n    Args:\n        in_channels (int): The input channels of the CSP layer.\n        out_channels (int): The output channels of the CSP layer.\n        expand_ratio (float): Ratio to adjust the number of channels of the\n            hidden layer. Default: 0.5\n        num_blocks (int): Number of blocks. Default: 1\n        add_identity (bool): Whether to add identity in blocks.\n            Default: True\n        use_depthwise (bool): Whether to depthwise separable convolution in\n            blocks. 
Default: False\n        conv_cfg (dict, optional): Config dict for convolution layer.\n            Default: None, which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='BN')\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='Swish')\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 expand_ratio=0.5,\n                 num_blocks=1,\n                 add_identity=True,\n                 use_depthwise=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),\n                 act_cfg=dict(type='Swish'),\n                 init_cfg=None):\n        super().__init__(init_cfg)\n        mid_channels = int(out_channels * expand_ratio)\n        self.main_conv = ConvModule(\n            in_channels,\n            mid_channels,\n            1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n        self.short_conv = ConvModule(\n            in_channels,\n            mid_channels,\n            1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n        self.final_conv = ConvModule(\n            2 * mid_channels,\n            out_channels,\n            1,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n\n        self.blocks = nn.Sequential(*[\n            DarknetBottleneck(\n                mid_channels,\n                mid_channels,\n                1.0,\n                add_identity,\n                use_depthwise,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg) for _ in range(num_blocks)\n        ])\n\n    def forward(self, x):\n        x_short = self.short_conv(x)\n\n        x_main = self.main_conv(x)\n        x_main = self.blocks(x_main)\n\n        x_final = torch.cat((x_main, x_short), dim=1)\n        return self.final_conv(x_final)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/gaussian_target.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom math import sqrt\n\nimport torch\nimport torch.nn.functional as F\n\n\ndef gaussian2D(radius, sigma=1, dtype=torch.float32, device='cpu'):\n    \"\"\"Generate 2D gaussian kernel.\n\n    Args:\n        radius (int): Radius of gaussian kernel.\n        sigma (int): Sigma of gaussian function. Default: 1.\n        dtype (torch.dtype): Dtype of gaussian tensor. Default: torch.float32.\n        device (str): Device of gaussian tensor. Default: 'cpu'.\n\n    Returns:\n        h (Tensor): Gaussian kernel with a\n            ``(2 * radius + 1) * (2 * radius + 1)`` shape.\n    \"\"\"\n    x = torch.arange(\n        -radius, radius + 1, dtype=dtype, device=device).view(1, -1)\n    y = torch.arange(\n        -radius, radius + 1, dtype=dtype, device=device).view(-1, 1)\n\n    h = (-(x * x + y * y) / (2 * sigma * sigma)).exp()\n\n    h[h < torch.finfo(h.dtype).eps * h.max()] = 0\n    return h\n\n\ndef gen_gaussian_target(heatmap, center, radius, k=1):\n    \"\"\"Generate 2D gaussian heatmap.\n\n    Args:\n        heatmap (Tensor): Input heatmap, the gaussian kernel will cover on\n            it and maintain the max value.\n        center (list[int]): Coord of gaussian kernel's center.\n        radius (int): Radius of gaussian kernel.\n        k (int): Coefficient of gaussian kernel. Default: 1.\n\n    Returns:\n        out_heatmap (Tensor): Updated heatmap covered by gaussian kernel.\n    \"\"\"\n    diameter = 2 * radius + 1\n    gaussian_kernel = gaussian2D(\n        radius, sigma=diameter / 6, dtype=heatmap.dtype, device=heatmap.device)\n\n    x, y = center\n\n    height, width = heatmap.shape[:2]\n\n    left, right = min(x, radius), min(width - x, radius + 1)\n    top, bottom = min(y, radius), min(height - y, radius + 1)\n\n    masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]\n    masked_gaussian = gaussian_kernel[radius - top:radius + bottom,\n                                      radius - left:radius + right]\n    out_heatmap = heatmap\n    torch.max(\n        masked_heatmap,\n        masked_gaussian * k,\n        out=out_heatmap[y - top:y + bottom, x - left:x + right])\n\n    return out_heatmap\n\n\ndef gaussian_radius(det_size, min_overlap):\n    r\"\"\"Generate 2D gaussian radius.\n\n    This function is modified from the `official github repo\n    <https://github.com/princeton-vl/CornerNet-Lite/blob/master/core/sample/\n    utils.py#L65>`_.\n\n    Given ``min_overlap``, radius could computed by a quadratic equation\n    according to Vieta's formulas.\n\n    There are 3 cases for computing gaussian radius, details are following:\n\n    - Explanation of figure: ``lt`` and ``br`` indicates the left-top and\n      bottom-right corner of ground truth box. ``x`` indicates the\n      generated corner at the limited position when ``radius=r``.\n\n    - Case1: one corner is inside the gt box and the other is outside.\n\n    .. code:: text\n\n        |<   width   >|\n\n        lt-+----------+         -\n        |  |          |         ^\n        +--x----------+--+\n        |  |          |  |\n        |  |          |  |    height\n        |  | overlap  |  |\n        |  |          |  |\n        |  |          |  |      v\n        +--+---------br--+      -\n           |          |  |\n           +----------+--x\n\n    To ensure IoU of generated box and gt box is larger than ``min_overlap``:\n\n    .. 
math::\n        \\cfrac{(w-r)*(h-r)}{w*h+(w+h)r-r^2} \\ge {iou} \\quad\\Rightarrow\\quad\n        {r^2-(w+h)r+\\cfrac{1-iou}{1+iou}*w*h} \\ge 0 \\\\\n        {a} = 1,\\quad{b} = {-(w+h)},\\quad{c} = {\\cfrac{1-iou}{1+iou}*w*h} \\\\\n        {r} \\le \\cfrac{-b-\\sqrt{b^2-4*a*c}}{2*a}\n\n    - Case2: both two corners are inside the gt box.\n\n    .. code:: text\n\n        |<   width   >|\n\n        lt-+----------+         -\n        |  |          |         ^\n        +--x-------+  |\n        |  |       |  |\n        |  |overlap|  |       height\n        |  |       |  |\n        |  +-------x--+\n        |          |  |         v\n        +----------+-br         -\n\n    To ensure IoU of generated box and gt box is larger than ``min_overlap``:\n\n    .. math::\n        \\cfrac{(w-2*r)*(h-2*r)}{w*h} \\ge {iou} \\quad\\Rightarrow\\quad\n        {4r^2-2(w+h)r+(1-iou)*w*h} \\ge 0 \\\\\n        {a} = 4,\\quad {b} = {-2(w+h)},\\quad {c} = {(1-iou)*w*h} \\\\\n        {r} \\le \\cfrac{-b-\\sqrt{b^2-4*a*c}}{2*a}\n\n    - Case3: both two corners are outside the gt box.\n\n    .. code:: text\n\n           |<   width   >|\n\n        x--+----------------+\n        |  |                |\n        +-lt-------------+  |   -\n        |  |             |  |   ^\n        |  |             |  |\n        |  |   overlap   |  | height\n        |  |             |  |\n        |  |             |  |   v\n        |  +------------br--+   -\n        |                |  |\n        +----------------+--x\n\n    To ensure IoU of generated box and gt box is larger than ``min_overlap``:\n\n    .. math::\n        \\cfrac{w*h}{(w+2*r)*(h+2*r)} \\ge {iou} \\quad\\Rightarrow\\quad\n        {4*iou*r^2+2*iou*(w+h)r+(iou-1)*w*h} \\le 0 \\\\\n        {a} = {4*iou},\\quad {b} = {2*iou*(w+h)},\\quad {c} = {(iou-1)*w*h} \\\\\n        {r} \\le \\cfrac{-b+\\sqrt{b^2-4*a*c}}{2*a}\n\n    Args:\n        det_size (list[int]): Shape of object.\n        min_overlap (float): Min IoU with ground truth for boxes generated by\n            keypoints inside the gaussian kernel.\n\n    Returns:\n        radius (int): Radius of gaussian kernel.\n    \"\"\"\n    height, width = det_size\n\n    a1 = 1\n    b1 = (height + width)\n    c1 = width * height * (1 - min_overlap) / (1 + min_overlap)\n    sq1 = sqrt(b1**2 - 4 * a1 * c1)\n    r1 = (b1 - sq1) / (2 * a1)\n\n    a2 = 4\n    b2 = 2 * (height + width)\n    c2 = (1 - min_overlap) * width * height\n    sq2 = sqrt(b2**2 - 4 * a2 * c2)\n    r2 = (b2 - sq2) / (2 * a2)\n\n    a3 = 4 * min_overlap\n    b3 = -2 * min_overlap * (height + width)\n    c3 = (min_overlap - 1) * width * height\n    sq3 = sqrt(b3**2 - 4 * a3 * c3)\n    r3 = (b3 + sq3) / (2 * a3)\n    return min(r1, r2, r3)\n\n\ndef get_local_maximum(heat, kernel=3):\n    \"\"\"Extract local maximum pixel with given kernel.\n\n    Args:\n        heat (Tensor): Target heatmap.\n        kernel (int): Kernel size of max pooling. Default: 3.\n\n    Returns:\n        heat (Tensor): A heatmap where local maximum pixels maintain its\n            own value and other positions are 0.\n    \"\"\"\n    pad = (kernel - 1) // 2\n    hmax = F.max_pool2d(heat, kernel, stride=1, padding=pad)\n    keep = (hmax == heat).float()\n    return heat * keep\n\n\ndef get_topk_from_heatmap(scores, k=20):\n    \"\"\"Get top k positions from heatmap.\n\n    Args:\n        scores (Tensor): Target heatmap with shape\n            [batch, num_classes, height, width].\n        k (int): Target number. 
Default: 20.\n\n    Returns:\n        tuple[torch.Tensor]: Scores, indexes, categories and coords of\n            topk keypoint. Containing following Tensors:\n\n        - topk_scores (Tensor): Max scores of each topk keypoint.\n        - topk_inds (Tensor): Indexes of each topk keypoint.\n        - topk_clses (Tensor): Categories of each topk keypoint.\n        - topk_ys (Tensor): Y-coord of each topk keypoint.\n        - topk_xs (Tensor): X-coord of each topk keypoint.\n    \"\"\"\n    batch, _, height, width = scores.size()\n    topk_scores, topk_inds = torch.topk(scores.view(batch, -1), k)\n    topk_clses = topk_inds // (height * width)\n    topk_inds = topk_inds % (height * width)\n    topk_ys = topk_inds // width\n    topk_xs = (topk_inds % width).int().float()\n    return topk_scores, topk_inds, topk_clses, topk_ys, topk_xs\n\n\ndef gather_feat(feat, ind, mask=None):\n    \"\"\"Gather feature according to index.\n\n    Args:\n        feat (Tensor): Target feature map.\n        ind (Tensor): Target coord index.\n        mask (Tensor | None): Mask of feature map. Default: None.\n\n    Returns:\n        feat (Tensor): Gathered feature.\n    \"\"\"\n    dim = feat.size(2)\n    ind = ind.unsqueeze(2).repeat(1, 1, dim)\n    feat = feat.gather(1, ind)\n    if mask is not None:\n        mask = mask.unsqueeze(2).expand_as(feat)\n        feat = feat[mask]\n        feat = feat.view(-1, dim)\n    return feat\n\n\ndef transpose_and_gather_feat(feat, ind):\n    \"\"\"Transpose and gather feature according to index.\n\n    Args:\n        feat (Tensor): Target feature map.\n        ind (Tensor): Target coord index.\n\n    Returns:\n        feat (Tensor): Transposed and gathered feature.\n    \"\"\"\n    feat = feat.permute(0, 2, 3, 1).contiguous()\n    feat = feat.view(feat.size(0), -1, feat.size(3))\n    feat = gather_feat(feat, ind)\n    return feat\n"
  },
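A minimal sketch (not part of the repository) of how the helpers above fit together when building CornerNet/CenterNet-style targets: derive a radius from the box size and the minimum IoU, splat a gaussian onto the heatmap, then recover the peak. It assumes the vendored mmdetection is importable as `mmdet`; the sizes are illustrative.

```python
import torch
from mmdet.models.utils.gaussian_target import (gaussian_radius,
                                                gen_gaussian_target,
                                                get_local_maximum,
                                                get_topk_from_heatmap)

# Radius that keeps IoU >= 0.7 for a 24 x 32 (h x w) ground-truth box.
radius = max(0, int(gaussian_radius((24, 32), min_overlap=0.7)))

# Splat a gaussian centred at (x=48, y=40) onto an empty heatmap.
heatmap = torch.zeros(96, 96)
heatmap = gen_gaussian_target(heatmap, center=[48, 40], radius=radius)

# Peak extraction expects a (batch, num_classes, H, W) heatmap.
scores = heatmap[None, None]
peaks = get_local_maximum(scores, kernel=3)
topk_scores, topk_inds, topk_clses, topk_ys, topk_xs = get_topk_from_heatmap(peaks, k=5)
print(topk_scores[0, 0].item(), topk_xs[0, 0].item(), topk_ys[0, 0].item())
# expected: 1.0 48.0 40 -- the maximum sits at the requested centre
```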
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/inverted_residual.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch.nn as nn\nimport torch.utils.checkpoint as cp\nfrom mmcv.cnn import ConvModule\nfrom mmcv.cnn.bricks import DropPath\nfrom mmcv.runner import BaseModule\n\nfrom .se_layer import SELayer\n\n\nclass InvertedResidual(BaseModule):\n    \"\"\"Inverted Residual Block.\n\n    Args:\n        in_channels (int): The input channels of this Module.\n        out_channels (int): The output channels of this Module.\n        mid_channels (int): The input channels of the depthwise convolution.\n        kernel_size (int): The kernel size of the depthwise convolution.\n            Default: 3.\n        stride (int): The stride of the depthwise convolution. Default: 1.\n        se_cfg (dict): Config dict for se layer. Default: None, which means no\n            se layer.\n        with_expand_conv (bool): Use expand conv or not. If set False,\n            mid_channels must be the same with in_channels.\n            Default: True.\n        conv_cfg (dict): Config dict for convolution layer. Default: None,\n            which means using conv2d.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: dict(type='BN').\n        act_cfg (dict): Config dict for activation layer.\n            Default: dict(type='ReLU').\n        drop_path_rate (float): stochastic depth rate. Defaults to 0.\n        with_cp (bool): Use checkpoint or not. Using checkpoint will save some\n            memory while slowing down the training speed. Default: False.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n\n    Returns:\n        Tensor: The output tensor.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 mid_channels,\n                 kernel_size=3,\n                 stride=1,\n                 se_cfg=None,\n                 with_expand_conv=True,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 act_cfg=dict(type='ReLU'),\n                 drop_path_rate=0.,\n                 with_cp=False,\n                 init_cfg=None):\n        super(InvertedResidual, self).__init__(init_cfg)\n        self.with_res_shortcut = (stride == 1 and in_channels == out_channels)\n        assert stride in [1, 2], f'stride must in [1, 2]. 
' \\\n            f'But received {stride}.'\n        self.with_cp = with_cp\n        self.drop_path = DropPath(\n            drop_path_rate) if drop_path_rate > 0 else nn.Identity()\n        self.with_se = se_cfg is not None\n        self.with_expand_conv = with_expand_conv\n\n        if self.with_se:\n            assert isinstance(se_cfg, dict)\n        if not self.with_expand_conv:\n            assert mid_channels == in_channels\n\n        if self.with_expand_conv:\n            self.expand_conv = ConvModule(\n                in_channels=in_channels,\n                out_channels=mid_channels,\n                kernel_size=1,\n                stride=1,\n                padding=0,\n                conv_cfg=conv_cfg,\n                norm_cfg=norm_cfg,\n                act_cfg=act_cfg)\n        self.depthwise_conv = ConvModule(\n            in_channels=mid_channels,\n            out_channels=mid_channels,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=kernel_size // 2,\n            groups=mid_channels,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=act_cfg)\n\n        if self.with_se:\n            self.se = SELayer(**se_cfg)\n\n        self.linear_conv = ConvModule(\n            in_channels=mid_channels,\n            out_channels=out_channels,\n            kernel_size=1,\n            stride=1,\n            padding=0,\n            conv_cfg=conv_cfg,\n            norm_cfg=norm_cfg,\n            act_cfg=None)\n\n    def forward(self, x):\n\n        def _inner_forward(x):\n            out = x\n\n            if self.with_expand_conv:\n                out = self.expand_conv(out)\n\n            out = self.depthwise_conv(out)\n\n            if self.with_se:\n                out = self.se(out)\n\n            out = self.linear_conv(out)\n\n            if self.with_res_shortcut:\n                return x + self.drop_path(out)\n            else:\n                return out\n\n        if self.with_cp and x.requires_grad:\n            out = cp.checkpoint(_inner_forward, x)\n        else:\n            out = _inner_forward(x)\n\n        return out\n"
  },
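A minimal sketch (not part of the repository) of instantiating the block above with a squeeze-and-excitation branch, assuming `mmdet` and `mmcv-full` are importable; all sizes are illustrative. With `stride=1` and matching in/out channels the residual shortcut is active.

```python
import torch
from mmdet.models.utils.inverted_residual import InvertedResidual

block = InvertedResidual(
    in_channels=32,
    out_channels=32,
    mid_channels=96,            # expansion width fed to the depthwise conv
    kernel_size=3,
    stride=1,
    se_cfg=dict(channels=96, ratio=4))
block.eval()

x = torch.randn(1, 32, 56, 56)
with torch.no_grad():
    out = block(x)
print(out.shape)  # expected: torch.Size([1, 32, 56, 56])
```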
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/make_divisible.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\ndef make_divisible(value, divisor, min_value=None, min_ratio=0.9):\n    \"\"\"Make divisible function.\n\n    This function rounds the channel number to the nearest value that can be\n    divisible by the divisor. It is taken from the original tf repo. It ensures\n    that all layers have a channel number that is divisible by divisor. It can\n    be seen here: https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py  # noqa\n\n    Args:\n        value (int): The original channel number.\n        divisor (int): The divisor to fully divide the channel number.\n        min_value (int): The minimum value of the output channel.\n            Default: None, means that the minimum value equal to the divisor.\n        min_ratio (float): The minimum ratio of the rounded channel number to\n            the original channel number. Default: 0.9.\n\n    Returns:\n        int: The modified output channel number.\n    \"\"\"\n\n    if min_value is None:\n        min_value = divisor\n    new_value = max(min_value, int(value + divisor / 2) // divisor * divisor)\n    # Make sure that round down does not go down by more than (1-min_ratio).\n    if new_value < min_ratio * value:\n        new_value += divisor\n    return new_value\n"
  },
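A few illustrative calls (not part of the repository), assuming the module above is importable, showing how channel counts are rounded to multiples of the divisor without dropping below `min_ratio` of the original value:

```python
from mmdet.models.utils.make_divisible import make_divisible

print(make_divisible(24, 8))   # 24: already a multiple of 8
print(make_divisible(37, 8))   # 40: rounded to the nearest multiple of 8
print(make_divisible(3, 8))    # 8: clamped to min_value, which defaults to the divisor
print(make_divisible(36, 32))  # 64: 32 would fall below 0.9 * 36, so one divisor is added
```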
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/misc.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom torch.autograd import Function\nfrom torch.nn import functional as F\n\n\nclass SigmoidGeometricMean(Function):\n    \"\"\"Forward and backward function of geometric mean of two sigmoid\n    functions.\n\n    This implementation with analytical gradient function substitutes\n    the autograd function of (x.sigmoid() * y.sigmoid()).sqrt(). The\n    original implementation incurs none during gradient backprapagation\n    if both x and y are very small values.\n    \"\"\"\n\n    @staticmethod\n    def forward(ctx, x, y):\n        x_sigmoid = x.sigmoid()\n        y_sigmoid = y.sigmoid()\n        z = (x_sigmoid * y_sigmoid).sqrt()\n        ctx.save_for_backward(x_sigmoid, y_sigmoid, z)\n        return z\n\n    @staticmethod\n    def backward(ctx, grad_output):\n        x_sigmoid, y_sigmoid, z = ctx.saved_tensors\n        grad_x = grad_output * z * (1 - x_sigmoid) / 2\n        grad_y = grad_output * z * (1 - y_sigmoid) / 2\n        return grad_x, grad_y\n\n\nsigmoid_geometric_mean = SigmoidGeometricMean.apply\n\n\ndef interpolate_as(source, target, mode='bilinear', align_corners=False):\n    \"\"\"Interpolate the `source` to the shape of the `target`.\n\n    The `source` must be a Tensor, but the `target` can be a Tensor or a\n    np.ndarray with the shape (..., target_h, target_w).\n\n    Args:\n        source (Tensor): A 3D/4D Tensor with the shape (N, H, W) or\n            (N, C, H, W).\n        target (Tensor | np.ndarray): The interpolation target with the shape\n            (..., target_h, target_w).\n        mode (str): Algorithm used for interpolation. The options are the\n            same as those in F.interpolate(). Default: ``'bilinear'``.\n        align_corners (bool): The same as the argument in F.interpolate().\n\n    Returns:\n        Tensor: The interpolated source Tensor.\n    \"\"\"\n    assert len(target.shape) >= 2\n\n    def _interpolate_as(source, target, mode='bilinear', align_corners=False):\n        \"\"\"Interpolate the `source` (4D) to the shape of the `target`.\"\"\"\n        target_h, target_w = target.shape[-2:]\n        source_h, source_w = source.shape[-2:]\n        if target_h != source_h or target_w != source_w:\n            source = F.interpolate(\n                source,\n                size=(target_h, target_w),\n                mode=mode,\n                align_corners=align_corners)\n        return source\n\n    if len(source.shape) == 3:\n        source = source[:, None, :, :]\n        source = _interpolate_as(source, target, mode, align_corners)\n        return source[:, 0, :, :]\n    else:\n        return _interpolate_as(source, target, mode, align_corners)\n"
  },
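A short sketch (not part of the repository) of the two helpers above, assuming `mmdet` is importable: `sigmoid_geometric_mean` keeps gradients finite even when both logits are so negative that the product of sigmoids underflows to zero, and `interpolate_as` resizes a tensor to a target's spatial shape.

```python
import torch
from mmdet.models.utils.misc import sigmoid_geometric_mean, interpolate_as

# Around logits of -60 the product of the two sigmoids underflows to 0 in
# float32. The analytic backward still returns finite (zero) gradients, whereas
# plain autograd through (x.sigmoid() * y.sigmoid()).sqrt() would typically
# yield NaN here.
x = torch.full((2, 3), -60.0, requires_grad=True)
y = torch.full((2, 3), -60.0, requires_grad=True)
sigmoid_geometric_mean(x, y).sum().backward()
print(torch.isfinite(x.grad).all())  # tensor(True)

source = torch.randn(1, 8, 16, 16)
target = torch.randn(1, 8, 32, 48)
print(interpolate_as(source, target).shape)  # torch.Size([1, 8, 32, 48])
```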
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/normed_predictor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import CONV_LAYERS\n\nfrom .builder import LINEAR_LAYERS\n\n\n@LINEAR_LAYERS.register_module(name='NormedLinear')\nclass NormedLinear(nn.Linear):\n    \"\"\"Normalized Linear Layer.\n\n    Args:\n        tempeature (float, optional): Tempeature term. Default to 20.\n        power (int, optional): Power term. Default to 1.0.\n        eps (float, optional): The minimal value of divisor to\n             keep numerical stability. Default to 1e-6.\n    \"\"\"\n\n    def __init__(self, *args, tempearture=20, power=1.0, eps=1e-6, **kwargs):\n        super(NormedLinear, self).__init__(*args, **kwargs)\n        self.tempearture = tempearture\n        self.power = power\n        self.eps = eps\n        self.init_weights()\n\n    def init_weights(self):\n        nn.init.normal_(self.weight, mean=0, std=0.01)\n        if self.bias is not None:\n            nn.init.constant_(self.bias, 0)\n\n    def forward(self, x):\n        weight_ = self.weight / (\n            self.weight.norm(dim=1, keepdim=True).pow(self.power) + self.eps)\n        x_ = x / (x.norm(dim=1, keepdim=True).pow(self.power) + self.eps)\n        x_ = x_ * self.tempearture\n\n        return F.linear(x_, weight_, self.bias)\n\n\n@CONV_LAYERS.register_module(name='NormedConv2d')\nclass NormedConv2d(nn.Conv2d):\n    \"\"\"Normalized Conv2d Layer.\n\n    Args:\n        tempeature (float, optional): Tempeature term. Default to 20.\n        power (int, optional): Power term. Default to 1.0.\n        eps (float, optional): The minimal value of divisor to\n             keep numerical stability. Default to 1e-6.\n        norm_over_kernel (bool, optional): Normalize over kernel.\n             Default to False.\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 tempearture=20,\n                 power=1.0,\n                 eps=1e-6,\n                 norm_over_kernel=False,\n                 **kwargs):\n        super(NormedConv2d, self).__init__(*args, **kwargs)\n        self.tempearture = tempearture\n        self.power = power\n        self.norm_over_kernel = norm_over_kernel\n        self.eps = eps\n\n    def forward(self, x):\n        if not self.norm_over_kernel:\n            weight_ = self.weight / (\n                self.weight.norm(dim=1, keepdim=True).pow(self.power) +\n                self.eps)\n        else:\n            weight_ = self.weight / (\n                self.weight.view(self.weight.size(0), -1).norm(\n                    dim=1, keepdim=True).pow(self.power)[..., None, None] +\n                self.eps)\n        x_ = x / (x.norm(dim=1, keepdim=True).pow(self.power) + self.eps)\n        x_ = x_ * self.tempearture\n\n        if hasattr(self, 'conv2d_forward'):\n            x_ = self.conv2d_forward(x_, weight_)\n        else:\n            if torch.__version__ >= '1.8':\n                x_ = self._conv_forward(x_, weight_, self.bias)\n            else:\n                x_ = self._conv_forward(x_, weight_)\n        return x_\n"
  },
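A minimal sketch (not part of the repository), assuming `mmdet`/`mmcv` are importable, of the normalized linear head above. Note that the keyword argument is spelled `tempearture` in this module even though the docstring writes "tempeature".

```python
import torch
from mmdet.models.utils.normed_predictor import NormedLinear

# The kwarg spelling follows the signature above.
fc = NormedLinear(256, 80, tempearture=20)
logits = fc(torch.randn(4, 256))
print(logits.shape)  # torch.Size([4, 80])
```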
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/panoptic_gt_processing.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\n\ndef preprocess_panoptic_gt(gt_labels, gt_masks, gt_semantic_seg, num_things,\n                           num_stuff, img_metas):\n    \"\"\"Preprocess the ground truth for a image.\n\n    Args:\n        gt_labels (Tensor): Ground truth labels of each bbox,\n            with shape (num_gts, ).\n        gt_masks (BitmapMasks): Ground truth masks of each instances\n            of a image, shape (num_gts, h, w).\n        gt_semantic_seg (Tensor | None): Ground truth of semantic\n            segmentation with the shape (1, h, w).\n            [0, num_thing_class - 1] means things,\n            [num_thing_class, num_class-1] means stuff,\n            255 means VOID. It's None when training instance segmentation.\n        img_metas (dict): List of image meta information.\n\n    Returns:\n        tuple: a tuple containing the following targets.\n\n            - labels (Tensor): Ground truth class indices for a\n                image, with shape (n, ), n is the sum of number\n                of stuff type and number of instance in a image.\n            - masks (Tensor): Ground truth mask for a image, with\n                shape (n, h, w). Contains stuff and things when training\n                panoptic segmentation, and things only when training\n                instance segmentation.\n    \"\"\"\n    num_classes = num_things + num_stuff\n\n    things_masks = gt_masks.pad(img_metas['pad_shape'][:2], pad_val=0)\\\n        .to_tensor(dtype=torch.bool, device=gt_labels.device)\n\n    if gt_semantic_seg is None:\n        masks = things_masks.long()\n        return gt_labels, masks\n\n    things_labels = gt_labels\n    gt_semantic_seg = gt_semantic_seg.squeeze(0)\n\n    semantic_labels = torch.unique(\n        gt_semantic_seg,\n        sorted=False,\n        return_inverse=False,\n        return_counts=False)\n    stuff_masks_list = []\n    stuff_labels_list = []\n    for label in semantic_labels:\n        if label < num_things or label >= num_classes:\n            continue\n        stuff_mask = gt_semantic_seg == label\n        stuff_masks_list.append(stuff_mask)\n        stuff_labels_list.append(label)\n\n    if len(stuff_masks_list) > 0:\n        stuff_masks = torch.stack(stuff_masks_list, dim=0)\n        stuff_labels = torch.stack(stuff_labels_list, dim=0)\n        labels = torch.cat([things_labels, stuff_labels], dim=0)\n        masks = torch.cat([things_masks, stuff_masks], dim=0)\n    else:\n        labels = things_labels\n        masks = things_masks\n\n    masks = masks.long()\n    return labels, masks\n"
  },
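A sketch (not part of the repository) of the expected inputs and outputs of the preprocessing helper above, assuming `mmdet` is importable. `BitmapMasks` is taken from `mmdet.core.mask`, and the class counts mimic COCO panoptic (80 things, 53 stuff); all values are illustrative.

```python
import numpy as np
import torch
from mmdet.core.mask import BitmapMasks
from mmdet.models.utils.panoptic_gt_processing import preprocess_panoptic_gt

gt_labels = torch.tensor([0, 2])                                  # two "thing" instances
gt_masks = BitmapMasks(np.zeros((2, 50, 50), dtype=np.uint8), 50, 50)
gt_semantic_seg = torch.full((1, 50, 50), 255, dtype=torch.long)  # all VOID -> no stuff masks
img_metas = dict(pad_shape=(64, 64, 3))

labels, masks = preprocess_panoptic_gt(
    gt_labels, gt_masks, gt_semantic_seg,
    num_things=80, num_stuff=53, img_metas=img_metas)
print(labels.shape, masks.shape)  # torch.Size([2]) torch.Size([2, 64, 64])
```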
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/point_sample.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.ops import point_sample\n\n\ndef get_uncertainty(mask_pred, labels):\n    \"\"\"Estimate uncertainty based on pred logits.\n\n    We estimate uncertainty as L1 distance between 0.0 and the logits\n    prediction in 'mask_pred' for the foreground class in `classes`.\n\n    Args:\n        mask_pred (Tensor): mask predication logits, shape (num_rois,\n            num_classes, mask_height, mask_width).\n\n        labels (list[Tensor]): Either predicted or ground truth label for\n            each predicted mask, of length num_rois.\n\n    Returns:\n        scores (Tensor): Uncertainty scores with the most uncertain\n            locations having the highest uncertainty score,\n            shape (num_rois, 1, mask_height, mask_width)\n    \"\"\"\n    if mask_pred.shape[1] == 1:\n        gt_class_logits = mask_pred.clone()\n    else:\n        inds = torch.arange(mask_pred.shape[0], device=mask_pred.device)\n        gt_class_logits = mask_pred[inds, labels].unsqueeze(1)\n    return -torch.abs(gt_class_logits)\n\n\ndef get_uncertain_point_coords_with_randomness(mask_pred, labels, num_points,\n                                               oversample_ratio,\n                                               importance_sample_ratio):\n    \"\"\"Get ``num_points`` most uncertain points with random points during\n    train.\n\n    Sample points in [0, 1] x [0, 1] coordinate space based on their\n    uncertainty. The uncertainties are calculated for each point using\n    'get_uncertainty()' function that takes point's logit prediction as\n    input.\n\n    Args:\n        mask_pred (Tensor): A tensor of shape (num_rois, num_classes,\n            mask_height, mask_width) for class-specific or class-agnostic\n            prediction.\n        labels (list): The ground truth class for each instance.\n        num_points (int): The number of points to sample.\n        oversample_ratio (int): Oversampling parameter.\n        importance_sample_ratio (float): Ratio of points that are sampled\n            via importnace sampling.\n\n    Returns:\n        point_coords (Tensor): A tensor of shape (num_rois, num_points, 2)\n            that contains the coordinates sampled points.\n    \"\"\"\n    assert oversample_ratio >= 1\n    assert 0 <= importance_sample_ratio <= 1\n    batch_size = mask_pred.shape[0]\n    num_sampled = int(num_points * oversample_ratio)\n    point_coords = torch.rand(\n        batch_size, num_sampled, 2, device=mask_pred.device)\n    point_logits = point_sample(mask_pred, point_coords)\n    # It is crucial to calculate uncertainty based on the sampled\n    # prediction value for the points. Calculating uncertainties of the\n    # coarse predictions first and sampling them for points leads to\n    # incorrect results.  To illustrate this: assume uncertainty func(\n    # logits)=-abs(logits), a sampled point between two coarse\n    # predictions with -1 and 1 logits has 0 logits, and therefore 0\n    # uncertainty value. 
However, if we calculate uncertainties for the\n    # coarse predictions first, both will have -1 uncertainty,\n    # and sampled point will get -1 uncertainty.\n    point_uncertainties = get_uncertainty(point_logits, labels)\n    num_uncertain_points = int(importance_sample_ratio * num_points)\n    num_random_points = num_points - num_uncertain_points\n    idx = torch.topk(\n        point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1]\n    shift = num_sampled * torch.arange(\n        batch_size, dtype=torch.long, device=mask_pred.device)\n    idx += shift[:, None]\n    point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view(\n        batch_size, num_uncertain_points, 2)\n    if num_random_points > 0:\n        rand_roi_coords = torch.rand(\n            batch_size, num_random_points, 2, device=mask_pred.device)\n        point_coords = torch.cat((point_coords, rand_roi_coords), dim=1)\n    return point_coords\n"
  },
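A minimal sketch (not part of the repository), assuming `mmdet` and a compiled `mmcv-full` (for `mmcv.ops.point_sample`) are available, of sampling uncertain points from class-agnostic mask logits; the shapes are illustrative.

```python
import torch
from mmdet.models.utils.point_sample import get_uncertain_point_coords_with_randomness

mask_pred = torch.randn(4, 1, 28, 28)        # class-agnostic mask logits per RoI
labels = torch.zeros(4, dtype=torch.long)    # unused when the prediction is class-agnostic
point_coords = get_uncertain_point_coords_with_randomness(
    mask_pred, labels, num_points=49,
    oversample_ratio=3, importance_sample_ratio=0.75)
print(point_coords.shape)  # torch.Size([4, 49, 2]); coordinates lie in [0, 1] x [0, 1]
```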
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/positional_encoding.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\n\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn.bricks.transformer import POSITIONAL_ENCODING\nfrom mmcv.runner import BaseModule\n\n\n@POSITIONAL_ENCODING.register_module()\nclass SinePositionalEncoding(BaseModule):\n    \"\"\"Position encoding with sine and cosine functions.\n\n    See `End-to-End Object Detection with Transformers\n    <https://arxiv.org/pdf/2005.12872>`_ for details.\n\n    Args:\n        num_feats (int): The feature dimension for each position\n            along x-axis or y-axis. Note the final returned dimension\n            for each position is 2 times of this value.\n        temperature (int, optional): The temperature used for scaling\n            the position embedding. Defaults to 10000.\n        normalize (bool, optional): Whether to normalize the position\n            embedding. Defaults to False.\n        scale (float, optional): A scale factor that scales the position\n            embedding. The scale will be used only when `normalize` is True.\n            Defaults to 2*pi.\n        eps (float, optional): A value added to the denominator for\n            numerical stability. Defaults to 1e-6.\n        offset (float): offset add to embed when do the normalization.\n            Defaults to 0.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 num_feats,\n                 temperature=10000,\n                 normalize=False,\n                 scale=2 * math.pi,\n                 eps=1e-6,\n                 offset=0.,\n                 init_cfg=None):\n        super(SinePositionalEncoding, self).__init__(init_cfg)\n        if normalize:\n            assert isinstance(scale, (float, int)), 'when normalize is set,' \\\n                'scale should be provided and in float or int type, ' \\\n                f'found {type(scale)}'\n        self.num_feats = num_feats\n        self.temperature = temperature\n        self.normalize = normalize\n        self.scale = scale\n        self.eps = eps\n        self.offset = offset\n\n    def forward(self, mask):\n        \"\"\"Forward function for `SinePositionalEncoding`.\n\n        Args:\n            mask (Tensor): ByteTensor mask. Non-zero values representing\n                ignored positions, while zero values means valid positions\n                for this image. 
Shape [bs, h, w].\n\n        Returns:\n            pos (Tensor): Returned position embedding with shape\n                [bs, num_feats*2, h, w].\n        \"\"\"\n        # For convenience of exporting to ONNX, it's required to convert\n        # `masks` from bool to int.\n        mask = mask.to(torch.int)\n        not_mask = 1 - mask  # logical_not\n        y_embed = not_mask.cumsum(1, dtype=torch.float32)\n        x_embed = not_mask.cumsum(2, dtype=torch.float32)\n        if self.normalize:\n            y_embed = (y_embed + self.offset) / \\\n                      (y_embed[:, -1:, :] + self.eps) * self.scale\n            x_embed = (x_embed + self.offset) / \\\n                      (x_embed[:, :, -1:] + self.eps) * self.scale\n        dim_t = torch.arange(\n            self.num_feats, dtype=torch.float32, device=mask.device)\n        dim_t = self.temperature**(2 * (dim_t // 2) / self.num_feats)\n        pos_x = x_embed[:, :, :, None] / dim_t\n        pos_y = y_embed[:, :, :, None] / dim_t\n        # use `view` instead of `flatten` for dynamically exporting to ONNX\n        B, H, W = mask.size()\n        pos_x = torch.stack(\n            (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()),\n            dim=4).view(B, H, W, -1)\n        pos_y = torch.stack(\n            (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()),\n            dim=4).view(B, H, W, -1)\n        pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)\n        return pos\n\n    def __repr__(self):\n        \"\"\"str: a string that describes the module\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(num_feats={self.num_feats}, '\n        repr_str += f'temperature={self.temperature}, '\n        repr_str += f'normalize={self.normalize}, '\n        repr_str += f'scale={self.scale}, '\n        repr_str += f'eps={self.eps})'\n        return repr_str\n\n\n@POSITIONAL_ENCODING.register_module()\nclass LearnedPositionalEncoding(BaseModule):\n    \"\"\"Position embedding with learnable embedding weights.\n\n    Args:\n        num_feats (int): The feature dimension for each position\n            along x-axis or y-axis. The final returned dimension for\n            each position is 2 times of this value.\n        row_num_embed (int, optional): The dictionary size of row embeddings.\n            Default 50.\n        col_num_embed (int, optional): The dictionary size of col embeddings.\n            Default 50.\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n    \"\"\"\n\n    def __init__(self,\n                 num_feats,\n                 row_num_embed=50,\n                 col_num_embed=50,\n                 init_cfg=dict(type='Uniform', layer='Embedding')):\n        super(LearnedPositionalEncoding, self).__init__(init_cfg)\n        self.row_embed = nn.Embedding(row_num_embed, num_feats)\n        self.col_embed = nn.Embedding(col_num_embed, num_feats)\n        self.num_feats = num_feats\n        self.row_num_embed = row_num_embed\n        self.col_num_embed = col_num_embed\n\n    def forward(self, mask):\n        \"\"\"Forward function for `LearnedPositionalEncoding`.\n\n        Args:\n            mask (Tensor): ByteTensor mask. Non-zero values representing\n                ignored positions, while zero values means valid positions\n                for this image. 
Shape [bs, h, w].\n\n        Returns:\n            pos (Tensor): Returned position embedding with shape\n                [bs, num_feats*2, h, w].\n        \"\"\"\n        h, w = mask.shape[-2:]\n        x = torch.arange(w, device=mask.device)\n        y = torch.arange(h, device=mask.device)\n        x_embed = self.col_embed(x)\n        y_embed = self.row_embed(y)\n        pos = torch.cat(\n            (x_embed.unsqueeze(0).repeat(h, 1, 1), y_embed.unsqueeze(1).repeat(\n                1, w, 1)),\n            dim=-1).permute(2, 0,\n                            1).unsqueeze(0).repeat(mask.shape[0], 1, 1, 1)\n        return pos\n\n    def __repr__(self):\n        \"\"\"str: a string that describes the module\"\"\"\n        repr_str = self.__class__.__name__\n        repr_str += f'(num_feats={self.num_feats}, '\n        repr_str += f'row_num_embed={self.row_num_embed}, '\n        repr_str += f'col_num_embed={self.col_num_embed})'\n        return repr_str\n"
  },
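A short sketch (not part of the repository) of both encodings above, assuming `mmdet`/`mmcv` are importable. The mask marks padded (non-zero) positions, and the output channel count is `2 * num_feats`.

```python
import torch
from mmdet.models.utils.positional_encoding import (LearnedPositionalEncoding,
                                                    SinePositionalEncoding)

mask = torch.zeros(2, 32, 32, dtype=torch.uint8)   # zero = valid position

sine = SinePositionalEncoding(num_feats=128, normalize=True)
print(sine(mask).shape)     # torch.Size([2, 256, 32, 32])

learned = LearnedPositionalEncoding(num_feats=128, row_num_embed=50, col_num_embed=50)
print(learned(mask).shape)  # torch.Size([2, 256, 32, 32])
```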
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/res_layer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.cnn import build_conv_layer, build_norm_layer\nfrom mmcv.runner import BaseModule, Sequential\nfrom torch import nn as nn\n\n\nclass ResLayer(Sequential):\n    \"\"\"ResLayer to build ResNet style backbone.\n\n    Args:\n        block (nn.Module): block used to build ResLayer.\n        inplanes (int): inplanes of block.\n        planes (int): planes of block.\n        num_blocks (int): number of blocks.\n        stride (int): stride of the first block. Default: 1\n        avg_down (bool): Use AvgPool instead of stride conv when\n            downsampling in the bottleneck. Default: False\n        conv_cfg (dict): dictionary to construct and config conv layer.\n            Default: None\n        norm_cfg (dict): dictionary to construct and config norm layer.\n            Default: dict(type='BN')\n        downsample_first (bool): Downsample at the first block or last block.\n            False for Hourglass, True for ResNet. Default: True\n    \"\"\"\n\n    def __init__(self,\n                 block,\n                 inplanes,\n                 planes,\n                 num_blocks,\n                 stride=1,\n                 avg_down=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 downsample_first=True,\n                 **kwargs):\n        self.block = block\n\n        downsample = None\n        if stride != 1 or inplanes != planes * block.expansion:\n            downsample = []\n            conv_stride = stride\n            if avg_down:\n                conv_stride = 1\n                downsample.append(\n                    nn.AvgPool2d(\n                        kernel_size=stride,\n                        stride=stride,\n                        ceil_mode=True,\n                        count_include_pad=False))\n            downsample.extend([\n                build_conv_layer(\n                    conv_cfg,\n                    inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=conv_stride,\n                    bias=False),\n                build_norm_layer(norm_cfg, planes * block.expansion)[1]\n            ])\n            downsample = nn.Sequential(*downsample)\n\n        layers = []\n        if downsample_first:\n            layers.append(\n                block(\n                    inplanes=inplanes,\n                    planes=planes,\n                    stride=stride,\n                    downsample=downsample,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    **kwargs))\n            inplanes = planes * block.expansion\n            for _ in range(1, num_blocks):\n                layers.append(\n                    block(\n                        inplanes=inplanes,\n                        planes=planes,\n                        stride=1,\n                        conv_cfg=conv_cfg,\n                        norm_cfg=norm_cfg,\n                        **kwargs))\n\n        else:  # downsample_first=False is for HourglassModule\n            for _ in range(num_blocks - 1):\n                layers.append(\n                    block(\n                        inplanes=inplanes,\n                        planes=inplanes,\n                        stride=1,\n                        conv_cfg=conv_cfg,\n                        norm_cfg=norm_cfg,\n                        **kwargs))\n            layers.append(\n                block(\n               
     inplanes=inplanes,\n                    planes=planes,\n                    stride=stride,\n                    downsample=downsample,\n                    conv_cfg=conv_cfg,\n                    norm_cfg=norm_cfg,\n                    **kwargs))\n        super(ResLayer, self).__init__(*layers)\n\n\nclass SimplifiedBasicBlock(BaseModule):\n    \"\"\"Simplified version of original basic residual block. This is used in\n    `SCNet <https://arxiv.org/abs/2012.10150>`_.\n\n    - Norm layer is now optional\n    - Last ReLU in forward function is removed\n    \"\"\"\n    expansion = 1\n\n    def __init__(self,\n                 inplanes,\n                 planes,\n                 stride=1,\n                 dilation=1,\n                 downsample=None,\n                 style='pytorch',\n                 with_cp=False,\n                 conv_cfg=None,\n                 norm_cfg=dict(type='BN'),\n                 dcn=None,\n                 plugins=None,\n                 init_fg=None):\n        super(SimplifiedBasicBlock, self).__init__(init_fg)\n        assert dcn is None, 'Not implemented yet.'\n        assert plugins is None, 'Not implemented yet.'\n        assert not with_cp, 'Not implemented yet.'\n        self.with_norm = norm_cfg is not None\n        with_bias = True if norm_cfg is None else False\n        self.conv1 = build_conv_layer(\n            conv_cfg,\n            inplanes,\n            planes,\n            3,\n            stride=stride,\n            padding=dilation,\n            dilation=dilation,\n            bias=with_bias)\n        if self.with_norm:\n            self.norm1_name, norm1 = build_norm_layer(\n                norm_cfg, planes, postfix=1)\n            self.add_module(self.norm1_name, norm1)\n        self.conv2 = build_conv_layer(\n            conv_cfg, planes, planes, 3, padding=1, bias=with_bias)\n        if self.with_norm:\n            self.norm2_name, norm2 = build_norm_layer(\n                norm_cfg, planes, postfix=2)\n            self.add_module(self.norm2_name, norm2)\n\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n        self.dilation = dilation\n        self.with_cp = with_cp\n\n    @property\n    def norm1(self):\n        \"\"\"nn.Module: normalization layer after the first convolution layer\"\"\"\n        return getattr(self, self.norm1_name) if self.with_norm else None\n\n    @property\n    def norm2(self):\n        \"\"\"nn.Module: normalization layer after the second convolution layer\"\"\"\n        return getattr(self, self.norm2_name) if self.with_norm else None\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n\n        identity = x\n\n        out = self.conv1(x)\n        if self.with_norm:\n            out = self.norm1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        if self.with_norm:\n            out = self.norm2(out)\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out += identity\n\n        return out\n"
  },
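A minimal sketch (not part of the repository), assuming `mmdet` is importable, of stacking two ResNet `BasicBlock`s (from `mmdet.models.backbones.resnet`) with the helper above; the first block downsamples because of the stride and channel change.

```python
import torch
from mmdet.models.backbones.resnet import BasicBlock
from mmdet.models.utils.res_layer import ResLayer

layer = ResLayer(BasicBlock, inplanes=64, planes=128, num_blocks=2, stride=2)
layer.eval()

x = torch.randn(1, 64, 56, 56)
with torch.no_grad():
    print(layer(x).shape)  # torch.Size([1, 128, 28, 28])
```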
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/se_layer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\nfrom mmcv.runner import BaseModule\n\n\nclass SELayer(BaseModule):\n    \"\"\"Squeeze-and-Excitation Module.\n\n    Args:\n        channels (int): The input (and output) channels of the SE layer.\n        ratio (int): Squeeze ratio in SELayer, the intermediate channel will be\n            ``int(channels/ratio)``. Default: 16.\n        conv_cfg (None or dict): Config dict for convolution layer.\n            Default: None, which means using conv2d.\n        act_cfg (dict or Sequence[dict]): Config dict for activation layer.\n            If act_cfg is a dict, two activation layers will be configurated\n            by this dict. If act_cfg is a sequence of dicts, the first\n            activation layer will be configurated by the first dict and the\n            second activation layer will be configurated by the second dict.\n            Default: (dict(type='ReLU'), dict(type='Sigmoid'))\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 channels,\n                 ratio=16,\n                 conv_cfg=None,\n                 act_cfg=(dict(type='ReLU'), dict(type='Sigmoid')),\n                 init_cfg=None):\n        super(SELayer, self).__init__(init_cfg)\n        if isinstance(act_cfg, dict):\n            act_cfg = (act_cfg, act_cfg)\n        assert len(act_cfg) == 2\n        assert mmcv.is_tuple_of(act_cfg, dict)\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.conv1 = ConvModule(\n            in_channels=channels,\n            out_channels=int(channels / ratio),\n            kernel_size=1,\n            stride=1,\n            conv_cfg=conv_cfg,\n            act_cfg=act_cfg[0])\n        self.conv2 = ConvModule(\n            in_channels=int(channels / ratio),\n            out_channels=channels,\n            kernel_size=1,\n            stride=1,\n            conv_cfg=conv_cfg,\n            act_cfg=act_cfg[1])\n\n    def forward(self, x):\n        out = self.global_avgpool(x)\n        out = self.conv1(out)\n        out = self.conv2(out)\n        return x * out\n\n\nclass DyReLU(BaseModule):\n    \"\"\"Dynamic ReLU (DyReLU) module.\n\n    See `Dynamic ReLU <https://arxiv.org/abs/2003.10027>`_ for details.\n    Current implementation is specialized for task-aware attention in DyHead.\n    HSigmoid arguments in default act_cfg follow DyHead official code.\n    https://github.com/microsoft/DynamicHead/blob/master/dyhead/dyrelu.py\n\n    Args:\n        channels (int): The input (and output) channels of DyReLU module.\n        ratio (int): Squeeze ratio in Squeeze-and-Excitation-like module,\n            the intermediate channel will be ``int(channels/ratio)``.\n            Default: 4.\n        conv_cfg (None or dict): Config dict for convolution layer.\n            Default: None, which means using conv2d.\n        act_cfg (dict or Sequence[dict]): Config dict for activation layer.\n            If act_cfg is a dict, two activation layers will be configurated\n            by this dict. 
If act_cfg is a sequence of dicts, the first\n            activation layer will be configurated by the first dict and the\n            second activation layer will be configurated by the second dict.\n            Default: (dict(type='ReLU'), dict(type='HSigmoid', bias=3.0,\n            divisor=6.0))\n        init_cfg (dict or list[dict], optional): Initialization config dict.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 channels,\n                 ratio=4,\n                 conv_cfg=None,\n                 act_cfg=(dict(type='ReLU'),\n                          dict(type='HSigmoid', bias=3.0, divisor=6.0)),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        if isinstance(act_cfg, dict):\n            act_cfg = (act_cfg, act_cfg)\n        assert len(act_cfg) == 2\n        assert mmcv.is_tuple_of(act_cfg, dict)\n        self.channels = channels\n        self.expansion = 4  # for a1, b1, a2, b2\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.conv1 = ConvModule(\n            in_channels=channels,\n            out_channels=int(channels / ratio),\n            kernel_size=1,\n            stride=1,\n            conv_cfg=conv_cfg,\n            act_cfg=act_cfg[0])\n        self.conv2 = ConvModule(\n            in_channels=int(channels / ratio),\n            out_channels=channels * self.expansion,\n            kernel_size=1,\n            stride=1,\n            conv_cfg=conv_cfg,\n            act_cfg=act_cfg[1])\n\n    def forward(self, x):\n        \"\"\"Forward function.\"\"\"\n        coeffs = self.global_avgpool(x)\n        coeffs = self.conv1(coeffs)\n        coeffs = self.conv2(coeffs) - 0.5  # value range: [-0.5, 0.5]\n        a1, b1, a2, b2 = torch.split(coeffs, self.channels, dim=1)\n        a1 = a1 * 2.0 + 1.0  # [-1.0, 1.0] + 1.0\n        a2 = a2 * 2.0  # [-1.0, 1.0]\n        out = torch.max(x * a1 + b1, x * a2 + b2)\n        return out\n"
  },
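A short sketch (not part of the repository), assuming `mmdet`/`mmcv` are importable, of both modules above; each returns a tensor with the same shape as its input.

```python
import torch
from mmdet.models.utils.se_layer import DyReLU, SELayer

x = torch.randn(2, 64, 20, 20)

se = SELayer(channels=64, ratio=16)
print(se(x).shape)      # torch.Size([2, 64, 20, 20]); channel-wise re-weighting

dyrelu = DyReLU(channels=64, ratio=4)
print(dyrelu(x).shape)  # torch.Size([2, 64, 20, 20]); element-wise max of two affine maps
```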
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/models/utils/transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport math\nimport warnings\nfrom typing import Sequence\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nfrom mmcv.cnn import (build_activation_layer, build_conv_layer,\n                      build_norm_layer, xavier_init)\nfrom mmcv.cnn.bricks.registry import (TRANSFORMER_LAYER,\n                                      TRANSFORMER_LAYER_SEQUENCE)\nfrom mmcv.cnn.bricks.transformer import (BaseTransformerLayer,\n                                         TransformerLayerSequence,\n                                         build_transformer_layer_sequence)\nfrom mmcv.runner.base_module import BaseModule\nfrom mmcv.utils import to_2tuple\nfrom torch.nn.init import normal_\n\nfrom mmdet.models.utils.builder import TRANSFORMER\n\ntry:\n    from mmcv.ops.multi_scale_deform_attn import MultiScaleDeformableAttention\n\nexcept ImportError:\n    warnings.warn(\n        '`MultiScaleDeformableAttention` in MMCV has been moved to '\n        '`mmcv.ops.multi_scale_deform_attn`, please update your MMCV')\n    from mmcv.cnn.bricks.transformer import MultiScaleDeformableAttention\n\n\ndef nlc_to_nchw(x, hw_shape):\n    \"\"\"Convert [N, L, C] shape tensor to [N, C, H, W] shape tensor.\n\n    Args:\n        x (Tensor): The input tensor of shape [N, L, C] before conversion.\n        hw_shape (Sequence[int]): The height and width of output feature map.\n\n    Returns:\n        Tensor: The output tensor of shape [N, C, H, W] after conversion.\n    \"\"\"\n    H, W = hw_shape\n    assert len(x.shape) == 3\n    B, L, C = x.shape\n    assert L == H * W, 'The seq_len does not match H, W'\n    return x.transpose(1, 2).reshape(B, C, H, W).contiguous()\n\n\ndef nchw_to_nlc(x):\n    \"\"\"Flatten [N, C, H, W] shape tensor to [N, L, C] shape tensor.\n\n    Args:\n        x (Tensor): The input tensor of shape [N, C, H, W] before conversion.\n\n    Returns:\n        Tensor: The output tensor of shape [N, L, C] after conversion.\n    \"\"\"\n    assert len(x.shape) == 4\n    return x.flatten(2).transpose(1, 2).contiguous()\n\n\nclass AdaptivePadding(nn.Module):\n    \"\"\"Applies padding to input (if needed) so that input can get fully covered\n    by filter you specified. It support two modes \"same\" and \"corner\". The\n    \"same\" mode is same with \"SAME\" padding mode in TensorFlow, pad zero around\n    input. The \"corner\"  mode would pad zero to bottom right.\n\n    Args:\n        kernel_size (int | tuple): Size of the kernel:\n        stride (int | tuple): Stride of the filter. Default: 1:\n        dilation (int | tuple): Spacing between kernel elements.\n            Default: 1\n        padding (str): Support \"same\" and \"corner\", \"corner\" mode\n            would pad zero to bottom right, and \"same\" mode would\n            pad zero around input. 
Default: \"corner\".\n    Example:\n        >>> kernel_size = 16\n        >>> stride = 16\n        >>> dilation = 1\n        >>> input = torch.rand(1, 1, 15, 17)\n        >>> adap_pad = AdaptivePadding(\n        >>>     kernel_size=kernel_size,\n        >>>     stride=stride,\n        >>>     dilation=dilation,\n        >>>     padding=\"corner\")\n        >>> out = adap_pad(input)\n        >>> assert (out.shape[2], out.shape[3]) == (16, 32)\n        >>> input = torch.rand(1, 1, 16, 17)\n        >>> out = adap_pad(input)\n        >>> assert (out.shape[2], out.shape[3]) == (16, 32)\n    \"\"\"\n\n    def __init__(self, kernel_size=1, stride=1, dilation=1, padding='corner'):\n\n        super(AdaptivePadding, self).__init__()\n\n        assert padding in ('same', 'corner')\n\n        kernel_size = to_2tuple(kernel_size)\n        stride = to_2tuple(stride)\n        padding = to_2tuple(padding)\n        dilation = to_2tuple(dilation)\n\n        self.padding = padding\n        self.kernel_size = kernel_size\n        self.stride = stride\n        self.dilation = dilation\n\n    def get_pad_shape(self, input_shape):\n        input_h, input_w = input_shape\n        kernel_h, kernel_w = self.kernel_size\n        stride_h, stride_w = self.stride\n        output_h = math.ceil(input_h / stride_h)\n        output_w = math.ceil(input_w / stride_w)\n        pad_h = max((output_h - 1) * stride_h +\n                    (kernel_h - 1) * self.dilation[0] + 1 - input_h, 0)\n        pad_w = max((output_w - 1) * stride_w +\n                    (kernel_w - 1) * self.dilation[1] + 1 - input_w, 0)\n        return pad_h, pad_w\n\n    def forward(self, x):\n        pad_h, pad_w = self.get_pad_shape(x.size()[-2:])\n        if pad_h > 0 or pad_w > 0:\n            if self.padding == 'corner':\n                x = F.pad(x, [0, pad_w, 0, pad_h])\n            elif self.padding == 'same':\n                x = F.pad(x, [\n                    pad_w // 2, pad_w - pad_w // 2, pad_h // 2,\n                    pad_h - pad_h // 2\n                ])\n        return x\n\n\nclass PatchEmbed(BaseModule):\n    \"\"\"Image to Patch Embedding.\n\n    We use a conv layer to implement PatchEmbed.\n\n    Args:\n        in_channels (int): The num of input channels. Default: 3\n        embed_dims (int): The dimensions of embedding. Default: 768\n        conv_type (str): The config dict for embedding\n            conv layer type selection. Default: \"Conv2d.\n        kernel_size (int): The kernel_size of embedding conv. Default: 16.\n        stride (int): The slide stride of embedding conv.\n            Default: None (Would be set as `kernel_size`).\n        padding (int | tuple | string ): The padding length of\n            embedding conv. When it is a string, it means the mode\n            of adaptive padding, support \"same\" and \"corner\" now.\n            Default: \"corner\".\n        dilation (int): The dilation rate of embedding conv. Default: 1.\n        bias (bool): Bias of embed conv. Default: True.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n            Default: None.\n        input_size (int | tuple | None): The size of input, which will be\n            used to calculate the out size. Only work when `dynamic_size`\n            is False. 
Default: None.\n        init_cfg (`mmcv.ConfigDict`, optional): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(\n        self,\n        in_channels=3,\n        embed_dims=768,\n        conv_type='Conv2d',\n        kernel_size=16,\n        stride=16,\n        padding='corner',\n        dilation=1,\n        bias=True,\n        norm_cfg=None,\n        input_size=None,\n        init_cfg=None,\n    ):\n        super(PatchEmbed, self).__init__(init_cfg=init_cfg)\n\n        self.embed_dims = embed_dims\n        if stride is None:\n            stride = kernel_size\n\n        kernel_size = to_2tuple(kernel_size)\n        stride = to_2tuple(stride)\n        dilation = to_2tuple(dilation)\n\n        if isinstance(padding, str):\n            self.adap_padding = AdaptivePadding(\n                kernel_size=kernel_size,\n                stride=stride,\n                dilation=dilation,\n                padding=padding)\n            # disable the padding of conv\n            padding = 0\n        else:\n            self.adap_padding = None\n        padding = to_2tuple(padding)\n\n        self.projection = build_conv_layer(\n            dict(type=conv_type),\n            in_channels=in_channels,\n            out_channels=embed_dims,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        if norm_cfg is not None:\n            self.norm = build_norm_layer(norm_cfg, embed_dims)[1]\n        else:\n            self.norm = None\n\n        if input_size:\n            input_size = to_2tuple(input_size)\n            # `init_out_size` would be used outside to\n            # calculate the num_patches\n            # when `use_abs_pos_embed` outside\n            self.init_input_size = input_size\n            if self.adap_padding:\n                pad_h, pad_w = self.adap_padding.get_pad_shape(input_size)\n                input_h, input_w = input_size\n                input_h = input_h + pad_h\n                input_w = input_w + pad_w\n                input_size = (input_h, input_w)\n\n            # https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html\n            h_out = (input_size[0] + 2 * padding[0] - dilation[0] *\n                     (kernel_size[0] - 1) - 1) // stride[0] + 1\n            w_out = (input_size[1] + 2 * padding[1] - dilation[1] *\n                     (kernel_size[1] - 1) - 1) // stride[1] + 1\n            self.init_out_size = (h_out, w_out)\n        else:\n            self.init_input_size = None\n            self.init_out_size = None\n\n    def forward(self, x):\n        \"\"\"\n        Args:\n            x (Tensor): Has shape (B, C, H, W). In most case, C is 3.\n\n        Returns:\n            tuple: Contains merged results and its spatial shape.\n\n                - x (Tensor): Has shape (B, out_h * out_w, embed_dims)\n                - out_size (tuple[int]): Spatial shape of x, arrange as\n                    (out_h, out_w).\n        \"\"\"\n\n        if self.adap_padding:\n            x = self.adap_padding(x)\n\n        x = self.projection(x)\n        out_size = (x.shape[2], x.shape[3])\n        x = x.flatten(2).transpose(1, 2)\n        if self.norm is not None:\n            x = self.norm(x)\n        return x, out_size\n\n\nclass PatchMerging(BaseModule):\n    \"\"\"Merge patch feature map.\n\n    This layer groups feature map by kernel_size, and applies norm and linear\n    layers to the grouped feature map. 
Our implementation uses `nn.Unfold` to\n    merge patch, which is about 25% faster than original implementation.\n    Instead, we need to modify pretrained models for compatibility.\n\n    Args:\n        in_channels (int): The num of input channels.\n            to gets fully covered by filter and stride you specified..\n            Default: True.\n        out_channels (int): The num of output channels.\n        kernel_size (int | tuple, optional): the kernel size in the unfold\n            layer. Defaults to 2.\n        stride (int | tuple, optional): the stride of the sliding blocks in the\n            unfold layer. Default: None. (Would be set as `kernel_size`)\n        padding (int | tuple | string ): The padding length of\n            embedding conv. When it is a string, it means the mode\n            of adaptive padding, support \"same\" and \"corner\" now.\n            Default: \"corner\".\n        dilation (int | tuple, optional): dilation parameter in the unfold\n            layer. Default: 1.\n        bias (bool, optional): Whether to add bias in linear layer or not.\n            Defaults: False.\n        norm_cfg (dict, optional): Config dict for normalization layer.\n            Default: dict(type='LN').\n        init_cfg (dict, optional): The extra config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels,\n                 out_channels,\n                 kernel_size=2,\n                 stride=None,\n                 padding='corner',\n                 dilation=1,\n                 bias=False,\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None):\n        super().__init__(init_cfg=init_cfg)\n        self.in_channels = in_channels\n        self.out_channels = out_channels\n        if stride:\n            stride = stride\n        else:\n            stride = kernel_size\n\n        kernel_size = to_2tuple(kernel_size)\n        stride = to_2tuple(stride)\n        dilation = to_2tuple(dilation)\n\n        if isinstance(padding, str):\n            self.adap_padding = AdaptivePadding(\n                kernel_size=kernel_size,\n                stride=stride,\n                dilation=dilation,\n                padding=padding)\n            # disable the padding of unfold\n            padding = 0\n        else:\n            self.adap_padding = None\n\n        padding = to_2tuple(padding)\n        self.sampler = nn.Unfold(\n            kernel_size=kernel_size,\n            dilation=dilation,\n            padding=padding,\n            stride=stride)\n\n        sample_dim = kernel_size[0] * kernel_size[1] * in_channels\n\n        if norm_cfg is not None:\n            self.norm = build_norm_layer(norm_cfg, sample_dim)[1]\n        else:\n            self.norm = None\n\n        self.reduction = nn.Linear(sample_dim, out_channels, bias=bias)\n\n    def forward(self, x, input_size):\n        \"\"\"\n        Args:\n            x (Tensor): Has shape (B, H*W, C_in).\n            input_size (tuple[int]): The spatial shape of x, arrange as (H, W).\n                Default: None.\n\n        Returns:\n            tuple: Contains merged results and its spatial shape.\n\n                - x (Tensor): Has shape (B, Merged_H * Merged_W, C_out)\n                - out_size (tuple[int]): Spatial shape of x, arrange as\n                    (Merged_H, Merged_W).\n        \"\"\"\n        B, L, C = x.shape\n        assert isinstance(input_size, Sequence), f'Expect ' \\\n                                                 
f'input_size is ' \\\n                                                 f'`Sequence` ' \\\n                                                 f'but get {input_size}'\n\n        H, W = input_size\n        assert L == H * W, 'input feature has wrong size'\n\n        x = x.view(B, H, W, C).permute([0, 3, 1, 2])  # B, C, H, W\n        # Use nn.Unfold to merge patch. About 25% faster than original method,\n        # but need to modify pretrained model for compatibility\n\n        if self.adap_padding:\n            x = self.adap_padding(x)\n            H, W = x.shape[-2:]\n\n        x = self.sampler(x)\n        # if kernel_size=2 and stride=2, x should has shape (B, 4*C, H/2*W/2)\n\n        out_h = (H + 2 * self.sampler.padding[0] - self.sampler.dilation[0] *\n                 (self.sampler.kernel_size[0] - 1) -\n                 1) // self.sampler.stride[0] + 1\n        out_w = (W + 2 * self.sampler.padding[1] - self.sampler.dilation[1] *\n                 (self.sampler.kernel_size[1] - 1) -\n                 1) // self.sampler.stride[1] + 1\n\n        output_size = (out_h, out_w)\n        x = x.transpose(1, 2)  # B, H/2*W/2, 4*C\n        x = self.norm(x) if self.norm else x\n        x = self.reduction(x)\n        return x, output_size\n\n\ndef inverse_sigmoid(x, eps=1e-5):\n    \"\"\"Inverse function of sigmoid.\n\n    Args:\n        x (Tensor): The tensor to do the\n            inverse.\n        eps (float): EPS avoid numerical\n            overflow. Defaults 1e-5.\n    Returns:\n        Tensor: The x has passed the inverse\n            function of sigmoid, has same\n            shape with input.\n    \"\"\"\n    x = x.clamp(min=0, max=1)\n    x1 = x.clamp(min=eps)\n    x2 = (1 - x).clamp(min=eps)\n    return torch.log(x1 / x2)\n\n\n@TRANSFORMER_LAYER.register_module()\nclass DetrTransformerDecoderLayer(BaseTransformerLayer):\n    \"\"\"Implements decoder layer in DETR transformer.\n\n    Args:\n        attn_cfgs (list[`mmcv.ConfigDict`] | list[dict] | dict )):\n            Configs for self_attention or cross_attention, the order\n            should be consistent with it in `operation_order`. If it is\n            a dict, it would be expand to the number of attention in\n            `operation_order`.\n        feedforward_channels (int): The hidden dimension for FFNs.\n        ffn_dropout (float): Probability of an element to be zeroed\n            in ffn. Default 0.0.\n        operation_order (tuple[str]): The execution order of operation\n            in transformer. Such as ('self_attn', 'norm', 'ffn', 'norm').\n            Default：None\n        act_cfg (dict): The activation config for FFNs. 
Default: `ReLU`.\n        norm_cfg (dict): Config dict for normalization layer.\n            Default: `LN`.\n        ffn_num_fcs (int): The number of fully-connected layers in FFNs.\n            Default: 2.\n    \"\"\"\n\n    def __init__(self,\n                 attn_cfgs,\n                 feedforward_channels,\n                 ffn_dropout=0.0,\n                 operation_order=None,\n                 act_cfg=dict(type='ReLU', inplace=True),\n                 norm_cfg=dict(type='LN'),\n                 ffn_num_fcs=2,\n                 **kwargs):\n        super(DetrTransformerDecoderLayer, self).__init__(\n            attn_cfgs=attn_cfgs,\n            feedforward_channels=feedforward_channels,\n            ffn_dropout=ffn_dropout,\n            operation_order=operation_order,\n            act_cfg=act_cfg,\n            norm_cfg=norm_cfg,\n            ffn_num_fcs=ffn_num_fcs,\n            **kwargs)\n        assert len(operation_order) == 6\n        assert set(operation_order) == set(\n            ['self_attn', 'norm', 'cross_attn', 'ffn'])\n\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass DetrTransformerEncoder(TransformerLayerSequence):\n    \"\"\"TransformerEncoder of DETR.\n\n    Args:\n        post_norm_cfg (dict): Config of last normalization layer. Default:\n            `LN`. Only used when `self.pre_norm` is `True`.\n    \"\"\"\n\n    def __init__(self, *args, post_norm_cfg=dict(type='LN'), **kwargs):\n        super(DetrTransformerEncoder, self).__init__(*args, **kwargs)\n        if post_norm_cfg is not None:\n            self.post_norm = build_norm_layer(\n                post_norm_cfg, self.embed_dims)[1] if self.pre_norm else None\n        else:\n            assert not self.pre_norm, f'Use prenorm in ' \\\n                                      f'{self.__class__.__name__}, ' \\\n                                      f'please specify post_norm_cfg'\n            self.post_norm = None\n\n    def forward(self, *args, **kwargs):\n        \"\"\"Forward function for `TransformerEncoder`.\n\n        Returns:\n            Tensor: forwarded results with shape [num_query, bs, embed_dims].\n        \"\"\"\n        x = super(DetrTransformerEncoder, self).forward(*args, **kwargs)\n        if self.post_norm is not None:\n            x = self.post_norm(x)\n        return x\n\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass DetrTransformerDecoder(TransformerLayerSequence):\n    \"\"\"Implements the decoder in DETR transformer.\n\n    Args:\n        return_intermediate (bool): Whether to return intermediate outputs.\n        post_norm_cfg (dict): Config of last normalization layer. 
Default：\n            `LN`.\n    \"\"\"\n\n    def __init__(self,\n                 *args,\n                 post_norm_cfg=dict(type='LN'),\n                 return_intermediate=False,\n                 **kwargs):\n\n        super(DetrTransformerDecoder, self).__init__(*args, **kwargs)\n        self.return_intermediate = return_intermediate\n        if post_norm_cfg is not None:\n            self.post_norm = build_norm_layer(post_norm_cfg,\n                                              self.embed_dims)[1]\n        else:\n            self.post_norm = None\n\n    def forward(self, query, *args, **kwargs):\n        \"\"\"Forward function for `TransformerDecoder`.\n\n        Args:\n            query (Tensor): Input query with shape\n                `(num_query, bs, embed_dims)`.\n\n        Returns:\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\n                return_intermediate is `False`, otherwise it has shape\n                [num_layers, num_query, bs, embed_dims].\n        \"\"\"\n        if not self.return_intermediate:\n            x = super().forward(query, *args, **kwargs)\n            if self.post_norm:\n                x = self.post_norm(x)[None]\n            return x\n\n        intermediate = []\n        for layer in self.layers:\n            query = layer(query, *args, **kwargs)\n            if self.return_intermediate:\n                if self.post_norm is not None:\n                    intermediate.append(self.post_norm(query))\n                else:\n                    intermediate.append(query)\n        return torch.stack(intermediate)\n\n\n@TRANSFORMER.register_module()\nclass Transformer(BaseModule):\n    \"\"\"Implements the DETR transformer.\n\n    Following the official DETR implementation, this module copy-paste\n    from torch.nn.Transformer with modifications:\n\n        * positional encodings are passed in MultiheadAttention\n        * extra LN at the end of encoder is removed\n        * decoder returns a stack of activations from all decoding layers\n\n    See `paper: End-to-End Object Detection with Transformers\n    <https://arxiv.org/pdf/2005.12872>`_ for details.\n\n    Args:\n        encoder (`mmcv.ConfigDict` | Dict): Config of\n            TransformerEncoder. Defaults to None.\n        decoder ((`mmcv.ConfigDict` | Dict)): Config of\n            TransformerDecoder. 
Defaults to None\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Defaults to None.\n    \"\"\"\n\n    def __init__(self, encoder=None, decoder=None, init_cfg=None):\n        super(Transformer, self).__init__(init_cfg=init_cfg)\n        self.encoder = build_transformer_layer_sequence(encoder)\n        self.decoder = build_transformer_layer_sequence(decoder)\n        self.embed_dims = self.encoder.embed_dims\n\n    def init_weights(self):\n        # follow the official DETR to init parameters\n        for m in self.modules():\n            if hasattr(m, 'weight') and m.weight.dim() > 1:\n                xavier_init(m, distribution='uniform')\n        self._is_init = True\n\n    def forward(self, x, mask, query_embed, pos_embed):\n        \"\"\"Forward function for `Transformer`.\n\n        Args:\n            x (Tensor): Input query with shape [bs, c, h, w] where\n                c = embed_dims.\n            mask (Tensor): The key_padding_mask used for encoder and decoder,\n                with shape [bs, h, w].\n            query_embed (Tensor): The query embedding for decoder, with shape\n                [num_query, c].\n            pos_embed (Tensor): The positional encoding for encoder and\n                decoder, with the same shape as `x`.\n\n        Returns:\n            tuple[Tensor]: results of decoder containing the following tensor.\n\n                - out_dec: Output from decoder. If return_intermediate_dec \\\n                      is True output has shape [num_dec_layers, bs,\n                      num_query, embed_dims], else has shape [1, bs, \\\n                      num_query, embed_dims].\n                - memory: Output results from encoder, with shape \\\n                      [bs, embed_dims, h, w].\n        \"\"\"\n        bs, c, h, w = x.shape\n        # use `view` instead of `flatten` for dynamically exporting to ONNX\n        x = x.view(bs, c, -1).permute(2, 0, 1)  # [bs, c, h, w] -> [h*w, bs, c]\n        pos_embed = pos_embed.view(bs, c, -1).permute(2, 0, 1)\n        query_embed = query_embed.unsqueeze(1).repeat(\n            1, bs, 1)  # [num_query, dim] -> [num_query, bs, dim]\n        mask = mask.view(bs, -1)  # [bs, h, w] -> [bs, h*w]\n        memory = self.encoder(\n            query=x,\n            key=None,\n            value=None,\n            query_pos=pos_embed,\n            query_key_padding_mask=mask)\n        target = torch.zeros_like(query_embed)\n        # out_dec: [num_layers, num_query, bs, dim]\n        out_dec = self.decoder(\n            query=target,\n            key=memory,\n            value=memory,\n            key_pos=pos_embed,\n            query_pos=query_embed,\n            key_padding_mask=mask)\n        out_dec = out_dec.transpose(1, 2)\n        memory = memory.permute(1, 2, 0).reshape(bs, c, h, w)\n        return out_dec, memory\n\n\n@TRANSFORMER_LAYER_SEQUENCE.register_module()\nclass DeformableDetrTransformerDecoder(TransformerLayerSequence):\n    \"\"\"Implements the decoder in DETR transformer.\n\n    Args:\n        return_intermediate (bool): Whether to return intermediate outputs.\n        coder_norm_cfg (dict): Config of last normalization layer. 
Default：\n            `LN`.\n    \"\"\"\n\n    def __init__(self, *args, return_intermediate=False, **kwargs):\n\n        super(DeformableDetrTransformerDecoder, self).__init__(*args, **kwargs)\n        self.return_intermediate = return_intermediate\n\n    def forward(self,\n                query,\n                *args,\n                reference_points=None,\n                valid_ratios=None,\n                reg_branches=None,\n                **kwargs):\n        \"\"\"Forward function for `TransformerDecoder`.\n\n        Args:\n            query (Tensor): Input query with shape\n                `(num_query, bs, embed_dims)`.\n            reference_points (Tensor): The reference\n                points of offset. has shape\n                (bs, num_query, 4) when as_two_stage,\n                otherwise has shape ((bs, num_query, 2).\n            valid_ratios (Tensor): The radios of valid\n                points on the feature map, has shape\n                (bs, num_levels, 2)\n            reg_branch: (obj:`nn.ModuleList`): Used for\n                refining the regression results. Only would\n                be passed when with_box_refine is True,\n                otherwise would be passed a `None`.\n\n        Returns:\n            Tensor: Results with shape [1, num_query, bs, embed_dims] when\n                return_intermediate is `False`, otherwise it has shape\n                [num_layers, num_query, bs, embed_dims].\n        \"\"\"\n        output = query\n        intermediate = []\n        intermediate_reference_points = []\n        for lid, layer in enumerate(self.layers):\n            if reference_points.shape[-1] == 4:\n                reference_points_input = reference_points[:, :, None] * \\\n                    torch.cat([valid_ratios, valid_ratios], -1)[:, None]\n            else:\n                assert reference_points.shape[-1] == 2\n                reference_points_input = reference_points[:, :, None] * \\\n                    valid_ratios[:, None]\n            output = layer(\n                output,\n                *args,\n                reference_points=reference_points_input,\n                **kwargs)\n            output = output.permute(1, 0, 2)\n\n            if reg_branches is not None:\n                tmp = reg_branches[lid](output)\n                if reference_points.shape[-1] == 4:\n                    new_reference_points = tmp + inverse_sigmoid(\n                        reference_points)\n                    new_reference_points = new_reference_points.sigmoid()\n                else:\n                    assert reference_points.shape[-1] == 2\n                    new_reference_points = tmp\n                    new_reference_points[..., :2] = tmp[\n                        ..., :2] + inverse_sigmoid(reference_points)\n                    new_reference_points = new_reference_points.sigmoid()\n                reference_points = new_reference_points.detach()\n\n            output = output.permute(1, 0, 2)\n            if self.return_intermediate:\n                intermediate.append(output)\n                intermediate_reference_points.append(reference_points)\n\n        if self.return_intermediate:\n            return torch.stack(intermediate), torch.stack(\n                intermediate_reference_points)\n\n        return output, reference_points\n\n\n@TRANSFORMER.register_module()\nclass DeformableDetrTransformer(Transformer):\n    \"\"\"Implements the DeformableDETR transformer.\n\n    Args:\n        as_two_stage (bool): Generate query from encoder 
features.\n            Default: False.\n        num_feature_levels (int): Number of feature maps from FPN:\n            Default: 4.\n        two_stage_num_proposals (int): Number of proposals when set\n            `as_two_stage` as True. Default: 300.\n    \"\"\"\n\n    def __init__(self,\n                 as_two_stage=False,\n                 num_feature_levels=4,\n                 two_stage_num_proposals=300,\n                 **kwargs):\n        super(DeformableDetrTransformer, self).__init__(**kwargs)\n        self.as_two_stage = as_two_stage\n        self.num_feature_levels = num_feature_levels\n        self.two_stage_num_proposals = two_stage_num_proposals\n        self.embed_dims = self.encoder.embed_dims\n        self.init_layers()\n\n    def init_layers(self):\n        \"\"\"Initialize layers of the DeformableDetrTransformer.\"\"\"\n        self.level_embeds = nn.Parameter(\n            torch.Tensor(self.num_feature_levels, self.embed_dims))\n\n        if self.as_two_stage:\n            self.enc_output = nn.Linear(self.embed_dims, self.embed_dims)\n            self.enc_output_norm = nn.LayerNorm(self.embed_dims)\n            self.pos_trans = nn.Linear(self.embed_dims * 2,\n                                       self.embed_dims * 2)\n            self.pos_trans_norm = nn.LayerNorm(self.embed_dims * 2)\n        else:\n            self.reference_points = nn.Linear(self.embed_dims, 2)\n\n    def init_weights(self):\n        \"\"\"Initialize the transformer weights.\"\"\"\n        for p in self.parameters():\n            if p.dim() > 1:\n                nn.init.xavier_uniform_(p)\n        for m in self.modules():\n            if isinstance(m, MultiScaleDeformableAttention):\n                m.init_weights()\n        if not self.as_two_stage:\n            xavier_init(self.reference_points, distribution='uniform', bias=0.)\n        normal_(self.level_embeds)\n\n    def gen_encoder_output_proposals(self, memory, memory_padding_mask,\n                                     spatial_shapes):\n        \"\"\"Generate proposals from encoded memory.\n\n        Args:\n            memory (Tensor) : The output of encoder,\n                has shape (bs, num_key, embed_dim).  num_key is\n                equal the number of points on feature map from\n                all level.\n            memory_padding_mask (Tensor): Padding mask for memory.\n                has shape (bs, num_key).\n            spatial_shapes (Tensor): The shape of all feature maps.\n                has shape (num_level, 2).\n\n        Returns:\n            tuple: A tuple of feature map and bbox prediction.\n\n                - output_memory (Tensor): The input of decoder,  \\\n                    has shape (bs, num_key, embed_dim).  
num_key is \\\n                    equal the number of points on feature map from \\\n                    all levels.\n                - output_proposals (Tensor): The normalized proposal \\\n                    after a inverse sigmoid, has shape \\\n                    (bs, num_keys, 4).\n        \"\"\"\n\n        N, S, C = memory.shape\n        proposals = []\n        _cur = 0\n        for lvl, (H, W) in enumerate(spatial_shapes):\n            mask_flatten_ = memory_padding_mask[:, _cur:(_cur + H * W)].view(\n                N, H, W, 1)\n            valid_H = torch.sum(~mask_flatten_[:, :, 0, 0], 1)\n            valid_W = torch.sum(~mask_flatten_[:, 0, :, 0], 1)\n\n            grid_y, grid_x = torch.meshgrid(\n                torch.linspace(\n                    0, H - 1, H, dtype=torch.float32, device=memory.device),\n                torch.linspace(\n                    0, W - 1, W, dtype=torch.float32, device=memory.device))\n            grid = torch.cat([grid_x.unsqueeze(-1), grid_y.unsqueeze(-1)], -1)\n\n            scale = torch.cat([valid_W.unsqueeze(-1),\n                               valid_H.unsqueeze(-1)], 1).view(N, 1, 1, 2)\n            grid = (grid.unsqueeze(0).expand(N, -1, -1, -1) + 0.5) / scale\n            wh = torch.ones_like(grid) * 0.05 * (2.0**lvl)\n            proposal = torch.cat((grid, wh), -1).view(N, -1, 4)\n            proposals.append(proposal)\n            _cur += (H * W)\n        output_proposals = torch.cat(proposals, 1)\n        output_proposals_valid = ((output_proposals > 0.01) &\n                                  (output_proposals < 0.99)).all(\n                                      -1, keepdim=True)\n        output_proposals = torch.log(output_proposals / (1 - output_proposals))\n        output_proposals = output_proposals.masked_fill(\n            memory_padding_mask.unsqueeze(-1), float('inf'))\n        output_proposals = output_proposals.masked_fill(\n            ~output_proposals_valid, float('inf'))\n\n        output_memory = memory\n        output_memory = output_memory.masked_fill(\n            memory_padding_mask.unsqueeze(-1), float(0))\n        output_memory = output_memory.masked_fill(~output_proposals_valid,\n                                                  float(0))\n        output_memory = self.enc_output_norm(self.enc_output(output_memory))\n        return output_memory, output_proposals\n\n    @staticmethod\n    def get_reference_points(spatial_shapes, valid_ratios, device):\n        \"\"\"Get the reference points used in decoder.\n\n        Args:\n            spatial_shapes (Tensor): The shape of all\n                feature maps, has shape (num_level, 2).\n            valid_ratios (Tensor): The radios of valid\n                points on the feature map, has shape\n                (bs, num_levels, 2)\n            device (obj:`device`): The device where\n                reference_points should be.\n\n        Returns:\n            Tensor: reference points used in decoder, has \\\n                shape (bs, num_keys, num_levels, 2).\n        \"\"\"\n        reference_points_list = []\n        for lvl, (H, W) in enumerate(spatial_shapes):\n            #  TODO  check this 0.5\n            ref_y, ref_x = torch.meshgrid(\n                torch.linspace(\n                    0.5, H - 0.5, H, dtype=torch.float32, device=device),\n                torch.linspace(\n                    0.5, W - 0.5, W, dtype=torch.float32, device=device))\n            ref_y = ref_y.reshape(-1)[None] / (\n                valid_ratios[:, None, lvl, 1] * H)\n         
   ref_x = ref_x.reshape(-1)[None] / (\n                valid_ratios[:, None, lvl, 0] * W)\n            ref = torch.stack((ref_x, ref_y), -1)\n            reference_points_list.append(ref)\n        reference_points = torch.cat(reference_points_list, 1)\n        reference_points = reference_points[:, :, None] * valid_ratios[:, None]\n        return reference_points\n\n    def get_valid_ratio(self, mask):\n        \"\"\"Get the valid radios of feature maps of all  level.\"\"\"\n        _, H, W = mask.shape\n        valid_H = torch.sum(~mask[:, :, 0], 1)\n        valid_W = torch.sum(~mask[:, 0, :], 1)\n        valid_ratio_h = valid_H.float() / H\n        valid_ratio_w = valid_W.float() / W\n        valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1)\n        return valid_ratio\n\n    def get_proposal_pos_embed(self,\n                               proposals,\n                               num_pos_feats=128,\n                               temperature=10000):\n        \"\"\"Get the position embedding of proposal.\"\"\"\n        scale = 2 * math.pi\n        dim_t = torch.arange(\n            num_pos_feats, dtype=torch.float32, device=proposals.device)\n        dim_t = temperature**(2 * (dim_t // 2) / num_pos_feats)\n        # N, L, 4\n        proposals = proposals.sigmoid() * scale\n        # N, L, 4, 128\n        pos = proposals[:, :, :, None] / dim_t\n        # N, L, 4, 64, 2\n        pos = torch.stack((pos[:, :, :, 0::2].sin(), pos[:, :, :, 1::2].cos()),\n                          dim=4).flatten(2)\n        return pos\n\n    def forward(self,\n                mlvl_feats,\n                mlvl_masks,\n                query_embed,\n                mlvl_pos_embeds,\n                reg_branches=None,\n                cls_branches=None,\n                **kwargs):\n        \"\"\"Forward function for `Transformer`.\n\n        Args:\n            mlvl_feats (list(Tensor)): Input queries from\n                different level. Each element has shape\n                [bs, embed_dims, h, w].\n            mlvl_masks (list(Tensor)): The key_padding_mask from\n                different level used for encoder and decoder,\n                each element has shape  [bs, h, w].\n            query_embed (Tensor): The query embedding for decoder,\n                with shape [num_query, c].\n            mlvl_pos_embeds (list(Tensor)): The positional encoding\n                of feats from different level, has the shape\n                 [bs, embed_dims, h, w].\n            reg_branches (obj:`nn.ModuleList`): Regression heads for\n                feature maps from each decoder layer. Only would\n                be passed when\n                `with_box_refine` is True. Default to None.\n            cls_branches (obj:`nn.ModuleList`): Classification heads\n                for feature maps from each decoder layer. Only would\n                 be passed when `as_two_stage`\n                 is True. Default to None.\n\n\n        Returns:\n            tuple[Tensor]: results of decoder containing the following tensor.\n\n                - inter_states: Outputs from decoder. 
If\n                    return_intermediate_dec is True output has shape \\\n                      (num_dec_layers, bs, num_query, embed_dims), else has \\\n                      shape (1, bs, num_query, embed_dims).\n                - init_reference_out: The initial value of reference \\\n                    points, has shape (bs, num_queries, 4).\n                - inter_references_out: The internal value of reference \\\n                    points in decoder, has shape \\\n                    (num_dec_layers, bs,num_query, embed_dims)\n                - enc_outputs_class: The classification score of \\\n                    proposals generated from \\\n                    encoder's feature maps, has shape \\\n                    (batch, h*w, num_classes). \\\n                    Only would be returned when `as_two_stage` is True, \\\n                    otherwise None.\n                - enc_outputs_coord_unact: The regression results \\\n                    generated from encoder's feature maps., has shape \\\n                    (batch, h*w, 4). Only would \\\n                    be returned when `as_two_stage` is True, \\\n                    otherwise None.\n        \"\"\"\n        assert self.as_two_stage or query_embed is not None\n\n        feat_flatten = []\n        mask_flatten = []\n        lvl_pos_embed_flatten = []\n        spatial_shapes = []\n        for lvl, (feat, mask, pos_embed) in enumerate(\n                zip(mlvl_feats, mlvl_masks, mlvl_pos_embeds)):\n            bs, c, h, w = feat.shape\n            spatial_shape = (h, w)\n            spatial_shapes.append(spatial_shape)\n            feat = feat.flatten(2).transpose(1, 2)\n            mask = mask.flatten(1)\n            pos_embed = pos_embed.flatten(2).transpose(1, 2)\n            lvl_pos_embed = pos_embed + self.level_embeds[lvl].view(1, 1, -1)\n            lvl_pos_embed_flatten.append(lvl_pos_embed)\n            feat_flatten.append(feat)\n            mask_flatten.append(mask)\n        feat_flatten = torch.cat(feat_flatten, 1)\n        mask_flatten = torch.cat(mask_flatten, 1)\n        lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1)\n        spatial_shapes = torch.as_tensor(\n            spatial_shapes, dtype=torch.long, device=feat_flatten.device)\n        level_start_index = torch.cat((spatial_shapes.new_zeros(\n            (1, )), spatial_shapes.prod(1).cumsum(0)[:-1]))\n        valid_ratios = torch.stack(\n            [self.get_valid_ratio(m) for m in mlvl_masks], 1)\n\n        reference_points = \\\n            self.get_reference_points(spatial_shapes,\n                                      valid_ratios,\n                                      device=feat.device)\n\n        feat_flatten = feat_flatten.permute(1, 0, 2)  # (H*W, bs, embed_dims)\n        lvl_pos_embed_flatten = lvl_pos_embed_flatten.permute(\n            1, 0, 2)  # (H*W, bs, embed_dims)\n        memory = self.encoder(\n            query=feat_flatten,\n            key=None,\n            value=None,\n            query_pos=lvl_pos_embed_flatten,\n            query_key_padding_mask=mask_flatten,\n            spatial_shapes=spatial_shapes,\n            reference_points=reference_points,\n            level_start_index=level_start_index,\n            valid_ratios=valid_ratios,\n            **kwargs)\n\n        memory = memory.permute(1, 0, 2)\n        bs, _, c = memory.shape\n        if self.as_two_stage:\n            output_memory, output_proposals = \\\n                self.gen_encoder_output_proposals(\n                    memory, 
mask_flatten, spatial_shapes)\n            enc_outputs_class = cls_branches[self.decoder.num_layers](\n                output_memory)\n            enc_outputs_coord_unact = \\\n                reg_branches[\n                    self.decoder.num_layers](output_memory) + output_proposals\n\n            topk = self.two_stage_num_proposals\n            # We only use the first channel in enc_outputs_class as foreground,\n            # the other (num_classes - 1) channels are actually not used.\n            # Its targets are set to be 0s, which indicates the first\n            # class (foreground) because we use [0, num_classes - 1] to\n            # indicate class labels, background class is indicated by\n            # num_classes (similar convention in RPN).\n            # See https://github.com/open-mmlab/mmdetection/blob/master/mmdet/models/dense_heads/deformable_detr_head.py#L241 # noqa\n            # This follows the official implementation of Deformable DETR.\n            topk_proposals = torch.topk(\n                enc_outputs_class[..., 0], topk, dim=1)[1]\n            topk_coords_unact = torch.gather(\n                enc_outputs_coord_unact, 1,\n                topk_proposals.unsqueeze(-1).repeat(1, 1, 4))\n            topk_coords_unact = topk_coords_unact.detach()\n            reference_points = topk_coords_unact.sigmoid()\n            init_reference_out = reference_points\n            pos_trans_out = self.pos_trans_norm(\n                self.pos_trans(self.get_proposal_pos_embed(topk_coords_unact)))\n            query_pos, query = torch.split(pos_trans_out, c, dim=2)\n        else:\n            query_pos, query = torch.split(query_embed, c, dim=1)\n            query_pos = query_pos.unsqueeze(0).expand(bs, -1, -1)\n            query = query.unsqueeze(0).expand(bs, -1, -1)\n            reference_points = self.reference_points(query_pos).sigmoid()\n            init_reference_out = reference_points\n\n        # decoder\n        query = query.permute(1, 0, 2)\n        memory = memory.permute(1, 0, 2)\n        query_pos = query_pos.permute(1, 0, 2)\n        inter_states, inter_references = self.decoder(\n            query=query,\n            key=None,\n            value=memory,\n            query_pos=query_pos,\n            key_padding_mask=mask_flatten,\n            reference_points=reference_points,\n            spatial_shapes=spatial_shapes,\n            level_start_index=level_start_index,\n            valid_ratios=valid_ratios,\n            reg_branches=reg_branches,\n            **kwargs)\n\n        inter_references_out = inter_references\n        if self.as_two_stage:\n            return inter_states, init_reference_out,\\\n                inter_references_out, enc_outputs_class,\\\n                enc_outputs_coord_unact\n        return inter_states, init_reference_out, \\\n            inter_references_out, None, None\n\n\n@TRANSFORMER.register_module()\nclass DynamicConv(BaseModule):\n    \"\"\"Implements Dynamic Convolution.\n\n    This module generate parameters for each sample and\n    use bmm to implement 1*1 convolution. 
Code is modified\n    from the `official github repo <https://github.com/PeizeSun/\n    SparseR-CNN/blob/main/projects/SparseRCNN/sparsercnn/head.py#L258>`_ .\n\n    Args:\n        in_channels (int): The input feature channel.\n            Defaults to 256.\n        feat_channels (int): The inner feature channel.\n            Defaults to 64.\n        out_channels (int, optional): The output feature channel.\n            When not specified, it will be set to `in_channels`\n            by default\n        input_feat_shape (int): The shape of input feature.\n            Defaults to 7.\n        with_proj (bool): Project two-dimentional feature to\n            one-dimentional feature. Default to True.\n        act_cfg (dict): The activation config for DynamicConv.\n        norm_cfg (dict): Config dict for normalization layer. Default\n            layer normalization.\n        init_cfg (obj:`mmcv.ConfigDict`): The Config for initialization.\n            Default: None.\n    \"\"\"\n\n    def __init__(self,\n                 in_channels=256,\n                 feat_channels=64,\n                 out_channels=None,\n                 input_feat_shape=7,\n                 with_proj=True,\n                 act_cfg=dict(type='ReLU', inplace=True),\n                 norm_cfg=dict(type='LN'),\n                 init_cfg=None):\n        super(DynamicConv, self).__init__(init_cfg)\n        self.in_channels = in_channels\n        self.feat_channels = feat_channels\n        self.out_channels_raw = out_channels\n        self.input_feat_shape = input_feat_shape\n        self.with_proj = with_proj\n        self.act_cfg = act_cfg\n        self.norm_cfg = norm_cfg\n        self.out_channels = out_channels if out_channels else in_channels\n\n        self.num_params_in = self.in_channels * self.feat_channels\n        self.num_params_out = self.out_channels * self.feat_channels\n        self.dynamic_layer = nn.Linear(\n            self.in_channels, self.num_params_in + self.num_params_out)\n\n        self.norm_in = build_norm_layer(norm_cfg, self.feat_channels)[1]\n        self.norm_out = build_norm_layer(norm_cfg, self.out_channels)[1]\n\n        self.activation = build_activation_layer(act_cfg)\n\n        num_output = self.out_channels * input_feat_shape**2\n        if self.with_proj:\n            self.fc_layer = nn.Linear(num_output, self.out_channels)\n            self.fc_norm = build_norm_layer(norm_cfg, self.out_channels)[1]\n\n    def forward(self, param_feature, input_feature):\n        \"\"\"Forward function for `DynamicConv`.\n\n        Args:\n            param_feature (Tensor): The feature can be used\n                to generate the parameter, has shape\n                (num_all_proposals, in_channels).\n            input_feature (Tensor): Feature that\n                interact with parameters, has shape\n                (num_all_proposals, in_channels, H, W).\n\n        Returns:\n            Tensor: The output feature has shape\n            (num_all_proposals, out_channels).\n        \"\"\"\n        input_feature = input_feature.flatten(2).permute(2, 0, 1)\n\n        input_feature = input_feature.permute(1, 0, 2)\n        parameters = self.dynamic_layer(param_feature)\n\n        param_in = parameters[:, :self.num_params_in].view(\n            -1, self.in_channels, self.feat_channels)\n        param_out = parameters[:, -self.num_params_out:].view(\n            -1, self.feat_channels, self.out_channels)\n\n        # input_feature has shape (num_all_proposals, H*W, in_channels)\n        # param_in has shape 
(num_all_proposals, in_channels, feat_channels)\n        # feature has shape (num_all_proposals, H*W, feat_channels)\n        features = torch.bmm(input_feature, param_in)\n        features = self.norm_in(features)\n        features = self.activation(features)\n\n        # param_out has shape (batch_size, feat_channels, out_channels)\n        features = torch.bmm(features, param_out)\n        features = self.norm_out(features)\n        features = self.activation(features)\n\n        if self.with_proj:\n            features = features.flatten(1)\n            features = self.fc_layer(features)\n            features = self.fc_norm(features)\n            features = self.activation(features)\n\n        return features\n"
  },
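# --- Editor's note: illustrative sketch, not part of the repository. ---
# A minimal, standalone check of two facts stated in transformer.py above:
# (1) with kernel_size=2 and stride=2, nn.Unfold turns a (B, C, H, W) map into
#     (B, 4*C, H/2 * W/2), which PatchMerging then projects to out_channels;
# (2) inverse_sigmoid() is the (clamped) inverse of torch.sigmoid().
# The tensor sizes below are arbitrary example values.
import torch
import torch.nn as nn

B, C, H, W = 2, 96, 56, 56
x = torch.randn(B, C, H, W)

sampler = nn.Unfold(kernel_size=2, dilation=1, padding=0, stride=2)
merged = sampler(x)                              # (B, 4*C, H/2 * W/2)
assert merged.shape == (B, 4 * C, (H // 2) * (W // 2))

reduction = nn.Linear(4 * C, 2 * C, bias=False)  # sample_dim -> out_channels
out = reduction(merged.transpose(1, 2))          # (B, H/2 * W/2, 2*C)
print(out.shape)                                 # torch.Size([2, 784, 192])

# same formula as inverse_sigmoid(), re-stated inline for the round-trip check
p = torch.rand(4)
logits = torch.log(p.clamp(min=1e-5) / (1 - p).clamp(min=1e-5))
assert torch.allclose(torch.sigmoid(logits), p, atol=1e-4)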
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .collect_env import collect_env\nfrom .compat_config import compat_cfg\nfrom .logger import get_caller_name, get_root_logger, log_img_scale\nfrom .memory import AvoidCUDAOOM, AvoidOOM\nfrom .misc import find_latest_checkpoint, update_data_root\nfrom .replace_cfg_vals import replace_cfg_vals\nfrom .setup_env import setup_multi_processes\nfrom .split_batch import split_batch\nfrom .util_distribution import build_ddp, build_dp, get_device\n\n__all__ = [\n    'get_root_logger', 'collect_env', 'find_latest_checkpoint',\n    'update_data_root', 'setup_multi_processes', 'get_caller_name',\n    'log_img_scale', 'compat_cfg', 'split_batch', 'build_ddp', 'build_dp',\n    'get_device', 'replace_cfg_vals', 'AvoidOOM', 'AvoidCUDAOOM'\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/collect_env.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmcv.utils import collect_env as collect_base_env\nfrom mmcv.utils import get_git_hash\n\nimport mmdet\n\n\ndef collect_env():\n    \"\"\"Collect the information of the running environments.\"\"\"\n    env_info = collect_base_env()\n    env_info['MMDetection'] = mmdet.__version__ + '+' + get_git_hash()[:7]\n    return env_info\n\n\nif __name__ == '__main__':\n    for name, val in collect_env().items():\n        print(f'{name}: {val}')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/compat_config.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport warnings\n\nfrom mmcv import ConfigDict\n\n\ndef compat_cfg(cfg):\n    \"\"\"This function would modify some filed to keep the compatibility of\n    config.\n\n    For example, it will move some args which will be deprecated to the correct\n    fields.\n    \"\"\"\n    cfg = copy.deepcopy(cfg)\n    cfg = compat_imgs_per_gpu(cfg)\n    cfg = compat_loader_args(cfg)\n    cfg = compat_runner_args(cfg)\n    return cfg\n\n\ndef compat_runner_args(cfg):\n    if 'runner' not in cfg:\n        cfg.runner = ConfigDict({\n            'type': 'EpochBasedRunner',\n            'max_epochs': cfg.total_epochs\n        })\n        warnings.warn(\n            'config is now expected to have a `runner` section, '\n            'please set `runner` in your config.', UserWarning)\n    else:\n        if 'total_epochs' in cfg:\n            assert cfg.total_epochs == cfg.runner.max_epochs\n    return cfg\n\n\ndef compat_imgs_per_gpu(cfg):\n    cfg = copy.deepcopy(cfg)\n    if 'imgs_per_gpu' in cfg.data:\n        warnings.warn('\"imgs_per_gpu\" is deprecated in MMDet V2.0. '\n                      'Please use \"samples_per_gpu\" instead')\n        if 'samples_per_gpu' in cfg.data:\n            warnings.warn(\n                f'Got \"imgs_per_gpu\"={cfg.data.imgs_per_gpu} and '\n                f'\"samples_per_gpu\"={cfg.data.samples_per_gpu}, \"imgs_per_gpu\"'\n                f'={cfg.data.imgs_per_gpu} is used in this experiments')\n        else:\n            warnings.warn('Automatically set \"samples_per_gpu\"=\"imgs_per_gpu\"='\n                          f'{cfg.data.imgs_per_gpu} in this experiments')\n        cfg.data.samples_per_gpu = cfg.data.imgs_per_gpu\n    return cfg\n\n\ndef compat_loader_args(cfg):\n    \"\"\"Deprecated sample_per_gpu in cfg.data.\"\"\"\n\n    cfg = copy.deepcopy(cfg)\n    if 'train_dataloader' not in cfg.data:\n        cfg.data['train_dataloader'] = ConfigDict()\n    if 'val_dataloader' not in cfg.data:\n        cfg.data['val_dataloader'] = ConfigDict()\n    if 'test_dataloader' not in cfg.data:\n        cfg.data['test_dataloader'] = ConfigDict()\n\n    # special process for train_dataloader\n    if 'samples_per_gpu' in cfg.data:\n\n        samples_per_gpu = cfg.data.pop('samples_per_gpu')\n        assert 'samples_per_gpu' not in \\\n               cfg.data.train_dataloader, ('`samples_per_gpu` are set '\n                                           'in `data` field and ` '\n                                           'data.train_dataloader` '\n                                           'at the same time. '\n                                           'Please only set it in '\n                                           '`data.train_dataloader`. ')\n        cfg.data.train_dataloader['samples_per_gpu'] = samples_per_gpu\n\n    if 'persistent_workers' in cfg.data:\n\n        persistent_workers = cfg.data.pop('persistent_workers')\n        assert 'persistent_workers' not in \\\n               cfg.data.train_dataloader, ('`persistent_workers` are set '\n                                           'in `data` field and ` '\n                                           'data.train_dataloader` '\n                                           'at the same time. '\n                                           'Please only set it in '\n                                           '`data.train_dataloader`. 
')\n        cfg.data.train_dataloader['persistent_workers'] = persistent_workers\n\n    if 'workers_per_gpu' in cfg.data:\n\n        workers_per_gpu = cfg.data.pop('workers_per_gpu')\n        cfg.data.train_dataloader['workers_per_gpu'] = workers_per_gpu\n        cfg.data.val_dataloader['workers_per_gpu'] = workers_per_gpu\n        cfg.data.test_dataloader['workers_per_gpu'] = workers_per_gpu\n\n    # special process for val_dataloader\n    if 'samples_per_gpu' in cfg.data.val:\n        # keep default value of `sample_per_gpu` is 1\n        assert 'samples_per_gpu' not in \\\n               cfg.data.val_dataloader, ('`samples_per_gpu` are set '\n                                         'in `data.val` field and ` '\n                                         'data.val_dataloader` at '\n                                         'the same time. '\n                                         'Please only set it in '\n                                         '`data.val_dataloader`. ')\n        cfg.data.val_dataloader['samples_per_gpu'] = \\\n            cfg.data.val.pop('samples_per_gpu')\n    # special process for val_dataloader\n\n    # in case the test dataset is concatenated\n    if isinstance(cfg.data.test, dict):\n        if 'samples_per_gpu' in cfg.data.test:\n            assert 'samples_per_gpu' not in \\\n                   cfg.data.test_dataloader, ('`samples_per_gpu` are set '\n                                              'in `data.test` field and ` '\n                                              'data.test_dataloader` '\n                                              'at the same time. '\n                                              'Please only set it in '\n                                              '`data.test_dataloader`. ')\n\n            cfg.data.test_dataloader['samples_per_gpu'] = \\\n                cfg.data.test.pop('samples_per_gpu')\n\n    elif isinstance(cfg.data.test, list):\n        for ds_cfg in cfg.data.test:\n            if 'samples_per_gpu' in ds_cfg:\n                assert 'samples_per_gpu' not in \\\n                       cfg.data.test_dataloader, ('`samples_per_gpu` are set '\n                                                  'in `data.test` field and ` '\n                                                  'data.test_dataloader` at'\n                                                  ' the same time. '\n                                                  'Please only set it in '\n                                                  '`data.test_dataloader`. ')\n        samples_per_gpu = max(\n            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])\n        cfg.data.test_dataloader['samples_per_gpu'] = samples_per_gpu\n\n    return cfg\n"
  },
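# --- Editor's note: illustrative sketch, not part of the repository. ---
# Assumes mmcv and mmdet are importable. The config values are made-up
# examples; the point is to show how compat_cfg() migrates the deprecated
# top-level `samples_per_gpu` / `workers_per_gpu` keys into the per-split
# dataloader sections and synthesizes a `runner` entry from `total_epochs`.
from mmcv import Config

from mmdet.utils import compat_cfg

cfg = Config(dict(
    total_epochs=12,
    data=dict(
        samples_per_gpu=2,
        workers_per_gpu=2,
        train=dict(type='CocoDataset'),
        val=dict(type='CocoDataset'),
        test=dict(type='CocoDataset'))))

cfg = compat_cfg(cfg)
print(cfg.runner)                 # {'type': 'EpochBasedRunner', 'max_epochs': 12}
print(cfg.data.train_dataloader)  # {'samples_per_gpu': 2, 'workers_per_gpu': 2}
print(cfg.data.val_dataloader)    # {'workers_per_gpu': 2}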
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/contextmanagers.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport asyncio\nimport contextlib\nimport logging\nimport os\nimport time\nfrom typing import List\n\nimport torch\n\nlogger = logging.getLogger(__name__)\n\nDEBUG_COMPLETED_TIME = bool(os.environ.get('DEBUG_COMPLETED_TIME', False))\n\n\n@contextlib.asynccontextmanager\nasync def completed(trace_name='',\n                    name='',\n                    sleep_interval=0.05,\n                    streams: List[torch.cuda.Stream] = None):\n    \"\"\"Async context manager that waits for work to complete on given CUDA\n    streams.\"\"\"\n    if not torch.cuda.is_available():\n        yield\n        return\n\n    stream_before_context_switch = torch.cuda.current_stream()\n    if not streams:\n        streams = [stream_before_context_switch]\n    else:\n        streams = [s if s else stream_before_context_switch for s in streams]\n\n    end_events = [\n        torch.cuda.Event(enable_timing=DEBUG_COMPLETED_TIME) for _ in streams\n    ]\n\n    if DEBUG_COMPLETED_TIME:\n        start = torch.cuda.Event(enable_timing=True)\n        stream_before_context_switch.record_event(start)\n\n        cpu_start = time.monotonic()\n    logger.debug('%s %s starting, streams: %s', trace_name, name, streams)\n    grad_enabled_before = torch.is_grad_enabled()\n    try:\n        yield\n    finally:\n        current_stream = torch.cuda.current_stream()\n        assert current_stream == stream_before_context_switch\n\n        if DEBUG_COMPLETED_TIME:\n            cpu_end = time.monotonic()\n        for i, stream in enumerate(streams):\n            event = end_events[i]\n            stream.record_event(event)\n\n        grad_enabled_after = torch.is_grad_enabled()\n\n        # observed change of torch.is_grad_enabled() during concurrent run of\n        # async_test_bboxes code\n        assert (grad_enabled_before == grad_enabled_after\n                ), 'Unexpected is_grad_enabled() value change'\n\n        are_done = [e.query() for e in end_events]\n        logger.debug('%s %s completed: %s streams: %s', trace_name, name,\n                     are_done, streams)\n        with torch.cuda.stream(stream_before_context_switch):\n            while not all(are_done):\n                await asyncio.sleep(sleep_interval)\n                are_done = [e.query() for e in end_events]\n                logger.debug(\n                    '%s %s completed: %s streams: %s',\n                    trace_name,\n                    name,\n                    are_done,\n                    streams,\n                )\n\n        current_stream = torch.cuda.current_stream()\n        assert current_stream == stream_before_context_switch\n\n        if DEBUG_COMPLETED_TIME:\n            cpu_time = (cpu_end - cpu_start) * 1000\n            stream_times_ms = ''\n            for i, stream in enumerate(streams):\n                elapsed_time = start.elapsed_time(end_events[i])\n                stream_times_ms += f' {stream} {elapsed_time:.2f} ms'\n            logger.info('%s %s %.2f ms %s', trace_name, name, cpu_time,\n                        stream_times_ms)\n\n\n@contextlib.asynccontextmanager\nasync def concurrent(streamqueue: asyncio.Queue,\n                     trace_name='concurrent',\n                     name='stream'):\n    \"\"\"Run code concurrently in different streams.\n\n    :param streamqueue: asyncio.Queue instance.\n\n    Queue tasks define the pool of streams used for concurrent execution.\n    \"\"\"\n    if not torch.cuda.is_available():\n        yield\n        
return\n\n    initial_stream = torch.cuda.current_stream()\n\n    with torch.cuda.stream(initial_stream):\n        stream = await streamqueue.get()\n        assert isinstance(stream, torch.cuda.Stream)\n\n        try:\n            with torch.cuda.stream(stream):\n                logger.debug('%s %s is starting, stream: %s', trace_name, name,\n                             stream)\n                yield\n                current = torch.cuda.current_stream()\n                assert current == stream\n                logger.debug('%s %s has finished, stream: %s', trace_name,\n                             name, stream)\n        finally:\n            streamqueue.task_done()\n            streamqueue.put_nowait(stream)\n"
  },
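# --- Editor's note: illustrative sketch, not part of the repository. ---
# Minimal use of the async context manager above. `completed` waits until the
# CUDA events it records are done; when CUDA is unavailable it simply yields,
# so this snippet also runs on CPU-only machines. Shapes are arbitrary examples.
import asyncio

import torch

from mmdet.utils.contextmanagers import completed


async def run_once():
    result = 0.0
    async with completed('demo', 'matmul'):
        if torch.cuda.is_available():
            x = torch.randn(512, 512, device='cuda')
            result = float((x @ x).sum())
    return result


print(asyncio.run(run_once()))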
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/logger.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport inspect\nimport logging\n\nfrom mmcv.utils import get_logger\n\n\ndef get_root_logger(log_file=None, log_level=logging.INFO):\n    \"\"\"Get root logger.\n\n    Args:\n        log_file (str, optional): File path of log. Defaults to None.\n        log_level (int, optional): The level of logger.\n            Defaults to logging.INFO.\n\n    Returns:\n        :obj:`logging.Logger`: The obtained logger\n    \"\"\"\n    logger = get_logger(name='mmdet', log_file=log_file, log_level=log_level)\n\n    return logger\n\n\ndef get_caller_name():\n    \"\"\"Get name of caller method.\"\"\"\n    # this_func_frame = inspect.stack()[0][0]  # i.e., get_caller_name\n    # callee_frame = inspect.stack()[1][0]  # e.g., log_img_scale\n    caller_frame = inspect.stack()[2][0]  # e.g., caller of log_img_scale\n    caller_method = caller_frame.f_code.co_name\n    try:\n        caller_class = caller_frame.f_locals['self'].__class__.__name__\n        return f'{caller_class}.{caller_method}'\n    except KeyError:  # caller is a function\n        return caller_method\n\n\ndef log_img_scale(img_scale, shape_order='hw', skip_square=False):\n    \"\"\"Log image size.\n\n    Args:\n        img_scale (tuple): Image size to be logged.\n        shape_order (str, optional): The order of image shape.\n            'hw' for (height, width) and 'wh' for (width, height).\n            Defaults to 'hw'.\n        skip_square (bool, optional): Whether to skip logging for square\n            img_scale. Defaults to False.\n\n    Returns:\n        bool: Whether to have done logging.\n    \"\"\"\n    if shape_order == 'hw':\n        height, width = img_scale\n    elif shape_order == 'wh':\n        width, height = img_scale\n    else:\n        raise ValueError(f'Invalid shape_order {shape_order}.')\n\n    if skip_square and (height == width):\n        return False\n\n    logger = get_root_logger()\n    caller = get_caller_name()\n    logger.info(f'image shape: height={height}, width={width} in {caller}')\n\n    return True\n"
  },
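# --- Editor's note: illustrative sketch, not part of the repository. ---
# Assumes mmdet is importable. get_root_logger() returns the cached 'mmdet'
# logger; log_img_scale() records an image size together with the name of the
# calling function. The sizes below are arbitrary example values.
import logging

from mmdet.utils import get_root_logger, log_img_scale

logger = get_root_logger(log_level=logging.INFO)
logger.info('training started')

log_img_scale((800, 1333), shape_order='hw')  # logs "image shape: height=800, width=1333 in ..."
log_img_scale((512, 512), skip_square=True)   # square scale is skipped, returns False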
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/memory.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\nfrom collections import abc\nfrom contextlib import contextmanager\nfrom functools import wraps\n\nimport torch\n\nfrom mmdet.utils import get_root_logger\n\n\ndef cast_tensor_type(inputs, src_type=None, dst_type=None):\n    \"\"\"Recursively convert Tensor in inputs from ``src_type`` to ``dst_type``.\n\n    Args:\n        inputs: Inputs that to be casted.\n        src_type (torch.dtype | torch.device): Source type.\n        src_type (torch.dtype | torch.device): Destination type.\n\n    Returns:\n        The same type with inputs, but all contained Tensors have been cast.\n    \"\"\"\n    assert dst_type is not None\n    if isinstance(inputs, torch.Tensor):\n        if isinstance(dst_type, torch.device):\n            # convert Tensor to dst_device\n            if hasattr(inputs, 'to') and \\\n                    hasattr(inputs, 'device') and \\\n                    (inputs.device == src_type or src_type is None):\n                return inputs.to(dst_type)\n            else:\n                return inputs\n        else:\n            # convert Tensor to dst_dtype\n            if hasattr(inputs, 'to') and \\\n                    hasattr(inputs, 'dtype') and \\\n                    (inputs.dtype == src_type or src_type is None):\n                return inputs.to(dst_type)\n            else:\n                return inputs\n        # we need to ensure that the type of inputs to be casted are the same\n        # as the argument `src_type`.\n    elif isinstance(inputs, abc.Mapping):\n        return type(inputs)({\n            k: cast_tensor_type(v, src_type=src_type, dst_type=dst_type)\n            for k, v in inputs.items()\n        })\n    elif isinstance(inputs, abc.Iterable):\n        return type(inputs)(\n            cast_tensor_type(item, src_type=src_type, dst_type=dst_type)\n            for item in inputs)\n    # TODO: Currently not supported\n    # elif isinstance(inputs, InstanceData):\n    #     for key, value in inputs.items():\n    #         inputs[key] = cast_tensor_type(\n    #             value, src_type=src_type, dst_type=dst_type)\n    #     return inputs\n    else:\n        return inputs\n\n\n@contextmanager\ndef _ignore_torch_cuda_oom():\n    \"\"\"A context which ignores CUDA OOM exception from pytorch.\n\n    Code is modified from\n    <https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/memory.py>  # noqa: E501\n    \"\"\"\n    try:\n        yield\n    except RuntimeError as e:\n        # NOTE: the string may change?\n        if 'CUDA out of memory. ' in str(e):\n            pass\n        else:\n            raise\n\n\nclass AvoidOOM:\n    \"\"\"Try to convert inputs to FP16 and CPU if got a PyTorch's CUDA Out of\n    Memory error. It will do the following steps:\n\n        1. First retry after calling `torch.cuda.empty_cache()`.\n        2. If that still fails, it will then retry by converting inputs\n          to FP16.\n        3. If that still fails trying to convert inputs to CPUs.\n          In this case, it expects the function to dispatch to\n          CPU implementation.\n\n    Args:\n        to_cpu (bool): Whether to convert outputs to CPU if get an OOM\n            error. This will slow down the code significantly.\n            Defaults to True.\n        test (bool): Skip `_ignore_torch_cuda_oom` operate that can use\n            lightweight data in unit test, only used in\n            test unit. 
Defaults to False.\n\n    Examples:\n        >>> from mmdet.utils.memory import AvoidOOM\n        >>> AvoidCUDAOOM = AvoidOOM()\n        >>> output = AvoidOOM.retry_if_cuda_oom(\n        >>>     some_torch_function)(input1, input2)\n        >>> # To use as a decorator\n        >>> # from mmdet.utils import AvoidCUDAOOM\n        >>> @AvoidCUDAOOM.retry_if_cuda_oom\n        >>> def function(*args, **kwargs):\n        >>>     return None\n    ```\n\n    Note:\n        1. The output may be on CPU even if inputs are on GPU. Processing\n            on CPU will slow down the code significantly.\n        2. When converting inputs to CPU, it will only look at each argument\n            and check if it has `.device` and `.to` for conversion. Nested\n            structures of tensors are not supported.\n        3. Since the function might be called more than once, it has to be\n            stateless.\n    \"\"\"\n\n    def __init__(self, to_cpu=True, test=False):\n        self.to_cpu = to_cpu\n        self.test = test\n\n    def retry_if_cuda_oom(self, func):\n        \"\"\"Makes a function retry itself after encountering pytorch's CUDA OOM\n        error.\n\n        The implementation logic is referred to\n        https://github.com/facebookresearch/detectron2/blob/main/detectron2/utils/memory.py\n\n        Args:\n            func: a stateless callable that takes tensor-like objects\n                as arguments.\n        Returns:\n            func: a callable which retries `func` if OOM is encountered.\n        \"\"\"  # noqa: W605\n\n        @wraps(func)\n        def wrapped(*args, **kwargs):\n\n            # raw function\n            if not self.test:\n                with _ignore_torch_cuda_oom():\n                    return func(*args, **kwargs)\n\n                # Clear cache and retry\n                torch.cuda.empty_cache()\n                with _ignore_torch_cuda_oom():\n                    return func(*args, **kwargs)\n\n            # get the type and device of first tensor\n            dtype, device = None, None\n            values = args + tuple(kwargs.values())\n            for value in values:\n                if isinstance(value, torch.Tensor):\n                    dtype = value.dtype\n                    device = value.device\n                    break\n            if dtype is None or device is None:\n                raise ValueError('There is no tensor in the inputs, '\n                                 'cannot get dtype and device.')\n\n            # Convert to FP16\n            fp16_args = cast_tensor_type(args, dst_type=torch.half)\n            fp16_kwargs = cast_tensor_type(kwargs, dst_type=torch.half)\n            logger = get_root_logger()\n            logger.warning(f'Attempting to copy inputs of {str(func)} '\n                           'to FP16 due to CUDA OOM')\n\n            # get input tensor type, the output type will same as\n            # the first parameter type.\n            with _ignore_torch_cuda_oom():\n                output = func(*fp16_args, **fp16_kwargs)\n                output = cast_tensor_type(\n                    output, src_type=torch.half, dst_type=dtype)\n                if not self.test:\n                    return output\n            logger.warning('Using FP16 still meet CUDA OOM')\n\n            # Try on CPU. 
This will slow down the code significantly,\n            # therefore print a notice.\n            if self.to_cpu:\n                logger.warning(f'Attempting to copy inputs of {str(func)} '\n                               'to CPU due to CUDA OOM')\n                cpu_device = torch.empty(0).device\n                cpu_args = cast_tensor_type(args, dst_type=cpu_device)\n                cpu_kwargs = cast_tensor_type(kwargs, dst_type=cpu_device)\n\n                # convert outputs to GPU\n                with _ignore_torch_cuda_oom():\n                    logger.warning(f'Convert outputs to GPU (device={device})')\n                    output = func(*cpu_args, **cpu_kwargs)\n                    output = cast_tensor_type(\n                        output, src_type=cpu_device, dst_type=device)\n                    return output\n\n                warnings.warn('Cannot convert output to GPU due to CUDA OOM, '\n                              'the output is now on CPU, which might cause '\n                              'errors if the output need to interact with GPU '\n                              'data in subsequent operations')\n                logger.warning('Cannot convert output to GPU due to '\n                               'CUDA OOM, the output is on CPU now.')\n\n                return func(*cpu_args, **cpu_kwargs)\n            else:\n                # may still get CUDA OOM error\n                return func(*args, **kwargs)\n\n        return wrapped\n\n\n# To use AvoidOOM as a decorator\nAvoidCUDAOOM = AvoidOOM()\n"
  },
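# --- Editor's note: illustrative sketch, not part of the repository. ---
# Assumes mmdet is importable. cast_tensor_type() recursively converts the
# tensors inside a container to a new dtype (or device) while keeping the
# container structure; AvoidCUDAOOM is the ready-made AvoidOOM() instance used
# as a decorator, as the class docstring above describes.
import torch

from mmdet.utils import AvoidCUDAOOM
from mmdet.utils.memory import cast_tensor_type

batch = {'feats': torch.randn(2, 256), 'scores': torch.randn(2)}
half_batch = cast_tensor_type(batch, dst_type=torch.half)
print(half_batch['feats'].dtype)   # torch.float16


@AvoidCUDAOOM.retry_if_cuda_oom
def pairwise_dot(a, b):
    # retried with empty_cache / FP16 / CPU fallbacks if it hits CUDA OOM
    return a @ b.t()


out = pairwise_dot(torch.randn(8, 32), torch.randn(8, 32))
print(out.shape)                   # torch.Size([8, 8])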
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/misc.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport glob\nimport os\nimport os.path as osp\nimport warnings\n\nimport mmcv\nimport torch\nfrom mmcv.utils import TORCH_VERSION, digit_version, print_log\n\n\ndef find_latest_checkpoint(path, suffix='pth'):\n    \"\"\"Find the latest checkpoint from the working directory.\n\n    Args:\n        path(str): The path to find checkpoints.\n        suffix(str): File extension.\n            Defaults to pth.\n\n    Returns:\n        latest_path(str | None): File path of the latest checkpoint.\n    References:\n        .. [1] https://github.com/microsoft/SoftTeacher\n                  /blob/main/ssod/utils/patch.py\n    \"\"\"\n    if not osp.exists(path):\n        warnings.warn('The path of checkpoints does not exist.')\n        return None\n    if osp.exists(osp.join(path, f'latest.{suffix}')):\n        return osp.join(path, f'latest.{suffix}')\n\n    checkpoints = glob.glob(osp.join(path, f'*.{suffix}'))\n    if len(checkpoints) == 0:\n        warnings.warn('There are no checkpoints in the path.')\n        return None\n    latest = -1\n    latest_path = None\n    for checkpoint in checkpoints:\n        count = int(osp.basename(checkpoint).split('_')[-1].split('.')[0])\n        if count > latest:\n            latest = count\n            latest_path = checkpoint\n    return latest_path\n\n\ndef update_data_root(cfg, logger=None):\n    \"\"\"Update data root according to env MMDET_DATASETS.\n\n    If set env MMDET_DATASETS, update cfg.data_root according to\n    MMDET_DATASETS. Otherwise, using cfg.data_root as default.\n\n    Args:\n        cfg (mmcv.Config): The model config need to modify\n        logger (logging.Logger | str | None): the way to print msg\n    \"\"\"\n    assert isinstance(cfg, mmcv.Config), \\\n        f'cfg got wrong type: {type(cfg)}, expected mmcv.Config'\n\n    if 'MMDET_DATASETS' in os.environ:\n        dst_root = os.environ['MMDET_DATASETS']\n        print_log(f'MMDET_DATASETS has been set to be {dst_root}.'\n                  f'Using {dst_root} as data root.')\n    else:\n        return\n\n    assert isinstance(cfg, mmcv.Config), \\\n        f'cfg got wrong type: {type(cfg)}, expected mmcv.Config'\n\n    def update(cfg, src_str, dst_str):\n        for k, v in cfg.items():\n            if isinstance(v, mmcv.ConfigDict):\n                update(cfg[k], src_str, dst_str)\n            if isinstance(v, str) and src_str in v:\n                cfg[k] = v.replace(src_str, dst_str)\n\n    update(cfg.data, cfg.data_root, dst_root)\n    cfg.data_root = dst_root\n\n\n_torch_version_div_indexing = (\n    'parrots' not in TORCH_VERSION\n    and digit_version(TORCH_VERSION) >= digit_version('1.8'))\n\n\ndef floordiv(dividend, divisor, rounding_mode='trunc'):\n    if _torch_version_div_indexing:\n        return torch.div(dividend, divisor, rounding_mode=rounding_mode)\n    else:\n        return dividend // divisor\n"
  },
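A quick sketch of how find_latest_checkpoint picks the newest file (hypothetical work_dir; filenames follow the epoch_<n>.pth pattern the parser expects):

import os.path as osp
import tempfile
from mmdet.utils.misc import find_latest_checkpoint

work_dir = tempfile.mkdtemp()
for n in (1, 2, 10):
    open(osp.join(work_dir, f'epoch_{n}.pth'), 'w').close()

# no latest.pth exists, so the numerically largest suffix wins: .../epoch_10.pth
print(find_latest_checkpoint(work_dir))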
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/profiling.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport contextlib\nimport sys\nimport time\n\nimport torch\n\nif sys.version_info >= (3, 7):\n\n    @contextlib.contextmanager\n    def profile_time(trace_name,\n                     name,\n                     enabled=True,\n                     stream=None,\n                     end_stream=None):\n        \"\"\"Print time spent by CPU and GPU.\n\n        Useful as a temporary context manager to find sweet spots of code\n        suitable for async implementation.\n        \"\"\"\n        if (not enabled) or not torch.cuda.is_available():\n            yield\n            return\n        stream = stream if stream else torch.cuda.current_stream()\n        end_stream = end_stream if end_stream else stream\n        start = torch.cuda.Event(enable_timing=True)\n        end = torch.cuda.Event(enable_timing=True)\n        stream.record_event(start)\n        try:\n            cpu_start = time.monotonic()\n            yield\n        finally:\n            cpu_end = time.monotonic()\n            end_stream.record_event(end)\n            end.synchronize()\n            cpu_time = (cpu_end - cpu_start) * 1000\n            gpu_time = start.elapsed_time(end)\n            msg = f'{trace_name} {name} cpu_time {cpu_time:.2f} ms '\n            msg += f'gpu_time {gpu_time:.2f} ms stream {stream}'\n            print(msg, end_stream)\n"
  },
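A usage sketch for the profile_time context manager above (it only times when a CUDA device is available, otherwise it simply yields; the names are illustrative):

import torch
from mmdet.utils.profiling import profile_time

x = torch.randn(1024, 1024, device='cuda')
with profile_time('matmul_trace', 'forward'):
    y = x @ x  # CPU time via time.monotonic, GPU time via CUDA events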
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/replace_cfg_vals.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport re\n\nfrom mmcv.utils import Config\n\n\ndef replace_cfg_vals(ori_cfg):\n    \"\"\"Replace the string \"${key}\" with the corresponding value.\n\n    Replace the \"${key}\" with the value of ori_cfg.key in the config. And\n    support replacing the chained ${key}. Such as, replace \"${key0.key1}\"\n    with the value of cfg.key0.key1. Code is modified from `vars.py\n    < https://github.com/microsoft/SoftTeacher/blob/main/ssod/utils/vars.py>`_  # noqa: E501\n\n    Args:\n        ori_cfg (mmcv.utils.config.Config):\n            The origin config with \"${key}\" generated from a file.\n\n    Returns:\n        updated_cfg [mmcv.utils.config.Config]:\n            The config with \"${key}\" replaced by the corresponding value.\n    \"\"\"\n\n    def get_value(cfg, key):\n        for k in key.split('.'):\n            cfg = cfg[k]\n        return cfg\n\n    def replace_value(cfg):\n        if isinstance(cfg, dict):\n            return {key: replace_value(value) for key, value in cfg.items()}\n        elif isinstance(cfg, list):\n            return [replace_value(item) for item in cfg]\n        elif isinstance(cfg, tuple):\n            return tuple([replace_value(item) for item in cfg])\n        elif isinstance(cfg, str):\n            # the format of string cfg may be:\n            # 1) \"${key}\", which will be replaced with cfg.key directly\n            # 2) \"xxx${key}xxx\" or \"xxx${key1}xxx${key2}xxx\",\n            # which will be replaced with the string of the cfg.key\n            keys = pattern_key.findall(cfg)\n            values = [get_value(ori_cfg, key[2:-1]) for key in keys]\n            if len(keys) == 1 and keys[0] == cfg:\n                # the format of string cfg is \"${key}\"\n                cfg = values[0]\n            else:\n                for key, value in zip(keys, values):\n                    # the format of string cfg is\n                    # \"xxx${key}xxx\" or \"xxx${key1}xxx${key2}xxx\"\n                    assert not isinstance(value, (dict, list, tuple)), \\\n                        f'for the format of string cfg is ' \\\n                        f\"'xxxxx${key}xxxxx' or 'xxx${key}xxx${key}xxx', \" \\\n                        f\"the type of the value of '${key}' \" \\\n                        f'can not be dict, list, or tuple' \\\n                        f'but you input {type(value)} in {cfg}'\n                    cfg = cfg.replace(key, str(value))\n            return cfg\n        else:\n            return cfg\n\n    # the pattern of string \"${key}\"\n    pattern_key = re.compile(r'\\$\\{[a-zA-Z\\d_.]*\\}')\n    # the type of ori_cfg._cfg_dict is mmcv.utils.config.ConfigDict\n    updated_cfg = Config(\n        replace_value(ori_cfg._cfg_dict), filename=ori_cfg.filename)\n    # replace the model with model_wrapper\n    if updated_cfg.get('model_wrapper', None) is not None:\n        updated_cfg.model = updated_cfg.model_wrapper\n        updated_cfg.pop('model_wrapper')\n    return updated_cfg\n"
  },
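A small sketch of the "${key}" substitution performed by replace_cfg_vals (toy config, not taken from the repo):

from mmcv.utils import Config
from mmdet.utils.replace_cfg_vals import replace_cfg_vals

cfg = Config(dict(
    model=dict(type='FasterRCNN'),
    work_dir='work_dirs/${model.type}',  # chained key resolved against cfg
))
print(replace_cfg_vals(cfg).work_dir)  # -> 'work_dirs/FasterRCNN'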
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/setup_env.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nimport platform\nimport warnings\n\nimport cv2\nimport torch.multiprocessing as mp\n\n\ndef setup_multi_processes(cfg):\n    \"\"\"Setup multi-processing environment variables.\"\"\"\n    # set multi-process start method as `fork` to speed up the training\n    if platform.system() != 'Windows':\n        mp_start_method = cfg.get('mp_start_method', 'fork')\n        current_method = mp.get_start_method(allow_none=True)\n        if current_method is not None and current_method != mp_start_method:\n            warnings.warn(\n                f'Multi-processing start method `{mp_start_method}` is '\n                f'different from the previous setting `{current_method}`.'\n                f'It will be force set to `{mp_start_method}`. You can change '\n                f'this behavior by changing `mp_start_method` in your config.')\n        mp.set_start_method(mp_start_method, force=True)\n\n    # disable opencv multithreading to avoid system being overloaded\n    opencv_num_threads = cfg.get('opencv_num_threads', 0)\n    cv2.setNumThreads(opencv_num_threads)\n\n    # setup OMP threads\n    # This code is referred from https://github.com/pytorch/pytorch/blob/master/torch/distributed/run.py  # noqa\n    workers_per_gpu = cfg.data.get('workers_per_gpu', 1)\n    if 'train_dataloader' in cfg.data:\n        workers_per_gpu = \\\n            max(cfg.data.train_dataloader.get('workers_per_gpu', 1),\n                workers_per_gpu)\n\n    if 'OMP_NUM_THREADS' not in os.environ and workers_per_gpu > 1:\n        omp_num_threads = 1\n        warnings.warn(\n            f'Setting OMP_NUM_THREADS environment variable for each process '\n            f'to be {omp_num_threads} in default, to avoid your system being '\n            f'overloaded, please further tune the variable for optimal '\n            f'performance in your application as needed.')\n        os.environ['OMP_NUM_THREADS'] = str(omp_num_threads)\n\n    # setup MKL threads\n    if 'MKL_NUM_THREADS' not in os.environ and workers_per_gpu > 1:\n        mkl_num_threads = 1\n        warnings.warn(\n            f'Setting MKL_NUM_THREADS environment variable for each process '\n            f'to be {mkl_num_threads} in default, to avoid your system being '\n            f'overloaded, please further tune the variable for optimal '\n            f'performance in your application as needed.')\n        os.environ['MKL_NUM_THREADS'] = str(mkl_num_threads)\n"
  },
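The keys read by setup_multi_processes come from the experiment config; a minimal sketch with illustrative values:

from mmcv.utils import Config
from mmdet.utils.setup_env import setup_multi_processes

cfg = Config(dict(
    mp_start_method='fork',        # ignored on Windows
    opencv_num_threads=0,          # disables OpenCV threading
    data=dict(workers_per_gpu=2),  # > 1 also caps OMP/MKL threads to 1 if unset
))
setup_multi_processes(cfg)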
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/split_batch.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\n\ndef split_batch(img, img_metas, kwargs):\n    \"\"\"Split data_batch by tags.\n\n    Code is modified from\n    <https://github.com/microsoft/SoftTeacher/blob/main/ssod/utils/structure_utils.py> # noqa: E501\n\n    Args:\n        img (Tensor): of shape (N, C, H, W) encoding input images.\n            Typically these should be mean centered and std scaled.\n        img_metas (list[dict]): List of image info dict where each dict\n            has: 'img_shape', 'scale_factor', 'flip', and may also contain\n            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.\n            For details on the values of these keys, see\n            :class:`mmdet.datasets.pipelines.Collect`.\n        kwargs (dict): Specific to concrete implementation.\n\n    Returns:\n        data_groups (dict): a dict that data_batch splited by tags,\n            such as 'sup', 'unsup_teacher', and 'unsup_student'.\n    \"\"\"\n\n    # only stack img in the batch\n    def fuse_list(obj_list, obj):\n        return torch.stack(obj_list) if isinstance(obj,\n                                                   torch.Tensor) else obj_list\n\n    # select data with tag from data_batch\n    def select_group(data_batch, current_tag):\n        group_flag = [tag == current_tag for tag in data_batch['tag']]\n        return {\n            k: fuse_list([vv for vv, gf in zip(v, group_flag) if gf], v)\n            for k, v in data_batch.items()\n        }\n\n    kwargs.update({'img': img, 'img_metas': img_metas})\n    kwargs.update({'tag': [meta['tag'] for meta in img_metas]})\n    tags = list(set(kwargs['tag']))\n    data_groups = {tag: select_group(kwargs, tag) for tag in tags}\n    for tag, group in data_groups.items():\n        group.pop('tag')\n    return data_groups\n"
  },
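A toy sketch of split_batch grouping a batch by each image's 'tag' (shapes and tags are made up):

import torch
from mmdet.utils.split_batch import split_batch

img = torch.zeros(3, 3, 32, 32)
img_metas = [{'tag': 'sup'}, {'tag': 'unsup_teacher'}, {'tag': 'sup'}]
groups = split_batch(img, img_metas, {})
print(sorted(groups))              # ['sup', 'unsup_teacher']
print(groups['sup']['img'].shape)  # torch.Size([2, 3, 32, 32])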
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/util_distribution.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\n\ndp_factory = {'cuda': MMDataParallel, 'cpu': MMDataParallel}\n\nddp_factory = {'cuda': MMDistributedDataParallel}\n\n\ndef build_dp(model, device='cuda', dim=0, *args, **kwargs):\n    \"\"\"build DataParallel module by device type.\n\n    if device is cuda, return a MMDataParallel model; if device is mlu,\n    return a MLUDataParallel model.\n\n    Args:\n        model (:class:`nn.Module`): model to be parallelized.\n        device (str): device type, cuda, cpu or mlu. Defaults to cuda.\n        dim (int): Dimension used to scatter the data. Defaults to 0.\n\n    Returns:\n        nn.Module: the model to be parallelized.\n    \"\"\"\n    if device == 'npu':\n        from mmcv.device.npu import NPUDataParallel\n        dp_factory['npu'] = NPUDataParallel\n        torch.npu.set_device(kwargs['device_ids'][0])\n        torch.npu.set_compile_mode(jit_compile=False)\n        model = model.npu()\n    elif device == 'cuda':\n        model = model.cuda(kwargs['device_ids'][0])\n    elif device == 'mlu':\n        from mmcv.device.mlu import MLUDataParallel\n        dp_factory['mlu'] = MLUDataParallel\n        model = model.mlu()\n\n    return dp_factory[device](model, dim=dim, *args, **kwargs)\n\n\ndef build_ddp(model, device='cuda', *args, **kwargs):\n    \"\"\"Build DistributedDataParallel module by device type.\n\n    If device is cuda, return a MMDistributedDataParallel model;\n    if device is mlu, return a MLUDistributedDataParallel model.\n\n    Args:\n        model (:class:`nn.Module`): module to be parallelized.\n        device (str): device type, mlu or cuda.\n\n    Returns:\n        :class:`nn.Module`: the module to be parallelized\n\n    References:\n        .. [1] https://pytorch.org/docs/stable/generated/torch.nn.parallel.\n                     DistributedDataParallel.html\n    \"\"\"\n    assert device in ['cuda', 'mlu',\n                      'npu'], 'Only available for cuda or mlu or npu devices.'\n    if device == 'npu':\n        from mmcv.device.npu import NPUDistributedDataParallel\n        torch.npu.set_compile_mode(jit_compile=False)\n        ddp_factory['npu'] = NPUDistributedDataParallel\n        model = model.npu()\n    elif device == 'cuda':\n        model = model.cuda()\n    elif device == 'mlu':\n        from mmcv.device.mlu import MLUDistributedDataParallel\n        ddp_factory['mlu'] = MLUDistributedDataParallel\n        model = model.mlu()\n\n    return ddp_factory[device](model, *args, **kwargs)\n\n\ndef is_npu_available():\n    \"\"\"Returns a bool indicating if NPU is currently available.\"\"\"\n    return hasattr(torch, 'npu') and torch.npu.is_available()\n\n\ndef is_mlu_available():\n    \"\"\"Returns a bool indicating if MLU is currently available.\"\"\"\n    return hasattr(torch, 'is_mlu_available') and torch.is_mlu_available()\n\n\ndef get_device():\n    \"\"\"Returns an available device, cpu, cuda or mlu.\"\"\"\n    is_device_available = {\n        'npu': is_npu_available(),\n        'cuda': torch.cuda.is_available(),\n        'mlu': is_mlu_available()\n    }\n    device_list = [k for k, v in is_device_available.items() if v]\n    return device_list[0] if len(device_list) >= 1 else 'cpu'\n"
  },
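A short single-GPU/CPU sketch of picking a device and wrapping a model with build_dp (the model is a placeholder; mlu/npu would need their own device_ids handling):

import torch.nn as nn
from mmdet.utils.util_distribution import build_dp, get_device

device = get_device()  # first available of 'npu', 'cuda', 'mlu', else 'cpu'
model = nn.Conv2d(3, 8, 3)
if device == 'cuda':
    model = build_dp(model, device=device, device_ids=[0])
else:
    model = build_dp(model, device='cpu')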
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/util_mixins.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"This module defines the :class:`NiceRepr` mixin class, which defines a\n``__repr__`` and ``__str__`` method that only depend on a custom ``__nice__``\nmethod, which you must define. This means you only have to overload one\nfunction instead of two.  Furthermore, if the object defines a ``__len__``\nmethod, then the ``__nice__`` method defaults to something sensible, otherwise\nit is treated as abstract and raises ``NotImplementedError``.\n\nTo use simply have your object inherit from :class:`NiceRepr`\n(multi-inheritance should be ok).\n\nThis code was copied from the ubelt library: https://github.com/Erotemic/ubelt\n\nExample:\n    >>> # Objects that define __nice__ have a default __str__ and __repr__\n    >>> class Student(NiceRepr):\n    ...    def __init__(self, name):\n    ...        self.name = name\n    ...    def __nice__(self):\n    ...        return self.name\n    >>> s1 = Student('Alice')\n    >>> s2 = Student('Bob')\n    >>> print(f's1 = {s1}')\n    >>> print(f's2 = {s2}')\n    s1 = <Student(Alice)>\n    s2 = <Student(Bob)>\n\nExample:\n    >>> # Objects that define __len__ have a default __nice__\n    >>> class Group(NiceRepr):\n    ...    def __init__(self, data):\n    ...        self.data = data\n    ...    def __len__(self):\n    ...        return len(self.data)\n    >>> g = Group([1, 2, 3])\n    >>> print(f'g = {g}')\n    g = <Group(3)>\n\"\"\"\nimport warnings\n\n\nclass NiceRepr:\n    \"\"\"Inherit from this class and define ``__nice__`` to \"nicely\" print your\n    objects.\n\n    Defines ``__str__`` and ``__repr__`` in terms of ``__nice__`` function\n    Classes that inherit from :class:`NiceRepr` should redefine ``__nice__``.\n    If the inheriting class has a ``__len__``, method then the default\n    ``__nice__`` method will return its length.\n\n    Example:\n        >>> class Foo(NiceRepr):\n        ...    def __nice__(self):\n        ...        return 'info'\n        >>> foo = Foo()\n        >>> assert str(foo) == '<Foo(info)>'\n        >>> assert repr(foo).startswith('<Foo(info) at ')\n\n    Example:\n        >>> class Bar(NiceRepr):\n        ...    pass\n        >>> bar = Bar()\n        >>> import pytest\n        >>> with pytest.warns(None) as record:\n        >>>     assert 'object at' in str(bar)\n        >>>     assert 'object at' in repr(bar)\n\n    Example:\n        >>> class Baz(NiceRepr):\n        ...    def __len__(self):\n        ...        
return 5\n        >>> baz = Baz()\n        >>> assert str(baz) == '<Baz(5)>'\n    \"\"\"\n\n    def __nice__(self):\n        \"\"\"str: a \"nice\" summary string describing this module\"\"\"\n        if hasattr(self, '__len__'):\n            # It is a common pattern for objects to use __len__ in __nice__\n            # As a convenience we define a default __nice__ for these objects\n            return str(len(self))\n        else:\n            # In all other cases force the subclass to overload __nice__\n            raise NotImplementedError(\n                f'Define the __nice__ method for {self.__class__!r}')\n\n    def __repr__(self):\n        \"\"\"str: the string of the module\"\"\"\n        try:\n            nice = self.__nice__()\n            classname = self.__class__.__name__\n            return f'<{classname}({nice}) at {hex(id(self))}>'\n        except NotImplementedError as ex:\n            warnings.warn(str(ex), category=RuntimeWarning)\n            return object.__repr__(self)\n\n    def __str__(self):\n        \"\"\"str: the string of the module\"\"\"\n        try:\n            classname = self.__class__.__name__\n            nice = self.__nice__()\n            return f'<{classname}({nice})>'\n        except NotImplementedError as ex:\n            warnings.warn(str(ex), category=RuntimeWarning)\n            return object.__repr__(self)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/utils/util_random.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"Helpers for random number generators.\"\"\"\nimport numpy as np\n\n\ndef ensure_rng(rng=None):\n    \"\"\"Coerces input into a random number generator.\n\n    If the input is None, then a global random state is returned.\n\n    If the input is a numeric value, then that is used as a seed to construct a\n    random state. Otherwise the input is returned as-is.\n\n    Adapted from [1]_.\n\n    Args:\n        rng (int | numpy.random.RandomState | None):\n            if None, then defaults to the global rng. Otherwise this can be an\n            integer or a RandomState class\n    Returns:\n        (numpy.random.RandomState) : rng -\n            a numpy random number generator\n\n    References:\n        .. [1] https://gitlab.kitware.com/computer-vision/kwarray/blob/master/kwarray/util_random.py#L270  # noqa: E501\n    \"\"\"\n\n    if rng is None:\n        rng = np.random.mtrand._rand\n    elif isinstance(rng, int):\n        rng = np.random.RandomState(rng)\n    else:\n        rng = rng\n    return rng\n"
  },
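ensure_rng for each of its three accepted inputs, as a tiny sketch:

import numpy as np
from mmdet.utils.util_random import ensure_rng

assert ensure_rng(None) is np.random.mtrand._rand          # global state
assert isinstance(ensure_rng(42), np.random.RandomState)   # seeded RNG
rng = np.random.RandomState(0)
assert ensure_rng(rng) is rng                               # passed through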
  {
    "path": "DLTA_AI_app/mmdetection/mmdet/version.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\n__version__ = '2.26.0'\nshort_version = __version__\n\n\ndef parse_version_info(version_str):\n    version_info = []\n    for x in version_str.split('.'):\n        if x.isdigit():\n            version_info.append(int(x))\n        elif x.find('rc') != -1:\n            patch_version = x.split('rc')\n            version_info.append(int(patch_version[0]))\n            version_info.append(f'rc{patch_version[1]}')\n    return tuple(version_info)\n\n\nversion_info = parse_version_info(__version__)\n"
  },
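How parse_version_info splits release numbers and rc suffixes, doctest-style:

from mmdet.version import parse_version_info

assert parse_version_info('2.26.0') == (2, 26, 0)
assert parse_version_info('2.26.0rc1') == (2, 26, 0, 'rc1')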
  {
    "path": "DLTA_AI_app/mmdetection/model-index.yml",
    "content": "Import:\n  - configs/atss/metafile.yml\n  - configs/autoassign/metafile.yml\n  - configs/carafe/metafile.yml\n  - configs/cascade_rcnn/metafile.yml\n  - configs/cascade_rpn/metafile.yml\n  - configs/centernet/metafile.yml\n  - configs/centripetalnet/metafile.yml\n  - configs/cornernet/metafile.yml\n  - configs/convnext/metafile.yml\n  - configs/dcn/metafile.yml\n  - configs/dcnv2/metafile.yml\n  - configs/deformable_detr/metafile.yml\n  - configs/detectors/metafile.yml\n  - configs/detr/metafile.yml\n  - configs/double_heads/metafile.yml\n  - configs/dyhead/metafile.yml\n  - configs/dynamic_rcnn/metafile.yml\n  - configs/efficientnet/metafile.yml\n  - configs/empirical_attention/metafile.yml\n  - configs/faster_rcnn/metafile.yml\n  - configs/fcos/metafile.yml\n  - configs/foveabox/metafile.yml\n  - configs/fpg/metafile.yml\n  - configs/free_anchor/metafile.yml\n  - configs/fsaf/metafile.yml\n  - configs/gcnet/metafile.yml\n  - configs/gfl/metafile.yml\n  - configs/ghm/metafile.yml\n  - configs/gn/metafile.yml\n  - configs/gn+ws/metafile.yml\n  - configs/grid_rcnn/metafile.yml\n  - configs/groie/metafile.yml\n  - configs/guided_anchoring/metafile.yml\n  - configs/hrnet/metafile.yml\n  - configs/htc/metafile.yml\n  - configs/instaboost/metafile.yml\n  - configs/lad/metafile.yml\n  - configs/ld/metafile.yml\n  - configs/libra_rcnn/metafile.yml\n  - configs/mask_rcnn/metafile.yml\n  - configs/ms_rcnn/metafile.yml\n  - configs/nas_fcos/metafile.yml\n  - configs/nas_fpn/metafile.yml\n  - configs/openimages/metafile.yml\n  - configs/paa/metafile.yml\n  - configs/pafpn/metafile.yml\n  - configs/panoptic_fpn/metafile.yml\n  - configs/pvt/metafile.yml\n  - configs/pisa/metafile.yml\n  - configs/point_rend/metafile.yml\n  - configs/queryinst/metafile.yml\n  - configs/regnet/metafile.yml\n  - configs/reppoints/metafile.yml\n  - configs/res2net/metafile.yml\n  - configs/resnest/metafile.yml\n  - configs/retinanet/metafile.yml\n  - configs/sabl/metafile.yml\n  - configs/scnet/metafile.yml\n  - configs/scratch/metafile.yml\n  - configs/seesaw_loss/metafile.yml\n  - configs/sparse_rcnn/metafile.yml\n  - configs/solo/metafile.yml\n  - configs/ssd/metafile.yml\n  - configs/swin/metafile.yml\n  - configs/tridentnet/metafile.yml\n  - configs/tood/metafile.yml\n  - configs/vfnet/metafile.yml\n  - configs/yolact/metafile.yml\n  - configs/yolo/metafile.yml\n  - configs/yolof/metafile.yml\n  - configs/yolox/metafile.yml\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/pytest.ini",
    "content": "[pytest]\naddopts = --xdoctest --xdoctest-style=auto\nnorecursedirs = .git ignore build __pycache__ data docker docs .eggs\n\nfilterwarnings= default\n                ignore:.*No cfgstr given in Cacher constructor or call.*:Warning\n                ignore:.*Define the __nice__ method for.*:Warning\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements/albu.txt",
    "content": "albumentations>=0.3.2 --no-binary qudida,albumentations\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements/build.txt",
    "content": "# These must be installed before building mmdetection\ncython\nnumpy\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements/docs.txt",
    "content": "docutils==0.16.0\nmarkdown>=3.4.0\nmyst-parser\n-e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme\nsphinx==5.3.0\nsphinx-copybutton\nsphinx_markdown_tables>=0.0.17\nsphinx_rtd_theme\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements/mminstall.txt",
    "content": "mmcv-full>=1.3.17\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements/optional.txt",
    "content": "cityscapesscripts\nimagecorruptions\nsklearn\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements/readthedocs.txt",
    "content": "mmcv\ntorch\ntorchvision\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements/runtime.txt",
    "content": "matplotlib\nnumpy\npycocotools\nscipy\nsix\nterminaltables\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements/tests.txt",
    "content": "asynctest\ncodecov\nflake8\ninterrogate\nisort==4.3.21\n# Note: used for kwarray.group_items, this may be ported to mmcv in the future.\nkwarray\n-e git+https://github.com/open-mmlab/mmtracking#egg=mmtrack\nonnx==1.7.0\nonnxruntime>=1.8.0\nprotobuf<=3.20.1\npytest\nubelt\nxdoctest>=0.10.0\nyapf\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/requirements.txt",
    "content": "-r requirements/build.txt\n-r requirements/optional.txt\n-r requirements/runtime.txt\n-r requirements/tests.txt\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/setup.cfg",
    "content": "[isort]\nline_length = 79\nmulti_line_output = 0\nextra_standard_library = setuptools\nknown_first_party = mmdet\nknown_third_party = PIL,asynctest,cityscapesscripts,cv2,gather_models,matplotlib,mmcv,numpy,onnx,onnxruntime,pycocotools,pytest,pytorch_sphinx_theme,requests,scipy,seaborn,six,terminaltables,torch,ts,yaml\nno_lines_before = STDLIB,LOCALFOLDER\ndefault_section = THIRDPARTY\n\n[yapf]\nBASED_ON_STYLE = pep8\nBLANK_LINE_BEFORE_NESTED_CLASS_OR_DEF = true\nSPLIT_BEFORE_EXPRESSION_AFTER_OPENING_PAREN = true\n\n# ignore-words-list needs to be lowercase format. For example, if we want to\n# ignore word \"BA\", then we need to append \"ba\" to ignore-words-list rather\n# than \"BA\"\n[codespell]\nskip = *.ipynb\nquiet-level = 3\nignore-words-list = patten,nd,ty,mot,hist,formating,winn,gool,datas,wan,confids,TOOD,tood,ba,warmup,nam\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/setup.py",
    "content": "#!/usr/bin/env python\n# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nimport os.path as osp\nimport platform\nimport shutil\nimport sys\nimport warnings\nfrom setuptools import find_packages, setup\n\nimport torch\nfrom torch.utils.cpp_extension import (BuildExtension, CppExtension,\n                                       CUDAExtension)\n\n\ndef readme():\n    with open('README.md', encoding='utf-8') as f:\n        content = f.read()\n    return content\n\n\nversion_file = 'mmdet/version.py'\n\n\ndef get_version():\n    with open(version_file, 'r') as f:\n        exec(compile(f.read(), version_file, 'exec'))\n    return locals()['__version__']\n\n\ndef make_cuda_ext(name, module, sources, sources_cuda=[]):\n\n    define_macros = []\n    extra_compile_args = {'cxx': []}\n\n    if torch.cuda.is_available() or os.getenv('FORCE_CUDA', '0') == '1':\n        define_macros += [('WITH_CUDA', None)]\n        extension = CUDAExtension\n        extra_compile_args['nvcc'] = [\n            '-D__CUDA_NO_HALF_OPERATORS__',\n            '-D__CUDA_NO_HALF_CONVERSIONS__',\n            '-D__CUDA_NO_HALF2_OPERATORS__',\n        ]\n        sources += sources_cuda\n    else:\n        print(f'Compiling {name} without CUDA')\n        extension = CppExtension\n\n    return extension(\n        name=f'{module}.{name}',\n        sources=[os.path.join(*module.split('.'), p) for p in sources],\n        define_macros=define_macros,\n        extra_compile_args=extra_compile_args)\n\n\ndef parse_requirements(fname='requirements.txt', with_version=True):\n    \"\"\"Parse the package dependencies listed in a requirements file but strips\n    specific versioning information.\n\n    Args:\n        fname (str): path to requirements file\n        with_version (bool, default=False): if True include version specs\n\n    Returns:\n        List[str]: list of requirements items\n\n    CommandLine:\n        python -c \"import setup; print(setup.parse_requirements())\"\n    \"\"\"\n    import re\n    import sys\n    from os.path import exists\n    require_fpath = fname\n\n    def parse_line(line):\n        \"\"\"Parse information from a line in a requirements text file.\"\"\"\n        if line.startswith('-r '):\n            # Allow specifying requirements in other files\n            target = line.split(' ')[1]\n            for info in parse_require_file(target):\n                yield info\n        else:\n            info = {'line': line}\n            if line.startswith('-e '):\n                info['package'] = line.split('#egg=')[1]\n            elif '@git+' in line:\n                info['package'] = line\n            else:\n                # Remove versioning from the package\n                pat = '(' + '|'.join(['>=', '==', '>']) + ')'\n                parts = re.split(pat, line, maxsplit=1)\n                parts = [p.strip() for p in parts]\n\n                info['package'] = parts[0]\n                if len(parts) > 1:\n                    op, rest = parts[1:]\n                    if ';' in rest:\n                        # Handle platform specific dependencies\n                        # http://setuptools.readthedocs.io/en/latest/setuptools.html#declaring-platform-specific-dependencies\n                        version, platform_deps = map(str.strip,\n                                                     rest.split(';'))\n                        info['platform_deps'] = platform_deps\n                    else:\n                        version = rest  # NOQA\n                    info['version'] = (op, 
version)\n            yield info\n\n    def parse_require_file(fpath):\n        with open(fpath, 'r') as f:\n            for line in f.readlines():\n                line = line.strip()\n                if line and not line.startswith('#'):\n                    for info in parse_line(line):\n                        yield info\n\n    def gen_packages_items():\n        if exists(require_fpath):\n            for info in parse_require_file(require_fpath):\n                parts = [info['package']]\n                if with_version and 'version' in info:\n                    parts.extend(info['version'])\n                if not sys.version.startswith('3.4'):\n                    # apparently package_deps are broken in 3.4\n                    platform_deps = info.get('platform_deps')\n                    if platform_deps is not None:\n                        parts.append(';' + platform_deps)\n                item = ''.join(parts)\n                yield item\n\n    packages = list(gen_packages_items())\n    return packages\n\n\ndef add_mim_extension():\n    \"\"\"Add extra files that are required to support MIM into the package.\n\n    These files will be added by creating a symlink to the originals if the\n    package is installed in `editable` mode (e.g. pip install -e .), or by\n    copying from the originals otherwise.\n    \"\"\"\n\n    # parse installment mode\n    if 'develop' in sys.argv:\n        # installed by `pip install -e .`\n        if platform.system() == 'Windows':\n            # set `copy` mode here since symlink fails on Windows.\n            mode = 'copy'\n        else:\n            mode = 'symlink'\n    elif 'sdist' in sys.argv or 'bdist_wheel' in sys.argv:\n        # installed by `pip install .`\n        # or create source distribution by `python setup.py sdist`\n        mode = 'copy'\n    else:\n        return\n\n    filenames = ['tools', 'configs', 'demo', 'model-index.yml']\n    repo_path = osp.dirname(__file__)\n    mim_path = osp.join(repo_path, 'mmdet', '.mim')\n    os.makedirs(mim_path, exist_ok=True)\n\n    for filename in filenames:\n        if osp.exists(filename):\n            src_path = osp.join(repo_path, filename)\n            tar_path = osp.join(mim_path, filename)\n\n            if osp.isfile(tar_path) or osp.islink(tar_path):\n                os.remove(tar_path)\n            elif osp.isdir(tar_path):\n                shutil.rmtree(tar_path)\n\n            if mode == 'symlink':\n                src_relpath = osp.relpath(src_path, osp.dirname(tar_path))\n                os.symlink(src_relpath, tar_path)\n            elif mode == 'copy':\n                if osp.isfile(src_path):\n                    shutil.copyfile(src_path, tar_path)\n                elif osp.isdir(src_path):\n                    shutil.copytree(src_path, tar_path)\n                else:\n                    warnings.warn(f'Cannot copy file {src_path}.')\n            else:\n                raise ValueError(f'Invalid mode {mode}')\n\n\nif __name__ == '__main__':\n    add_mim_extension()\n    setup(\n        name='mmdet',\n        version=get_version(),\n        description='OpenMMLab Detection Toolbox and Benchmark',\n        long_description=readme(),\n        long_description_content_type='text/markdown',\n        author='MMDetection Contributors',\n        author_email='openmmlab@gmail.com',\n        keywords='computer vision, object detection',\n        url='https://github.com/open-mmlab/mmdetection',\n        packages=find_packages(exclude=('configs', 'tools', 'demo')),\n        
include_package_data=True,\n        classifiers=[\n            'Development Status :: 5 - Production/Stable',\n            'License :: OSI Approved :: Apache Software License',\n            'Operating System :: OS Independent',\n            'Programming Language :: Python :: 3',\n            'Programming Language :: Python :: 3.6',\n            'Programming Language :: Python :: 3.7',\n            'Programming Language :: Python :: 3.8',\n            'Programming Language :: Python :: 3.9',\n        ],\n        license='Apache License 2.0',\n        install_requires=parse_requirements('requirements/runtime.txt'),\n        extras_require={\n            'all': parse_requirements('requirements.txt'),\n            'tests': parse_requirements('requirements/tests.txt'),\n            'build': parse_requirements('requirements/build.txt'),\n            'optional': parse_requirements('requirements/optional.txt'),\n            'mim': parse_requirements('requirements/mminstall.txt'),\n        },\n        ext_modules=[],\n        cmdclass={'build_ext': BuildExtension},\n        zip_safe=False)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_datasets/test_coco_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nimport tempfile\n\nimport mmcv\nimport pytest\n\nfrom mmdet.datasets import CocoDataset\n\n\ndef _create_ids_error_coco_json(json_name):\n    image = {\n        'id': 0,\n        'width': 640,\n        'height': 640,\n        'file_name': 'fake_name.jpg',\n    }\n\n    annotation_1 = {\n        'id': 1,\n        'image_id': 0,\n        'category_id': 0,\n        'area': 400,\n        'bbox': [50, 60, 20, 20],\n        'iscrowd': 0,\n    }\n\n    annotation_2 = {\n        'id': 1,\n        'image_id': 0,\n        'category_id': 0,\n        'area': 900,\n        'bbox': [100, 120, 30, 30],\n        'iscrowd': 0,\n    }\n\n    categories = [{\n        'id': 0,\n        'name': 'car',\n        'supercategory': 'car',\n    }]\n\n    fake_json = {\n        'images': [image],\n        'annotations': [annotation_1, annotation_2],\n        'categories': categories\n    }\n    mmcv.dump(fake_json, json_name)\n\n\ndef test_coco_annotation_ids_unique():\n    tmp_dir = tempfile.TemporaryDirectory()\n    fake_json_file = osp.join(tmp_dir.name, 'fake_data.json')\n    _create_ids_error_coco_json(fake_json_file)\n\n    # test annotation ids not unique error\n    with pytest.raises(AssertionError):\n        CocoDataset(ann_file=fake_json_file, classes=('car', ), pipeline=[])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_datasets/test_common.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport logging\nimport os.path as osp\nimport tempfile\nfrom unittest.mock import MagicMock, patch\n\nimport mmcv\nimport numpy as np\nimport pytest\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import EpochBasedRunner\nfrom torch.utils.data import DataLoader\n\nfrom mmdet.core.evaluation import DistEvalHook, EvalHook\nfrom mmdet.datasets import DATASETS, CocoDataset, CustomDataset, build_dataset\n\n\ndef _create_dummy_coco_json(json_name):\n    image = {\n        'id': 0,\n        'width': 640,\n        'height': 640,\n        'file_name': 'fake_name.jpg',\n    }\n\n    annotation_1 = {\n        'id': 1,\n        'image_id': 0,\n        'category_id': 0,\n        'area': 400,\n        'bbox': [50, 60, 20, 20],\n        'iscrowd': 0,\n    }\n\n    annotation_2 = {\n        'id': 2,\n        'image_id': 0,\n        'category_id': 0,\n        'area': 900,\n        'bbox': [100, 120, 30, 30],\n        'iscrowd': 0,\n    }\n\n    annotation_3 = {\n        'id': 3,\n        'image_id': 0,\n        'category_id': 0,\n        'area': 1600,\n        'bbox': [150, 160, 40, 40],\n        'iscrowd': 0,\n    }\n\n    annotation_4 = {\n        'id': 4,\n        'image_id': 0,\n        'category_id': 0,\n        'area': 10000,\n        'bbox': [250, 260, 100, 100],\n        'iscrowd': 0,\n    }\n\n    categories = [{\n        'id': 0,\n        'name': 'car',\n        'supercategory': 'car',\n    }]\n\n    fake_json = {\n        'images': [image],\n        'annotations':\n        [annotation_1, annotation_2, annotation_3, annotation_4],\n        'categories': categories\n    }\n\n    mmcv.dump(fake_json, json_name)\n\n\ndef _create_dummy_custom_pkl(pkl_name):\n    fake_pkl = [{\n        'filename': 'fake_name.jpg',\n        'width': 640,\n        'height': 640,\n        'ann': {\n            'bboxes':\n            np.array([[50, 60, 70, 80], [100, 120, 130, 150],\n                      [150, 160, 190, 200], [250, 260, 350, 360]]),\n            'labels':\n            np.array([0, 0, 0, 0])\n        }\n    }]\n    mmcv.dump(fake_pkl, pkl_name)\n\n\ndef _create_dummy_results():\n    boxes = [\n        np.array([[50, 60, 70, 80, 1.0], [100, 120, 130, 150, 0.98],\n                  [150, 160, 190, 200, 0.96], [250, 260, 350, 360, 0.95]])\n    ]\n    return [boxes]\n\n\n@pytest.mark.parametrize('config_path',\n                         ['./configs/_base_/datasets/voc0712.py'])\ndef test_dataset_init(config_path, monkeypatch):\n    data_config = mmcv.Config.fromfile(config_path)\n    if 'data' not in data_config:\n        return\n\n    monkeypatch.chdir('./tests/')  # to use ./tests/data\n    stage_names = ['train', 'val', 'test']\n    for stage_name in stage_names:\n        dataset_config = copy.deepcopy(data_config.data.get(stage_name))\n        dataset = build_dataset(dataset_config)\n        dataset[0]\n\n\ndef test_dataset_evaluation():\n    tmp_dir = tempfile.TemporaryDirectory()\n    # create dummy data\n    fake_json_file = osp.join(tmp_dir.name, 'fake_data.json')\n    _create_dummy_coco_json(fake_json_file)\n\n    # test single coco dataset evaluation\n    coco_dataset = CocoDataset(\n        ann_file=fake_json_file, classes=('car', ), pipeline=[])\n    fake_results = _create_dummy_results()\n    eval_results = coco_dataset.evaluate(fake_results, classwise=True)\n    assert eval_results['bbox_mAP'] == 1\n    assert eval_results['bbox_mAP_50'] == 1\n    assert eval_results['bbox_mAP_75'] == 1\n\n    # test concat 
dataset evaluation\n    fake_concat_results = _create_dummy_results() + _create_dummy_results()\n\n    # build concat dataset through two config dict\n    coco_cfg = dict(\n        type='CocoDataset',\n        ann_file=fake_json_file,\n        classes=('car', ),\n        pipeline=[])\n    concat_cfgs = [coco_cfg, coco_cfg]\n    concat_dataset = build_dataset(concat_cfgs)\n    eval_results = concat_dataset.evaluate(fake_concat_results)\n    assert eval_results['0_bbox_mAP'] == 1\n    assert eval_results['0_bbox_mAP_50'] == 1\n    assert eval_results['0_bbox_mAP_75'] == 1\n    assert eval_results['1_bbox_mAP'] == 1\n    assert eval_results['1_bbox_mAP_50'] == 1\n    assert eval_results['1_bbox_mAP_75'] == 1\n\n    # build concat dataset through concatenated ann_file\n    coco_cfg = dict(\n        type='CocoDataset',\n        ann_file=[fake_json_file, fake_json_file],\n        classes=('car', ),\n        pipeline=[])\n    concat_dataset = build_dataset(coco_cfg)\n    eval_results = concat_dataset.evaluate(fake_concat_results)\n    assert eval_results['0_bbox_mAP'] == 1\n    assert eval_results['0_bbox_mAP_50'] == 1\n    assert eval_results['0_bbox_mAP_75'] == 1\n    assert eval_results['1_bbox_mAP'] == 1\n    assert eval_results['1_bbox_mAP_50'] == 1\n    assert eval_results['1_bbox_mAP_75'] == 1\n\n    # create dummy data\n    fake_pkl_file = osp.join(tmp_dir.name, 'fake_data.pkl')\n    _create_dummy_custom_pkl(fake_pkl_file)\n\n    # test single custom dataset evaluation\n    custom_dataset = CustomDataset(\n        ann_file=fake_pkl_file, classes=('car', ), pipeline=[])\n    fake_results = _create_dummy_results()\n    eval_results = custom_dataset.evaluate(fake_results)\n    assert eval_results['mAP'] == 1\n\n    # test concat dataset evaluation\n    fake_concat_results = _create_dummy_results() + _create_dummy_results()\n\n    # build concat dataset through two config dict\n    custom_cfg = dict(\n        type='CustomDataset',\n        ann_file=fake_pkl_file,\n        classes=('car', ),\n        pipeline=[])\n    concat_cfgs = [custom_cfg, custom_cfg]\n    concat_dataset = build_dataset(concat_cfgs)\n    eval_results = concat_dataset.evaluate(fake_concat_results)\n    assert eval_results['0_mAP'] == 1\n    assert eval_results['1_mAP'] == 1\n\n    # build concat dataset through concatenated ann_file\n    concat_cfg = dict(\n        type='CustomDataset',\n        ann_file=[fake_pkl_file, fake_pkl_file],\n        classes=('car', ),\n        pipeline=[])\n    concat_dataset = build_dataset(concat_cfg)\n    eval_results = concat_dataset.evaluate(fake_concat_results)\n    assert eval_results['0_mAP'] == 1\n    assert eval_results['1_mAP'] == 1\n\n    # build concat dataset through explicit type\n    concat_cfg = dict(\n        type='ConcatDataset',\n        datasets=[custom_cfg, custom_cfg],\n        separate_eval=False)\n    concat_dataset = build_dataset(concat_cfg)\n    eval_results = concat_dataset.evaluate(fake_concat_results, metric='mAP')\n    assert eval_results['mAP'] == 1\n    assert len(concat_dataset.datasets[0].data_infos) == \\\n        len(concat_dataset.datasets[1].data_infos)\n    assert len(concat_dataset.datasets[0].data_infos) == 1\n    tmp_dir.cleanup()\n\n\n@patch('mmdet.apis.single_gpu_test', MagicMock)\n@patch('mmdet.apis.multi_gpu_test', MagicMock)\n@pytest.mark.parametrize('EvalHookParam', (EvalHook, DistEvalHook))\ndef test_evaluation_hook(EvalHookParam):\n    # create dummy data\n    dataloader = DataLoader(torch.ones((5, 2)))\n\n    # 0.1. 
dataloader is not a DataLoader object\n    with pytest.raises(TypeError):\n        EvalHookParam(dataloader=MagicMock(), interval=-1)\n\n    # 0.2. negative interval\n    with pytest.raises(ValueError):\n        EvalHookParam(dataloader, interval=-1)\n\n    # 1. start=None, interval=1: perform evaluation after each epoch.\n    runner = _build_demo_runner()\n    evalhook = EvalHookParam(dataloader, interval=1)\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner.run([dataloader], [('train', 1)], 2)\n    assert evalhook.evaluate.call_count == 2  # after epoch 1 & 2\n\n    # 2. start=1, interval=1: perform evaluation after each epoch.\n    runner = _build_demo_runner()\n\n    evalhook = EvalHookParam(dataloader, start=1, interval=1)\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner.run([dataloader], [('train', 1)], 2)\n    assert evalhook.evaluate.call_count == 2  # after epoch 1 & 2\n\n    # 3. start=None, interval=2: perform evaluation after epoch 2, 4, 6, etc\n    runner = _build_demo_runner()\n    evalhook = EvalHookParam(dataloader, interval=2)\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner.run([dataloader], [('train', 1)], 2)\n    assert evalhook.evaluate.call_count == 1  # after epoch 2\n\n    # 4. start=1, interval=2: perform evaluation after epoch 1, 3, 5, etc\n    runner = _build_demo_runner()\n    evalhook = EvalHookParam(dataloader, start=1, interval=2)\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner.run([dataloader], [('train', 1)], 3)\n    assert evalhook.evaluate.call_count == 2  # after epoch 1 & 3\n\n    # 5. start=0/negative, interval=1: perform evaluation after each epoch and\n    #    before epoch 1.\n    runner = _build_demo_runner()\n    evalhook = EvalHookParam(dataloader, start=0)\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner.run([dataloader], [('train', 1)], 2)\n    assert evalhook.evaluate.call_count == 3  # before epoch1 and after e1 & e2\n\n    # 6. start=0, interval=2, dynamic_intervals=[(3, 1)]: the evaluation\n    # interval is 2 when it is less than 3 epoch, otherwise it is 1.\n    runner = _build_demo_runner()\n    evalhook = EvalHookParam(\n        dataloader, start=0, interval=2, dynamic_intervals=[(3, 1)])\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner.run([dataloader], [('train', 1)], 4)\n    assert evalhook.evaluate.call_count == 3\n\n    # the evaluation start epoch cannot be less than 0\n    runner = _build_demo_runner()\n    with pytest.raises(ValueError):\n        EvalHookParam(dataloader, start=-2)\n\n    evalhook = EvalHookParam(dataloader, start=0)\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner.run([dataloader], [('train', 1)], 2)\n    assert evalhook.evaluate.call_count == 3  # before epoch1 and after e1 & e2\n\n    # 6. resuming from epoch i, start = x (x<=i), interval =1: perform\n    #    evaluation after each epoch and before the first epoch.\n    runner = _build_demo_runner()\n    evalhook = EvalHookParam(dataloader, start=1)\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner._epoch = 2\n    runner.run([dataloader], [('train', 1)], 3)\n    assert evalhook.evaluate.call_count == 2  # before & after epoch 3\n\n    # 7. 
resuming from epoch i, start = i+1/None, interval =1: perform\n    #    evaluation after each epoch.\n    runner = _build_demo_runner()\n    evalhook = EvalHookParam(dataloader, start=2)\n    evalhook.evaluate = MagicMock()\n    runner.register_hook(evalhook)\n    runner._epoch = 1\n    runner.run([dataloader], [('train', 1)], 3)\n    assert evalhook.evaluate.call_count == 2  # after epoch 2 & 3\n\n\ndef _build_demo_runner():\n\n    class Model(nn.Module):\n\n        def __init__(self):\n            super().__init__()\n            self.linear = nn.Linear(2, 1)\n\n        def forward(self, x):\n            return self.linear(x)\n\n        def train_step(self, x, optimizer, **kwargs):\n            return dict(loss=self(x))\n\n        def val_step(self, x, optimizer, **kwargs):\n            return dict(loss=self(x))\n\n    model = Model()\n    tmp_dir = tempfile.mkdtemp()\n\n    runner = EpochBasedRunner(\n        model=model, work_dir=tmp_dir, logger=logging.getLogger())\n    return runner\n\n\n@pytest.mark.parametrize('classes, expected_length', [(['bus'], 2),\n                                                      (['car'], 1),\n                                                      (['bus', 'car'], 2)])\ndef test_allow_empty_images(classes, expected_length):\n    dataset_class = DATASETS.get('CocoDataset')\n    # Filter empty images\n    filtered_dataset = dataset_class(\n        ann_file='tests/data/coco_sample.json',\n        img_prefix='tests/data',\n        pipeline=[],\n        classes=classes,\n        filter_empty_gt=True)\n\n    # Get all\n    full_dataset = dataset_class(\n        ann_file='tests/data/coco_sample.json',\n        img_prefix='tests/data',\n        pipeline=[],\n        classes=classes,\n        filter_empty_gt=False)\n\n    assert len(filtered_dataset) == expected_length\n    assert len(filtered_dataset.img_ids) == expected_length\n    assert len(full_dataset) == 3\n    assert len(full_dataset.img_ids) == 3\n    assert filtered_dataset.CLASSES == classes\n    assert full_dataset.CLASSES == classes\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_datasets/test_custom_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nimport unittest\nfrom unittest.mock import MagicMock, patch\n\nimport pytest\n\nfrom mmdet.datasets import DATASETS\n\n\n@patch('mmdet.datasets.CocoDataset.load_annotations', MagicMock())\n@patch('mmdet.datasets.CustomDataset.load_annotations', MagicMock())\n@patch('mmdet.datasets.XMLDataset.load_annotations', MagicMock())\n@patch('mmdet.datasets.CityscapesDataset.load_annotations', MagicMock())\n@patch('mmdet.datasets.CocoDataset._filter_imgs', MagicMock)\n@patch('mmdet.datasets.CustomDataset._filter_imgs', MagicMock)\n@patch('mmdet.datasets.XMLDataset._filter_imgs', MagicMock)\n@patch('mmdet.datasets.CityscapesDataset._filter_imgs', MagicMock)\n@pytest.mark.parametrize('dataset',\n                         ['CocoDataset', 'VOCDataset', 'CityscapesDataset'])\ndef test_custom_classes_override_default(dataset):\n    dataset_class = DATASETS.get(dataset)\n    if dataset in ['CocoDataset', 'CityscapesDataset']:\n        dataset_class.coco = MagicMock()\n        dataset_class.cat_ids = MagicMock()\n\n    original_classes = dataset_class.CLASSES\n\n    # Test setting classes as a tuple\n    custom_dataset = dataset_class(\n        ann_file=MagicMock(),\n        pipeline=[],\n        classes=('bus', 'car'),\n        test_mode=True,\n        img_prefix='VOC2007' if dataset == 'VOCDataset' else '')\n\n    assert custom_dataset.CLASSES != original_classes\n    assert custom_dataset.CLASSES == ('bus', 'car')\n    print(custom_dataset)\n\n    # Test setting classes as a list\n    custom_dataset = dataset_class(\n        ann_file=MagicMock(),\n        pipeline=[],\n        classes=['bus', 'car'],\n        test_mode=True,\n        img_prefix='VOC2007' if dataset == 'VOCDataset' else '')\n\n    assert custom_dataset.CLASSES != original_classes\n    assert custom_dataset.CLASSES == ['bus', 'car']\n    print(custom_dataset)\n\n    # Test overriding not a subset\n    custom_dataset = dataset_class(\n        ann_file=MagicMock(),\n        pipeline=[],\n        classes=['foo'],\n        test_mode=True,\n        img_prefix='VOC2007' if dataset == 'VOCDataset' else '')\n\n    assert custom_dataset.CLASSES != original_classes\n    assert custom_dataset.CLASSES == ['foo']\n    print(custom_dataset)\n\n    # Test default behavior\n    custom_dataset = dataset_class(\n        ann_file=MagicMock(),\n        pipeline=[],\n        classes=None,\n        test_mode=True,\n        img_prefix='VOC2007' if dataset == 'VOCDataset' else '')\n\n    assert custom_dataset.CLASSES == original_classes\n    print(custom_dataset)\n\n    # Test sending file path\n    import tempfile\n    with tempfile.TemporaryDirectory() as tmpdir:\n        path = tmpdir + 'classes.txt'\n        with open(path, 'w') as f:\n            f.write('bus\\ncar\\n')\n    custom_dataset = dataset_class(\n        ann_file=MagicMock(),\n        pipeline=[],\n        classes=path,\n        test_mode=True,\n        img_prefix='VOC2007' if dataset == 'VOCDataset' else '')\n\n    assert custom_dataset.CLASSES != original_classes\n    assert custom_dataset.CLASSES == ['bus', 'car']\n    print(custom_dataset)\n\n\nclass CustomDatasetTests(unittest.TestCase):\n\n    def setUp(self):\n        super().setUp()\n        self.data_dir = osp.join(\n            osp.dirname(osp.dirname(osp.dirname(__file__))), 'data')\n        self.dataset_class = DATASETS.get('XMLDataset')\n\n    def test_data_infos__default_db_directories(self):\n        \"\"\"Test correct data read having a Pacal-VOC 
directory structure.\"\"\"\n        test_dataset_root = osp.join(self.data_dir, 'VOCdevkit', 'VOC2007')\n        custom_ds = self.dataset_class(\n            data_root=test_dataset_root,\n            ann_file=osp.join(test_dataset_root, 'ImageSets', 'Main',\n                              'trainval.txt'),\n            pipeline=[],\n            classes=('person', 'dog'),\n            test_mode=True)\n\n        self.assertListEqual([{\n            'id': '000001',\n            'filename': osp.join('JPEGImages', '000001.jpg'),\n            'width': 353,\n            'height': 500\n        }], custom_ds.data_infos)\n\n    def test_data_infos__overridden_db_subdirectories(self):\n        \"\"\"Test correct data read having a customized directory structure.\"\"\"\n        test_dataset_root = osp.join(self.data_dir, 'custom_dataset')\n        custom_ds = self.dataset_class(\n            data_root=test_dataset_root,\n            ann_file=osp.join(test_dataset_root, 'trainval.txt'),\n            pipeline=[],\n            classes=('person', 'dog'),\n            test_mode=True,\n            img_prefix='',\n            img_subdir='images',\n            ann_subdir='images')\n\n        self.assertListEqual([{\n            'id': '000001',\n            'filename': osp.join('images', '000001.jpg'),\n            'width': 353,\n            'height': 500\n        }], custom_ds.data_infos)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_datasets/test_dataset_wrapper.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport bisect\nimport math\nfrom collections import defaultdict\nfrom unittest.mock import MagicMock\n\nimport numpy as np\nimport pytest\n\nfrom mmdet.datasets import (ClassBalancedDataset, ConcatDataset, CustomDataset,\n                            MultiImageMixDataset, RepeatDataset)\n\n\ndef test_dataset_wrapper():\n    CustomDataset.load_annotations = MagicMock()\n    CustomDataset.__getitem__ = MagicMock(side_effect=lambda idx: idx)\n    dataset_a = CustomDataset(\n        ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')\n    len_a = 10\n    cat_ids_list_a = [\n        np.random.randint(0, 80, num).tolist()\n        for num in np.random.randint(1, 20, len_a)\n    ]\n    ann_info_list_a = []\n    for _ in range(len_a):\n        height = np.random.randint(10, 30)\n        weight = np.random.randint(10, 30)\n        img = np.ones((height, weight, 3))\n        gt_bbox = np.concatenate([\n            np.random.randint(1, 5, (2, 2)),\n            np.random.randint(1, 5, (2, 2)) + 5\n        ],\n                                 axis=1)\n        gt_labels = np.random.randint(0, 80, 2)\n        ann_info_list_a.append(\n            dict(gt_bboxes=gt_bbox, gt_labels=gt_labels, img=img))\n    dataset_a.data_infos = MagicMock()\n    dataset_a.data_infos.__len__.return_value = len_a\n    dataset_a.get_cat_ids = MagicMock(\n        side_effect=lambda idx: cat_ids_list_a[idx])\n    dataset_a.get_ann_info = MagicMock(\n        side_effect=lambda idx: ann_info_list_a[idx])\n    dataset_b = CustomDataset(\n        ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')\n    len_b = 20\n    cat_ids_list_b = [\n        np.random.randint(0, 80, num).tolist()\n        for num in np.random.randint(1, 20, len_b)\n    ]\n    ann_info_list_b = []\n    for _ in range(len_b):\n        height = np.random.randint(10, 30)\n        weight = np.random.randint(10, 30)\n        img = np.ones((height, weight, 3))\n        gt_bbox = np.concatenate([\n            np.random.randint(1, 5, (2, 2)),\n            np.random.randint(1, 5, (2, 2)) + 5\n        ],\n                                 axis=1)\n        gt_labels = np.random.randint(0, 80, 2)\n        ann_info_list_b.append(\n            dict(gt_bboxes=gt_bbox, gt_labels=gt_labels, img=img))\n    dataset_b.data_infos = MagicMock()\n    dataset_b.data_infos.__len__.return_value = len_b\n    dataset_b.get_cat_ids = MagicMock(\n        side_effect=lambda idx: cat_ids_list_b[idx])\n    dataset_b.get_ann_info = MagicMock(\n        side_effect=lambda idx: ann_info_list_b[idx])\n\n    concat_dataset = ConcatDataset([dataset_a, dataset_b])\n    assert concat_dataset[5] == 5\n    assert concat_dataset[25] == 15\n    assert concat_dataset.get_cat_ids(5) == cat_ids_list_a[5]\n    assert concat_dataset.get_cat_ids(25) == cat_ids_list_b[15]\n    assert concat_dataset.get_ann_info(5) == ann_info_list_a[5]\n    assert concat_dataset.get_ann_info(25) == ann_info_list_b[15]\n    assert len(concat_dataset) == len(dataset_a) + len(dataset_b)\n\n    # Test if ConcatDataset allows dataset classes without the PALETTE\n    # attribute\n    palette_backup = CustomDataset.PALETTE\n    delattr(CustomDataset, 'PALETTE')\n    concat_dataset = ConcatDataset([dataset_a, dataset_b])\n    assert concat_dataset.PALETTE is None\n    CustomDataset.PALETTE = palette_backup\n\n    repeat_dataset = RepeatDataset(dataset_a, 10)\n    assert repeat_dataset[5] == 5\n    assert repeat_dataset[15] == 5\n    assert 
repeat_dataset[27] == 7\n    assert repeat_dataset.get_cat_ids(5) == cat_ids_list_a[5]\n    assert repeat_dataset.get_cat_ids(15) == cat_ids_list_a[5]\n    assert repeat_dataset.get_cat_ids(27) == cat_ids_list_a[7]\n    assert repeat_dataset.get_ann_info(5) == ann_info_list_a[5]\n    assert repeat_dataset.get_ann_info(15) == ann_info_list_a[5]\n    assert repeat_dataset.get_ann_info(27) == ann_info_list_a[7]\n    assert len(repeat_dataset) == 10 * len(dataset_a)\n\n    # Test if RepeatDataset allows dataset classes without the PALETTE\n    # attribute\n    delattr(CustomDataset, 'PALETTE')\n    repeat_dataset = RepeatDataset(dataset_a, 10)\n    assert repeat_dataset.PALETTE is None\n    CustomDataset.PALETTE = palette_backup\n\n    category_freq = defaultdict(int)\n    for cat_ids in cat_ids_list_a:\n        cat_ids = set(cat_ids)\n        for cat_id in cat_ids:\n            category_freq[cat_id] += 1\n    for k, v in category_freq.items():\n        category_freq[k] = v / len(cat_ids_list_a)\n\n    mean_freq = np.mean(list(category_freq.values()))\n    repeat_thr = mean_freq\n\n    category_repeat = {\n        cat_id: max(1.0, math.sqrt(repeat_thr / cat_freq))\n        for cat_id, cat_freq in category_freq.items()\n    }\n\n    repeat_factors = []\n    for cat_ids in cat_ids_list_a:\n        cat_ids = set(cat_ids)\n        repeat_factor = max({category_repeat[cat_id] for cat_id in cat_ids})\n        repeat_factors.append(math.ceil(repeat_factor))\n    repeat_factors_cumsum = np.cumsum(repeat_factors)\n    repeat_factor_dataset = ClassBalancedDataset(dataset_a, repeat_thr)\n    assert len(repeat_factor_dataset) == repeat_factors_cumsum[-1]\n    for idx in np.random.randint(0, len(repeat_factor_dataset), 3):\n        assert repeat_factor_dataset[idx] == bisect.bisect_right(\n            repeat_factors_cumsum, idx)\n        assert repeat_factor_dataset.get_ann_info(idx) == ann_info_list_a[\n            bisect.bisect_right(repeat_factors_cumsum, idx)]\n    # Test if ClassBalancedDataset allows dataset classes without the PALETTE\n    # attribute\n    delattr(CustomDataset, 'PALETTE')\n    repeat_factor_dataset = ClassBalancedDataset(dataset_a, repeat_thr)\n    assert repeat_factor_dataset.PALETTE is None\n    CustomDataset.PALETTE = palette_backup\n\n    img_scale = (60, 60)\n    pipeline = [\n        dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),\n        dict(\n            type='RandomAffine',\n            scaling_ratio_range=(0.1, 2),\n            border=(-img_scale[0] // 2, -img_scale[1] // 2)),\n        dict(\n            type='MixUp',\n            img_scale=img_scale,\n            ratio_range=(0.8, 1.6),\n            pad_val=114.0),\n        dict(type='RandomFlip', flip_ratio=0.5),\n        dict(type='Resize', img_scale=img_scale, keep_ratio=True),\n        dict(type='Pad', pad_to_square=True, pad_val=114.0),\n    ]\n\n    CustomDataset.load_annotations = MagicMock()\n    results = []\n    for _ in range(2):\n        height = np.random.randint(10, 30)\n        weight = np.random.randint(10, 30)\n        img = np.ones((height, weight, 3))\n        gt_bbox = np.concatenate([\n            np.random.randint(1, 5, (2, 2)),\n            np.random.randint(1, 5, (2, 2)) + 5\n        ],\n                                 axis=1)\n        gt_labels = np.random.randint(0, 80, 2)\n        results.append(dict(gt_bboxes=gt_bbox, gt_labels=gt_labels, img=img))\n\n    CustomDataset.__getitem__ = MagicMock(side_effect=lambda idx: results[idx])\n    dataset_a = CustomDataset(\n        
ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')\n    len_a = 2\n    cat_ids_list_a = [\n        np.random.randint(0, 80, num).tolist()\n        for num in np.random.randint(1, 20, len_a)\n    ]\n    dataset_a.data_infos = MagicMock()\n    dataset_a.data_infos.__len__.return_value = len_a\n    dataset_a.get_cat_ids = MagicMock(\n        side_effect=lambda idx: cat_ids_list_a[idx])\n\n    # test dynamic_scale deprecated\n    with pytest.raises(RuntimeError):\n        MultiImageMixDataset(dataset_a, pipeline, (80, 80))\n\n    multi_image_mix_dataset = MultiImageMixDataset(dataset_a, pipeline)\n    for idx in range(len_a):\n        results_ = multi_image_mix_dataset[idx]\n        assert results_['img'].shape == (img_scale[0], img_scale[1], 3)\n\n    # test skip_type_keys\n    multi_image_mix_dataset = MultiImageMixDataset(\n        dataset_a,\n        pipeline,\n        skip_type_keys=('MixUp', 'RandomFlip', 'Resize', 'Pad'))\n    for idx in range(len_a):\n        results_ = multi_image_mix_dataset[idx]\n        assert results_['img'].shape == (img_scale[0], img_scale[1], 3)\n\n    # Test if MultiImageMixDataset allows dataset classes without the PALETTE\n    # attribute\n    delattr(CustomDataset, 'PALETTE')\n    multi_image_mix_dataset = MultiImageMixDataset(dataset_a, pipeline)\n    assert multi_image_mix_dataset.PALETTE is None\n    CustomDataset.PALETTE = palette_backup\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_datasets/test_openimages_dataset.py",
    "content": "import csv\nimport os.path as osp\nimport tempfile\n\nimport mmcv\nimport numpy as np\nimport pytest\n\nfrom mmdet.datasets import OpenImagesChallengeDataset, OpenImagesDataset\n\n\ndef _create_ids_error_oid_csv(\n    label_file,\n    fake_csv_file,\n):\n    label_description = ['/m/000002', 'Football']\n    # `newline=''` is used to avoid index error of out of bounds\n    # in Windows system\n    with open(label_file, 'w', newline='') as f:\n        f_csv = csv.writer(f)\n        f_csv.writerow(label_description)\n\n    header = [\n        'ImageID', 'Source', 'LabelName', 'Confidence', 'XMin', 'XMax', 'YMin',\n        'YMax', 'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction',\n        'IsInside'\n    ]\n    annotations = [[\n        'color', 'xclick', '/m/000002', '1', '0.022673031', '0.9642005',\n        '0.07103825', '0.80054647', '0', '0', '0', '0', '0'\n    ],\n                   [\n                       '000595fe6fee6369', 'xclick', '/m/000000', '1', '0',\n                       '1', '0', '1', '0', '0', '1', '0', '0'\n                   ]]\n    # `newline=''` is used to avoid index error of out of bounds\n    # in Windows system\n    with open(fake_csv_file, 'w', newline='') as f:\n        f_csv = csv.writer(f)\n        f_csv.writerow(header)\n        f_csv.writerows(annotations)\n\n\ndef _create_oid_style_ann(label_file, csv_file, label_level_file):\n    label_description = [['/m/000000', 'Sports equipment'],\n                         ['/m/000001', 'Ball'], ['/m/000002', 'Football'],\n                         ['/m/000004', 'Bicycle']]\n    with open(label_file, 'w', newline='') as f:\n        f_csv = csv.writer(f)\n        f_csv.writerows(label_description)\n\n    header = [\n        'ImageID', 'Source', 'LabelName', 'Confidence', 'XMin', 'XMax', 'YMin',\n        'YMax', 'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction',\n        'IsInside'\n    ]\n    annotations = [\n        [\n            'color', 'xclick', '/m/000002', 1, 0.0333333, 0.1, 0.0333333, 0.1,\n            0, 0, 1, 0, 0\n        ],\n        [\n            'color', 'xclick', '/m/000002', 1, 0.1, 0.166667, 0.1, 0.166667, 0,\n            0, 0, 0, 0\n        ],\n    ]\n    # `newline=''` is used to avoid index error of out of bounds\n    # in Windows system\n    with open(csv_file, 'w', newline='') as f:\n        f_csv = csv.writer(f)\n        f_csv.writerow(header)\n        f_csv.writerows(annotations)\n\n    header = ['ImageID', 'Source', 'LabelName', 'Confidence']\n    annotations = [['color', 'xclick', '/m/000002', '1'],\n                   ['color', 'xclick', '/m/000004', '0']]\n    # `newline=''` is used to avoid index error of out of bounds\n    # in Windows system\n    with open(label_level_file, 'w', newline='') as f:\n        f_csv = csv.writer(f)\n        f_csv.writerow(header)\n        f_csv.writerows(annotations)\n\n\ndef _create_hierarchy_json(hierarchy_name):\n    fake_hierarchy = \\\n        {'LabelName':  '/m/0bl9f',      # entity label\n         'Subcategory': [\n             {\n                 'LabelName': '/m/000000',\n                 'Subcategory':\n                     [\n                         {'LabelName': '/m/000001',\n                          'Subcategory':\n                              [\n                                  {\n                                      'LabelName': '/m/000002'\n                                  }\n                              ]\n                          },\n                         {\n                             'LabelName': 
'/m/000004'\n                         }\n                     ]\n             }\n         ]\n         }\n\n    mmcv.dump(fake_hierarchy, hierarchy_name)\n\n\ndef _create_hierarchy_np(hierarchy_name):\n    fake_hierarchy = np.array([[0, 1, 0, 0, 0], [0, 1, 1, 0,\n                                                 0], [0, 1, 1, 1, 0],\n                               [0, 1, 0, 0, 1], [0, 0, 0, 0, 0]])\n    with open(hierarchy_name, 'wb') as f:\n        np.save(f, fake_hierarchy)\n\n\ndef _create_dummy_results():\n    boxes = [\n        np.zeros((0, 5)),\n        np.zeros((0, 5)),\n        np.array([[10, 10, 15, 15, 1.0], [15, 15, 30, 30, 0.98],\n                  [10, 10, 25, 25, 0.98], [28, 28, 35, 35, 0.97],\n                  [30, 30, 51, 51, 0.96], [100, 110, 120, 130, 0.15]]),\n        np.array([[30, 30, 50, 50, 0.51]]),\n    ]\n    return [boxes]\n\n\ndef _creat_oid_challenge_style_ann(txt_file, label_file, label_level_file):\n    bboxes = [\n        'validation/color.jpg\\n',\n        '4 29\\n',\n        '2\\n',\n        '1 0.0333333 0.1 0.0333333 0.1 1\\n',\n        '1 0.1 0.166667 0.1 0.166667 0\\n',\n    ]\n    # `newline=''` is used to avoid index error of out of bounds\n    # in Windows system\n    with open(txt_file, 'w', newline='') as f:\n        f.writelines(bboxes)\n        f.close()\n\n    label_description = [['/m/000000', 'Sports equipment', 1],\n                         ['/m/000001', 'Ball', 2],\n                         ['/m/000002', 'Football', 3],\n                         ['/m/000004', 'Bicycle', 4]]\n    # `newline=''` is used to avoid index error of out of bounds\n    # in Windows system\n    with open(label_file, 'w', newline='') as f:\n        f_csv = csv.writer(f)\n        f_csv.writerows(label_description)\n\n    header = ['ImageID', 'LabelName', 'Confidence']\n    annotations = [['color', '/m/000001', '1'], ['color', '/m/000000', '0']]\n    # `newline=''` is used to avoid index error of out of bounds\n    # in Windows system\n    with open(label_level_file, 'w', newline='') as f:\n        f_csv = csv.writer(f)\n        f_csv.writerow(header)\n        f_csv.writerows(annotations)\n\n\ndef _create_metas(meta_file):\n\n    fake_meta = [{\n        'filename': 'data/OpenImages/OpenImages/validation/color.jpg',\n        'ori_shape': (300, 300, 3)\n    }]\n    mmcv.dump(fake_meta, meta_file)\n\n\ndef test_oid_annotation_ids_unique():\n    # create fake ann files\n    tmp_dir = tempfile.TemporaryDirectory()\n    fake_label_file = osp.join(tmp_dir.name, 'fake_label.csv')\n    fake_ann_file = osp.join(tmp_dir.name, 'fake_ann.csv')\n    _create_ids_error_oid_csv(fake_label_file, fake_ann_file)\n\n    # test annotation ids not unique error\n    with pytest.raises(AssertionError):\n        OpenImagesDataset(\n            ann_file=fake_ann_file, label_file=fake_label_file, pipeline=[])\n    tmp_dir.cleanup()\n\n\ndef test_openimages_dataset():\n    # create fake ann files\n    tmp_dir = tempfile.TemporaryDirectory()\n    label_file = osp.join(tmp_dir.name, 'label_file.csv')\n    ann_file = osp.join(tmp_dir.name, 'ann_file.csv')\n    label_level_file = osp.join(tmp_dir.name, 'label_level_file.csv')\n    _create_oid_style_ann(label_file, ann_file, label_level_file)\n\n    hierarchy_json = osp.join(tmp_dir.name, 'hierarchy.json')\n    _create_hierarchy_json(hierarchy_json)\n\n    # test whether hierarchy_file is not None when set\n    # get_parent_classes is True\n    with pytest.raises(AssertionError):\n        OpenImagesDataset(\n            ann_file=ann_file,\n            
label_file=label_file,\n            image_level_ann_file=label_level_file,\n            pipeline=[])\n\n    dataset = OpenImagesDataset(\n        ann_file=ann_file,\n        label_file=label_file,\n        image_level_ann_file=label_level_file,\n        hierarchy_file=hierarchy_json,\n        pipeline=[])\n    ann = dataset.get_ann_info(0)\n    # two legal detection bboxes with `group_of` parameter\n    assert ann['bboxes'].shape[0] == ann['labels'].shape[0] == \\\n           ann['gt_is_group_ofs'].shape[0] == 2\n\n    # test load metas from pipeline\n    img_norm_cfg = dict(\n        mean=[123.675, 116.28, 103.53],\n        std=[58.395, 57.12, 57.375],\n        to_rgb=True)\n    test_pipeline = [\n        dict(type='LoadImageFromFile'),\n        dict(\n            type='MultiScaleFlipAug',\n            img_scale=(128, 128),\n            flip=False,\n            transforms=[\n                dict(type='Resize', keep_ratio=True),\n                dict(type='RandomFlip'),\n                dict(type='Normalize', **img_norm_cfg),\n                dict(type='Pad', size_divisor=32),\n                dict(type='ImageToTensor', keys=['img']),\n                dict(type='Collect', keys=['img']),\n            ])\n    ]\n    dataset = OpenImagesDataset(\n        ann_file=ann_file,\n        img_prefix='tests/data',\n        label_file=label_file,\n        image_level_ann_file=label_level_file,\n        load_from_file=False,\n        hierarchy_file=hierarchy_json,\n        pipeline=test_pipeline)\n    dataset.prepare_test_img(0)\n    assert len(dataset.test_img_metas) == 1\n    result = _create_dummy_results()\n    dataset.evaluate(result)\n\n    # test get hierarchy for classes\n    hierarchy_json = osp.join(tmp_dir.name, 'hierarchy.json')\n    _create_hierarchy_json(hierarchy_json)\n\n    # test with hierarchy file wrong suffix\n    with pytest.raises(AssertionError):\n        fake_path = osp.join(tmp_dir.name, 'hierarchy.csv')\n        OpenImagesDataset(\n            ann_file=ann_file,\n            img_prefix='tests/data',\n            label_file=label_file,\n            image_level_ann_file=label_level_file,\n            load_from_file=False,\n            hierarchy_file=fake_path,\n            pipeline=test_pipeline)\n\n    # test load hierarchy file succseefully\n    hierarchy = dataset.get_relation_matrix(hierarchy_json)\n    hierarchy_gt = np.array([[1, 0, 0, 0], [1, 1, 0, 0], [1, 1, 1, 0],\n                             [1, 0, 0, 1]])\n    assert np.equal(hierarchy, hierarchy_gt).all()\n\n    # test evaluation\n    # create fake metas\n    meta_file = osp.join(tmp_dir.name, 'meta.pkl')\n    _create_metas(meta_file)\n\n    dataset = OpenImagesDataset(\n        ann_file=ann_file,\n        label_file=label_file,\n        image_level_ann_file=label_level_file,\n        hierarchy_file=hierarchy_json,\n        meta_file=meta_file,\n        pipeline=[])\n    # test evaluation with using group_of, adding father classes to\n    # GT and annotations, and considering image_level_image,\n    # In the first label (Sports equipment): tp = [0, 1, 0, 0, 1],\n    # fp = [1, 0, 1, 1, 0]\n    # In the second label (Ball), tp = [0, 1, 0, 1], fp = [1, 0, 1, 0].\n    # In the third label (Football), tp = [0, 1, 0, 1], fp = [1, 0, 1, 0].\n    # In the forth label (Bicycle), tp = [0], fp = [1].\n    result = _create_dummy_results()\n    parsed_results = dataset.evaluate(result)\n    assert np.isclose(parsed_results['mAP'], 0.8333, 1e-4)\n\n    dataset = OpenImagesDataset(\n        ann_file=ann_file,\n        
label_file=label_file,\n        load_image_level_labels=False,\n        image_level_ann_file=label_level_file,\n        hierarchy_file=hierarchy_json,\n        meta_file=meta_file,\n        pipeline=[])\n\n    # test evaluation with using group_of, adding father classes to\n    # GT and annotations, and not considering image_level_image,\n    # In the first label (Sports equipment): tp = [0, 1, 0, 0, 1],\n    # fp = [1, 0, 1, 1, 0]\n    # In the second label (Ball), tp = [0, 1, 0, 1], fp = [1, 0, 1, 0].\n    # In the third label (Football), tp = [0, 1, 0, 1], fp = [1, 0, 1, 0].\n    # In the forth label (Bicycle), tp = [], fp = [].\n    result = _create_dummy_results()\n    parsed_results = dataset.evaluate(result)\n    assert np.isclose(parsed_results['mAP'], 0.8333, 1e-4)\n    tmp_dir.cleanup()\n\n\ndef test_openimages_challenge_dataset():\n    # create fake ann files\n    tmp_dir = tempfile.TemporaryDirectory()\n    ann_file = osp.join(tmp_dir.name, 'ann_file.txt')\n    label_file = osp.join(tmp_dir.name, 'label_file.csv')\n    label_level_file = osp.join(tmp_dir.name, 'label_level_file.csv')\n    _creat_oid_challenge_style_ann(ann_file, label_file, label_level_file)\n\n    dataset = OpenImagesChallengeDataset(\n        ann_file=ann_file,\n        label_file=label_file,\n        load_image_level_labels=False,\n        get_supercategory=False,\n        pipeline=[])\n    ann = dataset.get_ann_info(0)\n\n    # two legal detection bboxes with `group_of` parameter\n    assert ann['bboxes'].shape[0] == ann['labels'].shape[0] == \\\n           ann['gt_is_group_ofs'].shape[0] == 2\n\n    dataset.prepare_train_img(0)\n    dataset.prepare_test_img(0)\n\n    meta_file = osp.join(tmp_dir.name, 'meta.pkl')\n    _create_metas(meta_file)\n\n    result = _create_dummy_results()\n    with pytest.raises(AssertionError):\n        fake_json = osp.join(tmp_dir.name, 'hierarchy.json')\n        OpenImagesChallengeDataset(\n            ann_file=ann_file,\n            label_file=label_file,\n            image_level_ann_file=label_level_file,\n            hierarchy_file=fake_json,\n            meta_file=meta_file,\n            pipeline=[])\n\n    hierarchy_file = osp.join(tmp_dir.name, 'hierarchy.np')\n    _create_hierarchy_np(hierarchy_file)\n    dataset = OpenImagesChallengeDataset(\n        ann_file=ann_file,\n        label_file=label_file,\n        image_level_ann_file=label_level_file,\n        hierarchy_file=hierarchy_file,\n        meta_file=meta_file,\n        pipeline=[])\n    dataset.evaluate(result)\n    tmp_dir.cleanup()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_datasets/test_panoptic_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nimport tempfile\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet.core import encode_mask_results\nfrom mmdet.datasets.api_wrappers import pq_compute_single_core\nfrom mmdet.datasets.coco_panoptic import INSTANCE_OFFSET, CocoPanopticDataset\n\ntry:\n    from panopticapi.utils import id2rgb\nexcept ImportError:\n    id2rgb = None\n\n\ndef _create_panoptic_style_json(json_name):\n    image1 = {\n        'id': 0,\n        'width': 640,\n        'height': 640,\n        'file_name': 'fake_name1.jpg',\n    }\n\n    image2 = {\n        'id': 1,\n        'width': 640,\n        'height': 800,\n        'file_name': 'fake_name2.jpg',\n    }\n\n    images = [image1, image2]\n\n    annotations = [\n        {\n            'segments_info': [{\n                'id': 1,\n                'category_id': 0,\n                'area': 400,\n                'bbox': [50, 60, 20, 20],\n                'iscrowd': 0\n            }, {\n                'id': 2,\n                'category_id': 1,\n                'area': 900,\n                'bbox': [100, 120, 30, 30],\n                'iscrowd': 0\n            }, {\n                'id': 3,\n                'category_id': 2,\n                'iscrowd': 0,\n                'bbox': [1, 189, 612, 285],\n                'area': 70036\n            }],\n            'file_name':\n            'fake_name1.jpg',\n            'image_id':\n            0\n        },\n        {\n            'segments_info': [\n                {\n                    # Different to instance style json, there\n                    # are duplicate ids in panoptic style json\n                    'id': 1,\n                    'category_id': 0,\n                    'area': 400,\n                    'bbox': [50, 60, 20, 20],\n                    'iscrowd': 0\n                },\n                {\n                    'id': 4,\n                    'category_id': 1,\n                    'area': 900,\n                    'bbox': [100, 120, 30, 30],\n                    'iscrowd': 1\n                },\n                {\n                    'id': 5,\n                    'category_id': 2,\n                    'iscrowd': 0,\n                    'bbox': [100, 200, 200, 300],\n                    'area': 66666\n                },\n                {\n                    'id': 6,\n                    'category_id': 0,\n                    'iscrowd': 0,\n                    'bbox': [1, 189, -10, 285],\n                    'area': 70036\n                }\n            ],\n            'file_name':\n            'fake_name2.jpg',\n            'image_id':\n            1\n        }\n    ]\n\n    categories = [{\n        'id': 0,\n        'name': 'car',\n        'supercategory': 'car',\n        'isthing': 1\n    }, {\n        'id': 1,\n        'name': 'person',\n        'supercategory': 'person',\n        'isthing': 1\n    }, {\n        'id': 2,\n        'name': 'wall',\n        'supercategory': 'wall',\n        'isthing': 0\n    }]\n\n    fake_json = {\n        'images': images,\n        'annotations': annotations,\n        'categories': categories\n    }\n    mmcv.dump(fake_json, json_name)\n\n    return fake_json\n\n\ndef test_load_panoptic_style_json():\n    tmp_dir = tempfile.TemporaryDirectory()\n    fake_json_file = osp.join(tmp_dir.name, 'fake_data.json')\n    fake_json = _create_panoptic_style_json(fake_json_file)\n\n    dataset = CocoPanopticDataset(\n        ann_file=fake_json_file,\n        classes=[cat['name'] for cat in 
fake_json['categories']],\n        pipeline=[])\n\n    ann = dataset.get_ann_info(0)\n\n    # two legal instances\n    assert ann['bboxes'].shape[0] == ann['labels'].shape[0] == 2\n    # three masks for both foreground and background\n    assert len(ann['masks']) == 3\n\n    ann = dataset.get_ann_info(1)\n\n    # one legal instance, one illegal instance,\n    # one crowd instance and one background mask\n    assert ann['bboxes'].shape[0] == ann['labels'].shape[0] == 1\n    assert ann['bboxes_ignore'].shape[0] == 1\n    assert len(ann['masks']) == 3\n\n\ndef _create_panoptic_gt_annotations(ann_file):\n    categories = [{\n        'id': 0,\n        'name': 'person',\n        'supercategory': 'person',\n        'isthing': 1\n    }, {\n        'id': 1,\n        'name': 'dog',\n        'supercategory': 'dog',\n        'isthing': 1\n    }, {\n        'id': 2,\n        'name': 'wall',\n        'supercategory': 'wall',\n        'isthing': 0\n    }]\n\n    images = [{\n        'id': 0,\n        'width': 80,\n        'height': 60,\n        'file_name': 'fake_name1.jpg',\n    }]\n\n    annotations = [{\n        'segments_info': [{\n            'id': 1,\n            'category_id': 0,\n            'area': 400,\n            'bbox': [10, 10, 10, 40],\n            'iscrowd': 0\n        }, {\n            'id': 2,\n            'category_id': 0,\n            'area': 400,\n            'bbox': [30, 10, 10, 40],\n            'iscrowd': 0\n        }, {\n            'id': 3,\n            'category_id': 1,\n            'iscrowd': 0,\n            'bbox': [50, 10, 10, 5],\n            'area': 50\n        }, {\n            'id': 4,\n            'category_id': 2,\n            'iscrowd': 0,\n            'bbox': [0, 0, 80, 60],\n            'area': 3950\n        }],\n        'file_name':\n        'fake_name1.png',\n        'image_id':\n        0\n    }]\n\n    gt_json = {\n        'images': images,\n        'annotations': annotations,\n        'categories': categories\n    }\n\n    # 4 is the id of the background class annotation.\n    gt = np.zeros((60, 80), dtype=np.int64) + 4\n    gt_bboxes = np.array([[10, 10, 10, 40], [30, 10, 10, 40], [50, 10, 10, 5]],\n                         dtype=np.int64)\n    for i in range(3):\n        x, y, w, h = gt_bboxes[i]\n        gt[y:y + h, x:x + w] = i + 1  # id starts from 1\n\n    gt = id2rgb(gt).astype(np.uint8)\n    img_path = osp.join(osp.dirname(ann_file), 'fake_name1.png')\n    mmcv.imwrite(gt[:, :, ::-1], img_path)\n\n    mmcv.dump(gt_json, ann_file)\n    return gt_json\n\n\ndef test_panoptic_evaluation():\n    if id2rgb is None:\n        return\n\n    # TP for background class, IoU=3576/4324=0.827\n    # 2 the category id of the background class\n    pred = np.zeros((60, 80), dtype=np.int64) + 2\n    pred_bboxes = np.array(\n        [\n            [11, 11, 10, 40],  # TP IoU=351/449=0.78\n            [38, 10, 10, 40],  # FP\n            [51, 10, 10, 5]\n        ],  # TP IoU=45/55=0.818\n        dtype=np.int64)\n    pred_labels = np.array([0, 0, 1], dtype=np.int64)\n    for i in range(3):\n        x, y, w, h = pred_bboxes[i]\n        pred[y:y + h, x:x + w] = (i + 1) * INSTANCE_OFFSET + pred_labels[i]\n\n    tmp_dir = tempfile.TemporaryDirectory()\n    ann_file = osp.join(tmp_dir.name, 'panoptic.json')\n    gt_json = _create_panoptic_gt_annotations(ann_file)\n\n    results = [{'pan_results': pred}]\n\n    dataset = CocoPanopticDataset(\n        ann_file=ann_file,\n        seg_prefix=tmp_dir.name,\n        classes=[cat['name'] for cat in gt_json['categories']],\n        
pipeline=[])\n\n    # For 'person', sq = 0.78 / 1, rq = 1 / 2( 1 tp + 0.5 * (1 fn + 1 fp))\n    # For 'dog', sq = 0.818, rq = 1 / 1\n    # For 'wall', sq = 0.827, rq = 1 / 1\n    # Here is the results for all classes:\n    # +--------+--------+--------+---------+------------+\n    # |        | PQ     | SQ     | RQ      | categories |\n    # +--------+--------+--------+---------+------------+\n    # | All    | 67.869 | 80.898 | 83.333  |      3     |\n    # | Things | 60.453 | 79.996 | 75.000  |      2     |\n    # | Stuff  | 82.701 | 82.701 | 100.000 |      1     |\n    # +--------+--------+--------+---------+------------+\n    parsed_results = dataset.evaluate(results)\n    assert np.isclose(parsed_results['PQ'], 67.869)\n    assert np.isclose(parsed_results['SQ'], 80.898)\n    assert np.isclose(parsed_results['RQ'], 83.333)\n    assert np.isclose(parsed_results['PQ_th'], 60.453)\n    assert np.isclose(parsed_results['SQ_th'], 79.996)\n    assert np.isclose(parsed_results['RQ_th'], 75.000)\n    assert np.isclose(parsed_results['PQ_st'], 82.701)\n    assert np.isclose(parsed_results['SQ_st'], 82.701)\n    assert np.isclose(parsed_results['RQ_st'], 100.000)\n\n    # test jsonfile_prefix\n    outfile_prefix = osp.join(tmp_dir.name, 'results')\n    parsed_results = dataset.evaluate(results, jsonfile_prefix=outfile_prefix)\n    assert np.isclose(parsed_results['PQ'], 67.869)\n    assert np.isclose(parsed_results['SQ'], 80.898)\n    assert np.isclose(parsed_results['RQ'], 83.333)\n    assert np.isclose(parsed_results['PQ_th'], 60.453)\n    assert np.isclose(parsed_results['SQ_th'], 79.996)\n    assert np.isclose(parsed_results['RQ_th'], 75.000)\n    assert np.isclose(parsed_results['PQ_st'], 82.701)\n    assert np.isclose(parsed_results['SQ_st'], 82.701)\n    assert np.isclose(parsed_results['RQ_st'], 100.000)\n\n    # test classwise\n    parsed_results = dataset.evaluate(results, classwise=True)\n    assert np.isclose(parsed_results['PQ'], 67.869)\n    assert np.isclose(parsed_results['SQ'], 80.898)\n    assert np.isclose(parsed_results['RQ'], 83.333)\n    assert np.isclose(parsed_results['PQ_th'], 60.453)\n    assert np.isclose(parsed_results['SQ_th'], 79.996)\n    assert np.isclose(parsed_results['RQ_th'], 75.000)\n    assert np.isclose(parsed_results['PQ_st'], 82.701)\n    assert np.isclose(parsed_results['SQ_st'], 82.701)\n    assert np.isclose(parsed_results['RQ_st'], 100.000)\n\n    # test the api wrapper of `pq_compute_single_core`\n    # Codes are copied from `coco_panoptic.py` and modified\n    result_files, _ = dataset.format_results(\n        results, jsonfile_prefix=outfile_prefix)\n\n    imgs = dataset.coco.imgs\n    gt_json = dataset.coco.img_ann_map  # image to annotations\n    gt_json = [{\n        'image_id': k,\n        'segments_info': v,\n        'file_name': imgs[k]['segm_file']\n    } for k, v in gt_json.items()]\n    pred_json = mmcv.load(result_files['panoptic'])\n    pred_json = dict((el['image_id'], el) for el in pred_json['annotations'])\n\n    # match the gt_anns and pred_anns in the same image\n    matched_annotations_list = []\n    for gt_ann in gt_json:\n        img_id = gt_ann['image_id']\n        matched_annotations_list.append((gt_ann, pred_json[img_id]))\n    gt_folder = dataset.seg_prefix\n    pred_folder = osp.join(osp.dirname(outfile_prefix), 'panoptic')\n\n    pq_stat = pq_compute_single_core(0, matched_annotations_list, gt_folder,\n                                     pred_folder, dataset.categories)\n    pq_all = pq_stat.pq_average(dataset.categories, 
isthing=None)[0]\n    assert np.isclose(pq_all['pq'] * 100, 67.869)\n    assert np.isclose(pq_all['sq'] * 100, 80.898)\n    assert np.isclose(pq_all['rq'] * 100, 83.333)\n    assert pq_all['n'] == 3\n\n\ndef _create_instance_segmentation_gt_annotations(ann_file):\n    categories = [{\n        'id': 0,\n        'name': 'person',\n        'supercategory': 'person',\n        'isthing': 1\n    }, {\n        'id': 1,\n        'name': 'dog',\n        'supercategory': 'dog',\n        'isthing': 1\n    }, {\n        'id': 2,\n        'name': 'wall',\n        'supercategory': 'wall',\n        'isthing': 0\n    }]\n\n    images = [{\n        'id': 0,\n        'width': 80,\n        'height': 60,\n        'file_name': 'fake_name1.jpg',\n    }]\n\n    person1_polygon = [10, 10, 20, 10, 20, 50, 10, 50, 10, 10]\n    person2_polygon = [30, 10, 40, 10, 40, 50, 30, 50, 30, 10]\n    dog_polygon = [50, 10, 60, 10, 60, 15, 50, 15, 50, 10]\n\n    annotations = [\n        {\n            'id': 0,\n            'image_id': 0,\n            'category_id': 0,\n            'segmentation': [person1_polygon],\n            'area': 400,\n            'bbox': [10, 10, 10, 40],\n            'iscrowd': 0\n        },\n        {\n            'id': 1,\n            'image_id': 0,\n            'category_id': 0,\n            'segmentation': [person2_polygon],\n            'area': 400,\n            'bbox': [30, 10, 10, 40],\n            'iscrowd': 0\n        },\n        {\n            'id': 2,\n            'image_id': 0,\n            'category_id': 1,\n            'segmentation': [dog_polygon],\n            'area': 50,\n            'bbox': [50, 10, 10, 5],\n            'iscrowd': 0\n        },\n    ]\n\n    gt_json = {\n        'images': images,\n        'annotations': annotations,\n        'categories': categories\n    }\n\n    mmcv.dump(gt_json, ann_file)\n\n\ndef test_instance_segmentation_evaluation():\n    pred_bbox = [\n        np.array([[11, 10, 20, 50, 0.8], [31, 10, 40, 50, 0.8]]),\n        np.array([[51, 10, 60, 15, 0.7]])\n    ]\n\n    person1_mask = np.zeros((60, 80), dtype=bool)\n    person1_mask[20:50, 11:20] = True\n    person2_mask = np.zeros((60, 80), dtype=bool)\n    person2_mask[20:50, 31:40] = True\n    dog_mask = np.zeros((60, 80), dtype=bool)\n    dog_mask[10:15, 51:60] = True\n\n    pred_mask = [[person1_mask, person2_mask], [\n        dog_mask,\n    ]]\n    results = [{'ins_results': (pred_bbox, encode_mask_results(pred_mask))}]\n\n    tmp_dir = tempfile.TemporaryDirectory()\n    pan_ann_file = osp.join(tmp_dir.name, 'panoptic.json')\n    ins_ann_file = osp.join(tmp_dir.name, 'instance.json')\n    _create_panoptic_gt_annotations(pan_ann_file)\n    _create_instance_segmentation_gt_annotations(ins_ann_file)\n\n    dataset = CocoPanopticDataset(\n        ann_file=pan_ann_file,\n        ins_ann_file=ins_ann_file,\n        seg_prefix=tmp_dir.name,\n        pipeline=[])\n    dataset.THING_CLASSES = ['person', 'dog']\n    dataset.STUFF_CLASSES = ['wall']\n    dataset.CLASSES = dataset.THING_CLASSES + dataset.STUFF_CLASSES\n    parsed_results = dataset.evaluate(results, metric=['segm', 'bbox'])\n\n    # Here is the results for instance segmentation:\n    # {\n    #     'segm_mAP': 0.5, 'segm_mAP_50': 0.626, 'segm_mAP_75': 0.5,\n    #     'segm_mAP_s': 0.5, 'segm_mAP_m': -1.0, 'segm_mAP_l': -1.0,\n    #     'segm_mAP_copypaste': '0.500 0.626 0.500 0.500 -1.000 -1.000',\n    #     'bbox_mAP': 0.564, 'bbox_mAP_50': 0.626, 'bbox_mAP_75': 0.626,\n    #     'bbox_mAP_s': 0.564, 'bbox_mAP_m': -1.0, 'bbox_mAP_l': -1.0,\n    # 
    'bbox_mAP_copypaste': '0.564 0.626 0.626 0.564 -1.000 -1.000'\n    # }\n\n    assert np.isclose(parsed_results['segm_mAP'], 0.5)\n    assert np.isclose(parsed_results['bbox_mAP'], 0.564)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_datasets/test_xml_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\n\nfrom mmdet.datasets import DATASETS\n\n\ndef test_xml_dataset():\n    dataconfig = {\n        'ann_file': 'data/VOCdevkit/VOC2007/ImageSets/Main/test.txt',\n        'img_prefix': 'data/VOCdevkit/VOC2007/',\n        'pipeline': [{\n            'type': 'LoadImageFromFile'\n        }]\n    }\n    XMLDataset = DATASETS.get('XMLDataset')\n\n    class XMLDatasetSubClass(XMLDataset):\n        CLASSES = None\n\n    # get_ann_info and _filter_imgs of XMLDataset\n    # would use self.CLASSES, we added CLASSES not NONE\n    with pytest.raises(AssertionError):\n        XMLDatasetSubClass(**dataconfig)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_formatting.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\n\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.datasets.builder import PIPELINES\n\n\ndef test_default_format_bundle():\n    results = dict(\n        img_prefix=osp.join(osp.dirname(__file__), '../../data'),\n        img_info=dict(filename='color.jpg'))\n    load = dict(type='LoadImageFromFile')\n    load = build_from_cfg(load, PIPELINES)\n    bundle = dict(type='DefaultFormatBundle')\n    bundle = build_from_cfg(bundle, PIPELINES)\n    results = load(results)\n    assert 'pad_shape' not in results\n    assert 'scale_factor' not in results\n    assert 'img_norm_cfg' not in results\n    results = bundle(results)\n    assert 'pad_shape' in results\n    assert 'scale_factor' in results\n    assert 'img_norm_cfg' in results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_loading.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport os.path as osp\n\nimport mmcv\nimport numpy as np\nimport pytest\n\nfrom mmdet.core.mask import BitmapMasks, PolygonMasks\nfrom mmdet.datasets.pipelines import (FilterAnnotations, LoadImageFromFile,\n                                      LoadImageFromWebcam,\n                                      LoadMultiChannelImageFromFiles)\n\n\nclass TestLoading:\n\n    @classmethod\n    def setup_class(cls):\n        cls.data_prefix = osp.join(osp.dirname(__file__), '../../data')\n\n    def test_load_img(self):\n        results = dict(\n            img_prefix=self.data_prefix, img_info=dict(filename='color.jpg'))\n        transform = LoadImageFromFile()\n        results = transform(copy.deepcopy(results))\n        assert results['filename'] == osp.join(self.data_prefix, 'color.jpg')\n        assert results['ori_filename'] == 'color.jpg'\n        assert results['img'].shape == (288, 512, 3)\n        assert results['img'].dtype == np.uint8\n        assert results['img_shape'] == (288, 512, 3)\n        assert results['ori_shape'] == (288, 512, 3)\n        assert repr(transform) == transform.__class__.__name__ + \\\n            \"(to_float32=False, color_type='color', channel_order='bgr', \" + \\\n            \"file_client_args={'backend': 'disk'})\"\n\n        # no img_prefix\n        results = dict(\n            img_prefix=None, img_info=dict(filename='tests/data/color.jpg'))\n        transform = LoadImageFromFile()\n        results = transform(copy.deepcopy(results))\n        assert results['filename'] == 'tests/data/color.jpg'\n        assert results['ori_filename'] == 'tests/data/color.jpg'\n        assert results['img'].shape == (288, 512, 3)\n\n        # to_float32\n        transform = LoadImageFromFile(to_float32=True)\n        results = transform(copy.deepcopy(results))\n        assert results['img'].dtype == np.float32\n\n        # gray image\n        results = dict(\n            img_prefix=self.data_prefix, img_info=dict(filename='gray.jpg'))\n        transform = LoadImageFromFile()\n        results = transform(copy.deepcopy(results))\n        assert results['img'].shape == (288, 512, 3)\n        assert results['img'].dtype == np.uint8\n\n        transform = LoadImageFromFile(color_type='unchanged')\n        results = transform(copy.deepcopy(results))\n        assert results['img'].shape == (288, 512)\n        assert results['img'].dtype == np.uint8\n\n    def test_load_multi_channel_img(self):\n        results = dict(\n            img_prefix=self.data_prefix,\n            img_info=dict(filename=['color.jpg', 'color.jpg']))\n        transform = LoadMultiChannelImageFromFiles()\n        results = transform(copy.deepcopy(results))\n        assert results['filename'] == [\n            osp.join(self.data_prefix, 'color.jpg'),\n            osp.join(self.data_prefix, 'color.jpg')\n        ]\n        assert results['ori_filename'] == ['color.jpg', 'color.jpg']\n        assert results['img'].shape == (288, 512, 3, 2)\n        assert results['img'].dtype == np.uint8\n        assert results['img_shape'] == (288, 512, 3, 2)\n        assert results['ori_shape'] == (288, 512, 3, 2)\n        assert results['pad_shape'] == (288, 512, 3, 2)\n        assert results['scale_factor'] == 1.0\n        assert repr(transform) == transform.__class__.__name__ + \\\n            \"(to_float32=False, color_type='unchanged', \" + \\\n            \"file_client_args={'backend': 'disk'})\"\n\n    def test_load_webcam_img(self):\n        img = 
mmcv.imread(osp.join(self.data_prefix, 'color.jpg'))\n        results = dict(img=img)\n        transform = LoadImageFromWebcam()\n        results = transform(copy.deepcopy(results))\n        assert results['filename'] is None\n        assert results['ori_filename'] is None\n        assert results['img'].shape == (288, 512, 3)\n        assert results['img'].dtype == np.uint8\n        assert results['img_shape'] == (288, 512, 3)\n        assert results['ori_shape'] == (288, 512, 3)\n\n\ndef _build_filter_annotations_args():\n    kwargs = (dict(min_gt_bbox_wh=(100, 100)),\n              dict(min_gt_bbox_wh=(100, 100), keep_empty=False),\n              dict(min_gt_bbox_wh=(1, 1)), dict(min_gt_bbox_wh=(.01, .01)),\n              dict(min_gt_bbox_wh=(.01, .01),\n                   by_mask=True), dict(by_mask=True),\n              dict(by_box=False, by_mask=True))\n    targets = (None, 0, 1, 2, 1, 1, 1)\n\n    return list(zip(targets, kwargs))\n\n\n@pytest.mark.parametrize('target, kwargs', _build_filter_annotations_args())\ndef test_filter_annotations(target, kwargs):\n    filter_ann = FilterAnnotations(**kwargs)\n    bboxes = np.array([[2., 10., 4., 14.], [2., 10., 2.1, 10.1]])\n    raw_masks = np.zeros((2, 24, 24))\n    raw_masks[0, 10:14, 2:4] = 1\n    bitmap_masks = BitmapMasks(raw_masks, 24, 24)\n    results = dict(gt_bboxes=bboxes, gt_masks=bitmap_masks)\n    results = filter_ann(results)\n    if results is not None:\n        results = results['gt_bboxes'].shape[0]\n    assert results == target\n\n    polygons = [[np.array([2.0, 10.0, 4.0, 10.0, 4.0, 14.0, 2.0, 14.0])],\n                [np.array([2.0, 10.0, 2.1, 10.0, 2.1, 10.1, 2.0, 10.1])]]\n    polygon_masks = PolygonMasks(polygons, 24, 24)\n\n    results = dict(gt_bboxes=bboxes, gt_masks=polygon_masks)\n    results = filter_ann(results)\n\n    if results is not None:\n        results = len(results.get('gt_masks').masks)\n\n    assert results == target\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_sampler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core.bbox.assigners import MaxIoUAssigner\nfrom mmdet.core.bbox.samplers import (OHEMSampler, RandomSampler,\n                                      ScoreHLRSampler)\n\n\ndef test_random_sampler():\n    assigner = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([1, 2])\n    gt_bboxes_ignore = torch.Tensor([\n        [30, 30, 40, 40],\n    ])\n    assign_result = assigner.assign(\n        bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore,\n        gt_labels=gt_labels)\n\n    sampler = RandomSampler(\n        num=10, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=True)\n\n    sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)\n\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n\n\ndef test_random_sampler_empty_gt():\n    assigner = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.empty(0, 4)\n    gt_labels = torch.empty(0, ).long()\n    assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n\n    sampler = RandomSampler(\n        num=10, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=True)\n\n    sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)\n\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n\n\ndef test_random_sampler_empty_pred():\n    assigner = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    bboxes = torch.empty(0, 4)\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([1, 2])\n    assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n\n    sampler = RandomSampler(\n        num=10, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=True)\n\n    sample_result = sampler.sample(assign_result, bboxes, gt_bboxes, gt_labels)\n\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n\n\ndef _context_for_ohem():\n    import sys\n    from os.path import dirname\n    sys.path.insert(0, dirname(dirname(dirname(__file__))))\n    from test_models.test_forward import _get_detector_cfg\n\n    model = _get_detector_cfg(\n        'faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py')\n    model['pretrained'] = None\n\n    from mmdet.models import build_detector\n    context = build_detector(model).roi_head\n    return context\n\n\ndef test_ohem_sampler():\n\n    assigner = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    
bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([1, 2])\n    gt_bboxes_ignore = torch.Tensor([\n        [30, 30, 40, 40],\n    ])\n    assign_result = assigner.assign(\n        bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore,\n        gt_labels=gt_labels)\n\n    context = _context_for_ohem()\n\n    sampler = OHEMSampler(\n        num=10,\n        pos_fraction=0.5,\n        context=context,\n        neg_pos_ub=-1,\n        add_gt_as_proposals=True)\n\n    feats = [torch.rand(1, 256, int(2**i), int(2**i)) for i in [6, 5, 4, 3, 2]]\n    sample_result = sampler.sample(\n        assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)\n\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n\n\ndef test_ohem_sampler_empty_gt():\n\n    assigner = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.empty(0, 4)\n    gt_labels = torch.LongTensor([])\n    gt_bboxes_ignore = torch.Tensor([])\n    assign_result = assigner.assign(\n        bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore,\n        gt_labels=gt_labels)\n\n    context = _context_for_ohem()\n\n    sampler = OHEMSampler(\n        num=10,\n        pos_fraction=0.5,\n        context=context,\n        neg_pos_ub=-1,\n        add_gt_as_proposals=True)\n\n    feats = [torch.rand(1, 256, int(2**i), int(2**i)) for i in [6, 5, 4, 3, 2]]\n\n    sample_result = sampler.sample(\n        assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)\n\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n\n\ndef test_ohem_sampler_empty_pred():\n    assigner = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    bboxes = torch.empty(0, 4)\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_labels = torch.LongTensor([1, 2, 2, 3])\n    gt_bboxes_ignore = torch.Tensor([])\n    assign_result = assigner.assign(\n        bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore,\n        gt_labels=gt_labels)\n\n    context = _context_for_ohem()\n\n    sampler = OHEMSampler(\n        num=10,\n        pos_fraction=0.5,\n        context=context,\n        neg_pos_ub=-1,\n        add_gt_as_proposals=True)\n\n    feats = [torch.rand(1, 256, int(2**i), int(2**i)) for i in [6, 5, 4, 3, 2]]\n\n    sample_result = sampler.sample(\n        assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)\n\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n\n\ndef test_random_sample_result():\n    from mmdet.core.bbox.samplers.sampling_result import SamplingResult\n    SamplingResult.random(num_gts=0, num_preds=0)\n    SamplingResult.random(num_gts=0, num_preds=3)\n    
SamplingResult.random(num_gts=3, num_preds=3)\n    SamplingResult.random(num_gts=0, num_preds=3)\n    SamplingResult.random(num_gts=7, num_preds=7)\n    SamplingResult.random(num_gts=7, num_preds=64)\n    SamplingResult.random(num_gts=24, num_preds=3)\n\n    for i in range(3):\n        SamplingResult.random(rng=i)\n\n\ndef test_score_hlr_sampler_empty_pred():\n    assigner = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    context = _context_for_ohem()\n    sampler = ScoreHLRSampler(\n        num=10,\n        pos_fraction=0.5,\n        context=context,\n        neg_pos_ub=-1,\n        add_gt_as_proposals=True)\n    gt_bboxes_ignore = torch.Tensor([])\n    feats = [torch.rand(1, 256, int(2**i), int(2**i)) for i in [6, 5, 4, 3, 2]]\n\n    # empty bbox\n    bboxes = torch.empty(0, 4)\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_labels = torch.LongTensor([1, 2, 2, 3])\n    assign_result = assigner.assign(\n        bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore,\n        gt_labels=gt_labels)\n    sample_result, _ = sampler.sample(\n        assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)\n    assert len(sample_result.neg_inds) == 0\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n\n    # empty gt\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.empty(0, 4)\n    gt_labels = torch.LongTensor([])\n    assign_result = assigner.assign(\n        bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore,\n        gt_labels=gt_labels)\n    sample_result, _ = sampler.sample(\n        assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)\n    assert len(sample_result.pos_inds) == 0\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n\n    # non-empty input\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_labels = torch.LongTensor([1, 2, 2, 3])\n    assign_result = assigner.assign(\n        bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore,\n        gt_labels=gt_labels)\n    sample_result, _ = sampler.sample(\n        assign_result, bboxes, gt_bboxes, gt_labels, feats=feats)\n    assert len(sample_result.pos_bboxes) == len(sample_result.pos_inds)\n    assert len(sample_result.neg_bboxes) == len(sample_result.neg_inds)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_transform/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .utils import check_result_same, construct_toy_data, create_random_bboxes\n\n__all__ = ['create_random_bboxes', 'construct_toy_data', 'check_result_same']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_transform/test_img_augment.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport mmcv\nimport numpy as np\nfrom mmcv.utils import build_from_cfg\nfrom numpy.testing import assert_array_equal\n\nfrom mmdet.datasets.builder import PIPELINES\nfrom .utils import construct_toy_data\n\n\ndef test_adjust_color():\n    results = construct_toy_data()\n    # test wighout aug\n    transform = dict(type='ColorTransform', prob=0, level=10)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], results['img'])\n\n    # test with factor 1\n    img = results['img']\n    transform = dict(type='ColorTransform', prob=1, level=10)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], img)\n\n    # test with factor 0\n    transform_module.factor = 0\n    img_gray = mmcv.bgr2gray(img.copy())\n    img_r = np.stack([img_gray, img_gray, img_gray], axis=-1)\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], img_r)\n\n    # test with factor 0.5\n    transform_module.factor = 0.5\n    results_transformed = transform_module(copy.deepcopy(results))\n    img = results['img']\n    assert_array_equal(\n        results_transformed['img'],\n        np.round(np.clip((img * 0.5 + img_r * 0.5), 0, 255)).astype(img.dtype))\n\n\ndef test_imequalize(nb_rand_test=100):\n\n    def _imequalize(img):\n        # equalize the image using PIL.ImageOps.equalize\n        from PIL import Image, ImageOps\n        img = Image.fromarray(img)\n        equalized_img = np.asarray(ImageOps.equalize(img))\n        return equalized_img\n\n    results = construct_toy_data()\n    # test wighout aug\n    transform = dict(type='EqualizeTransform', prob=0)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], results['img'])\n\n    # test equalize with case step=0\n    transform = dict(type='EqualizeTransform', prob=1.)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    img = np.array([[0, 0, 0], [120, 120, 120], [255, 255, 255]],\n                   dtype=np.uint8)\n    img = np.stack([img, img, img], axis=-1)\n    results['img'] = img\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], img)\n\n    # test equalize with randomly sampled image.\n    for _ in range(nb_rand_test):\n        img = np.clip(np.random.uniform(0, 1, (1000, 1200, 3)) * 260, 0,\n                      255).astype(np.uint8)\n        results['img'] = img\n        results_transformed = transform_module(copy.deepcopy(results))\n        assert_array_equal(results_transformed['img'], _imequalize(img))\n\n\ndef test_adjust_brightness(nb_rand_test=100):\n\n    def _adjust_brightness(img, factor):\n        # adjust the brightness of image using\n        # PIL.ImageEnhance.Brightness\n        from PIL import Image\n        from PIL.ImageEnhance import Brightness\n        img = Image.fromarray(img)\n        brightened_img = Brightness(img).enhance(factor)\n        return np.asarray(brightened_img)\n\n    results = construct_toy_data()\n    # test wighout aug\n    transform = dict(type='BrightnessTransform', level=10, prob=0)\n    transform_module = 
build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], results['img'])\n\n    # test case with factor 1.0\n    transform = dict(type='BrightnessTransform', level=10, prob=1.)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    transform_module.factor = 1.0\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], results['img'])\n\n    # test case with factor 0.0\n    transform_module.factor = 0.0\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'],\n                       np.zeros_like(results['img']))\n\n    # test with randomly sampled images and factors.\n    for _ in range(nb_rand_test):\n        img = np.clip(np.random.uniform(0, 1, (1000, 1200, 3)) * 260, 0,\n                      255).astype(np.uint8)\n        factor = np.random.uniform()\n        transform_module.factor = factor\n        results['img'] = img\n        np.testing.assert_allclose(\n            transform_module(copy.deepcopy(results))['img'].astype(np.int32),\n            _adjust_brightness(img, factor).astype(np.int32),\n            rtol=0,\n            atol=1)\n\n\ndef test_adjust_contrast(nb_rand_test=100):\n\n    def _adjust_contrast(img, factor):\n        from PIL import Image\n        from PIL.ImageEnhance import Contrast\n\n        # Image.fromarray defaultly supports RGB, not BGR.\n        # convert from BGR to RGB\n        img = Image.fromarray(img[..., ::-1], mode='RGB')\n        contrasted_img = Contrast(img).enhance(factor)\n        # convert from RGB to BGR\n        return np.asarray(contrasted_img)[..., ::-1]\n\n    results = construct_toy_data()\n    # test wighout aug\n    transform = dict(type='ContrastTransform', level=10, prob=0)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], results['img'])\n\n    # test case with factor 1.0\n    transform = dict(type='ContrastTransform', level=10, prob=1.)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    transform_module.factor = 1.0\n    results_transformed = transform_module(copy.deepcopy(results))\n    assert_array_equal(results_transformed['img'], results['img'])\n\n    # test case with factor 0.0\n    transform_module.factor = 0.0\n    results_transformed = transform_module(copy.deepcopy(results))\n    np.testing.assert_allclose(\n        results_transformed['img'],\n        _adjust_contrast(results['img'], 0.),\n        rtol=0,\n        atol=1)\n\n    # test adjust_contrast with randomly sampled images and factors.\n    for _ in range(nb_rand_test):\n        img = np.clip(np.random.uniform(0, 1, (1200, 1000, 3)) * 260, 0,\n                      255).astype(np.uint8)\n        factor = np.random.uniform()\n        transform_module.factor = factor\n        results['img'] = img\n        results_transformed = transform_module(copy.deepcopy(results))\n        # Note the gap (less_equal 1) between PIL.ImageEnhance.Contrast\n        # and mmcv.adjust_contrast comes from the gap that converts from\n        # a color image to gray image using mmcv or PIL.\n        np.testing.assert_allclose(\n            transform_module(copy.deepcopy(results))['img'].astype(np.int32),\n            _adjust_contrast(results['img'], factor).astype(np.int32),\n            rtol=0,\n            
atol=1)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_transform/test_models_aug_test.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\n\nimport mmcv\nimport torch\nfrom mmcv.parallel import collate\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.datasets.builder import PIPELINES\nfrom mmdet.models import build_detector\n\n\ndef model_aug_test_template(cfg_file):\n    # get config\n    cfg = mmcv.Config.fromfile(cfg_file)\n    # init model\n    cfg.model.pretrained = None\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model)\n\n    # init test pipeline and set aug test\n    load_cfg, multi_scale_cfg = cfg.test_pipeline\n    multi_scale_cfg['flip'] = True\n    multi_scale_cfg['flip_direction'] = ['horizontal', 'vertical', 'diagonal']\n    multi_scale_cfg['img_scale'] = [(1333, 800), (800, 600), (640, 480)]\n\n    load = build_from_cfg(load_cfg, PIPELINES)\n    transform = build_from_cfg(multi_scale_cfg, PIPELINES)\n\n    results = dict(\n        img_prefix=osp.join(osp.dirname(__file__), '../../../data'),\n        img_info=dict(filename='color.jpg'))\n    results = transform(load(results))\n    assert len(results['img']) == 12\n    assert len(results['img_metas']) == 12\n\n    results['img'] = [collate([x]) for x in results['img']]\n    results['img_metas'] = [collate([x]).data[0] for x in results['img_metas']]\n    # aug test the model\n    model.eval()\n    with torch.no_grad():\n        aug_result = model(return_loss=False, rescale=True, **results)\n    return aug_result\n\n\ndef test_aug_test_size():\n    results = dict(\n        img_prefix=osp.join(osp.dirname(__file__), '../../../data'),\n        img_info=dict(filename='color.jpg'))\n\n    # Define simple pipeline\n    load = dict(type='LoadImageFromFile')\n    load = build_from_cfg(load, PIPELINES)\n\n    # get config\n    transform = dict(\n        type='MultiScaleFlipAug',\n        transforms=[],\n        img_scale=[(1333, 800), (800, 600), (640, 480)],\n        flip=True,\n        flip_direction=['horizontal', 'vertical', 'diagonal'])\n    multi_aug_test_module = build_from_cfg(transform, PIPELINES)\n\n    results = load(results)\n    results = multi_aug_test_module(load(results))\n    # len([\"original\", \"horizontal\", \"vertical\", \"diagonal\"]) *\n    # len([(1333, 800), (800, 600), (640, 480)])\n    assert len(results['img']) == 12\n\n\ndef test_cascade_rcnn_aug_test():\n    aug_result = model_aug_test_template(\n        'configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py')\n    assert len(aug_result[0]) == 80\n\n\ndef test_mask_rcnn_aug_test():\n    aug_result = model_aug_test_template(\n        'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')\n    assert len(aug_result[0]) == 2\n    assert len(aug_result[0][0]) == 80\n    assert len(aug_result[0][1]) == 80\n\n\ndef test_htc_aug_test():\n    aug_result = model_aug_test_template('configs/htc/htc_r50_fpn_1x_coco.py')\n    assert len(aug_result[0]) == 2\n    assert len(aug_result[0][0]) == 80\n    assert len(aug_result[0][1]) == 80\n\n\ndef test_scnet_aug_test():\n    aug_result = model_aug_test_template(\n        'configs/scnet/scnet_r50_fpn_1x_coco.py')\n    assert len(aug_result[0]) == 2\n    assert len(aug_result[0][0]) == 80\n    assert len(aug_result[0][1]) == 80\n\n\ndef test_cornernet_aug_test():\n    # get config\n    cfg = mmcv.Config.fromfile(\n        'configs/cornernet/cornernet_hourglass104_mstest_10x5_210e_coco.py')\n    # init model\n    cfg.model.pretrained = None\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model)\n\n    # init test pipeline and set aug test\n   
 load_cfg, multi_scale_cfg = cfg.test_pipeline\n    multi_scale_cfg['flip'] = True\n    multi_scale_cfg['flip_direction'] = ['horizontal', 'vertical', 'diagonal']\n    multi_scale_cfg['scale_factor'] = [0.5, 1.0, 2.0]\n\n    load = build_from_cfg(load_cfg, PIPELINES)\n    transform = build_from_cfg(multi_scale_cfg, PIPELINES)\n\n    results = dict(\n        img_prefix=osp.join(osp.dirname(__file__), '../../../data'),\n        img_info=dict(filename='color.jpg'))\n    results = transform(load(results))\n    assert len(results['img']) == 12\n    assert len(results['img_metas']) == 12\n\n    results['img'] = [collate([x]) for x in results['img']]\n    results['img_metas'] = [collate([x]).data[0] for x in results['img_metas']]\n    # aug test the model\n    model.eval()\n    with torch.no_grad():\n        aug_result = model(return_loss=False, rescale=True, **results)\n    assert len(aug_result[0]) == 80\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_transform/test_rotate.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport numpy as np\nimport pytest\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.core.mask import BitmapMasks, PolygonMasks\nfrom mmdet.datasets.builder import PIPELINES\nfrom .utils import check_result_same, construct_toy_data\n\n\ndef test_rotate():\n    # test assertion for invalid type of max_rotate_angle\n    with pytest.raises(AssertionError):\n        transform = dict(type='Rotate', level=1, max_rotate_angle=(30, ))\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid type of scale\n    with pytest.raises(AssertionError):\n        transform = dict(type='Rotate', level=2, scale=(1.2, ))\n        build_from_cfg(transform, PIPELINES)\n\n    # test ValueError for invalid type of img_fill_val\n    with pytest.raises(ValueError):\n        transform = dict(\n            type='Rotate', level=2, img_fill_val=[\n                128,\n            ])\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid number of elements in center\n    with pytest.raises(AssertionError):\n        transform = dict(type='Rotate', level=2, center=(0.5, ))\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid type of center\n    with pytest.raises(AssertionError):\n        transform = dict(type='Rotate', level=2, center=[0, 0])\n        build_from_cfg(transform, PIPELINES)\n\n    # test case when no rotate aug (level=0)\n    results = construct_toy_data()\n    img_fill_val = (104, 116, 124)\n    seg_ignore_label = 255\n    transform = dict(\n        type='Rotate',\n        level=0,\n        prob=1.,\n        img_fill_val=img_fill_val,\n        seg_ignore_label=seg_ignore_label,\n    )\n    rotate_module = build_from_cfg(transform, PIPELINES)\n    results_wo_rotate = rotate_module(copy.deepcopy(results))\n    check_result_same(results, results_wo_rotate)\n\n    # test case when no rotate aug (prob<=0)\n    transform = dict(\n        type='Rotate', level=10, prob=0., img_fill_val=img_fill_val, scale=0.6)\n    rotate_module = build_from_cfg(transform, PIPELINES)\n    results_wo_rotate = rotate_module(copy.deepcopy(results))\n    check_result_same(results, results_wo_rotate)\n\n    # test clockwise rotation with angle 90\n    results = construct_toy_data()\n    img_fill_val = 128\n    transform = dict(\n        type='Rotate',\n        level=10,\n        max_rotate_angle=90,\n        img_fill_val=img_fill_val,\n        # set random_negative_prob to 0 for clockwise rotation\n        random_negative_prob=0.,\n        prob=1.)\n    rotate_module = build_from_cfg(transform, PIPELINES)\n    results_rotated = rotate_module(copy.deepcopy(results))\n    img_r = np.array([[img_fill_val, 6, 2, img_fill_val],\n                      [img_fill_val, 7, 3, img_fill_val]]).astype(np.uint8)\n    img_r = np.stack([img_r, img_r, img_r], axis=-1)\n    results_gt = copy.deepcopy(results)\n    results_gt['img'] = img_r\n    results_gt['gt_bboxes'] = np.array([[1., 0., 2., 1.]], dtype=np.float32)\n    results_gt['gt_bboxes_ignore'] = np.empty((0, 4), dtype=np.float32)\n    gt_masks = np.array([[0, 1, 1, 0], [0, 0, 1, 0]],\n                        dtype=np.uint8)[None, :, :]\n    results_gt['gt_masks'] = BitmapMasks(gt_masks, 2, 4)\n    results_gt['gt_semantic_seg'] = np.array(\n        [[255, 6, 2, 255], [255, 7, 3,\n                            255]]).astype(results['gt_semantic_seg'].dtype)\n    check_result_same(results_gt, results_rotated)\n\n    # test clockwise 
rotation with angle 90, PolygonMasks\n    results = construct_toy_data(poly2mask=False)\n    results_rotated = rotate_module(copy.deepcopy(results))\n    gt_masks = [[np.array([2, 0, 2, 1, 1, 1, 1, 0], dtype=np.float64)]]\n    results_gt['gt_masks'] = PolygonMasks(gt_masks, 2, 4)\n    check_result_same(results_gt, results_rotated)\n\n    # test counter-clockwise rotation with angle 90,\n    # and specify the rotation center\n    img_fill_val = (104, 116, 124)\n    transform = dict(\n        type='Rotate',\n        level=10,\n        max_rotate_angle=90,\n        center=(0, 0),\n        img_fill_val=img_fill_val,\n        # set random_negative_prob to 1 for counter-clockwise rotation\n        random_negative_prob=1.,\n        prob=1.)\n    results = construct_toy_data()\n    rotate_module = build_from_cfg(transform, PIPELINES)\n    results_rotated = rotate_module(copy.deepcopy(results))\n    results_gt = copy.deepcopy(results)\n    h, w = results['img'].shape[:2]\n    img_r = np.stack([\n        np.ones((h, w)) * img_fill_val[0],\n        np.ones((h, w)) * img_fill_val[1],\n        np.ones((h, w)) * img_fill_val[2]\n    ],\n                     axis=-1).astype(np.uint8)\n    img_r[0, 0, :] = 1\n    img_r[0, 1, :] = 5\n    results_gt['img'] = img_r\n    results_gt['gt_bboxes'] = np.empty((0, 4), dtype=np.float32)\n    results_gt['gt_bboxes_ignore'] = np.empty((0, 4), dtype=np.float32)\n    results_gt['gt_labels'] = np.empty((0, ), dtype=np.int64)\n    gt_masks = np.empty((0, h, w), dtype=np.uint8)\n    results_gt['gt_masks'] = BitmapMasks(gt_masks, h, w)\n    gt_seg = (np.ones((h, w)) * 255).astype(results['gt_semantic_seg'].dtype)\n    gt_seg[0, 0], gt_seg[0, 1] = 1, 5\n    results_gt['gt_semantic_seg'] = gt_seg\n    check_result_same(results_gt, results_rotated)\n\n    transform = dict(\n        type='Rotate',\n        level=10,\n        max_rotate_angle=90,\n        center=(0),\n        img_fill_val=img_fill_val,\n        random_negative_prob=1.,\n        prob=1.)\n    rotate_module = build_from_cfg(transform, PIPELINES)\n    results_rotated = rotate_module(copy.deepcopy(results))\n    check_result_same(results_gt, results_rotated)\n\n    # test counter-clockwise rotation with angle 90,\n    # and specify the rotation center, PolygonMasks\n    results = construct_toy_data(poly2mask=False)\n    results_rotated = rotate_module(copy.deepcopy(results))\n    gt_masks = [[np.array([0, 0, 0, 0, 1, 0, 1, 0], dtype=np.float64)]]\n    results_gt['gt_masks'] = PolygonMasks(gt_masks, 2, 4)\n    check_result_same(results_gt, results_rotated)\n\n    # test AutoAugment equipped with Rotate\n    policies = [[dict(type='Rotate', level=10, prob=1.)]]\n    autoaug = dict(type='AutoAugment', policies=policies)\n    autoaug_module = build_from_cfg(autoaug, PIPELINES)\n    autoaug_module(copy.deepcopy(results))\n\n    policies = [[\n        dict(type='Rotate', level=10, prob=1.),\n        dict(\n            type='Rotate',\n            level=8,\n            max_rotate_angle=90,\n            center=(0),\n            img_fill_val=img_fill_val)\n    ]]\n    autoaug = dict(type='AutoAugment', policies=policies)\n    autoaug_module = build_from_cfg(autoaug, PIPELINES)\n    autoaug_module(copy.deepcopy(results))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_transform/test_shear.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport numpy as np\nimport pytest\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.core.mask import BitmapMasks, PolygonMasks\nfrom mmdet.datasets.builder import PIPELINES\nfrom .utils import check_result_same, construct_toy_data\n\n\ndef test_shear():\n    # test assertion for invalid type of max_shear_magnitude\n    with pytest.raises(AssertionError):\n        transform = dict(type='Shear', level=1, max_shear_magnitude=(0.5, ))\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid value of max_shear_magnitude\n    with pytest.raises(AssertionError):\n        transform = dict(type='Shear', level=2, max_shear_magnitude=1.2)\n        build_from_cfg(transform, PIPELINES)\n\n    # test ValueError for invalid type of img_fill_val\n    with pytest.raises(ValueError):\n        transform = dict(type='Shear', level=2, img_fill_val=[128])\n        build_from_cfg(transform, PIPELINES)\n\n    results = construct_toy_data()\n    # test case when no shear aug (level=0, direction='horizontal')\n    img_fill_val = (104, 116, 124)\n    seg_ignore_label = 255\n    transform = dict(\n        type='Shear',\n        level=0,\n        prob=1.,\n        img_fill_val=img_fill_val,\n        seg_ignore_label=seg_ignore_label,\n        direction='horizontal')\n    shear_module = build_from_cfg(transform, PIPELINES)\n    results_wo_shear = shear_module(copy.deepcopy(results))\n    check_result_same(results, results_wo_shear)\n\n    # test case when no shear aug (level=0, direction='vertical')\n    transform = dict(\n        type='Shear',\n        level=0,\n        prob=1.,\n        img_fill_val=img_fill_val,\n        seg_ignore_label=seg_ignore_label,\n        direction='vertical')\n    shear_module = build_from_cfg(transform, PIPELINES)\n    results_wo_shear = shear_module(copy.deepcopy(results))\n    check_result_same(results, results_wo_shear)\n\n    # test case when no shear aug (prob<=0)\n    transform = dict(\n        type='Shear',\n        level=10,\n        prob=0.,\n        img_fill_val=img_fill_val,\n        direction='vertical')\n    shear_module = build_from_cfg(transform, PIPELINES)\n    results_wo_shear = shear_module(copy.deepcopy(results))\n    check_result_same(results, results_wo_shear)\n\n    # test shear horizontally, magnitude=1\n    transform = dict(\n        type='Shear',\n        level=10,\n        prob=1.,\n        img_fill_val=img_fill_val,\n        direction='horizontal',\n        max_shear_magnitude=1.,\n        random_negative_prob=0.)\n    shear_module = build_from_cfg(transform, PIPELINES)\n    results_sheared = shear_module(copy.deepcopy(results))\n    results_gt = copy.deepcopy(results)\n    img_s = np.array([[1, 2, 3, 4], [0, 5, 6, 7]], dtype=np.uint8)\n    img_s = np.stack([img_s, img_s, img_s], axis=-1)\n    img_s[1, 0, :] = np.array(img_fill_val)\n    results_gt['img'] = img_s\n    results_gt['gt_bboxes'] = np.array([[0., 0., 3., 1.]], dtype=np.float32)\n    results_gt['gt_bboxes_ignore'] = np.array([[2., 0., 4., 1.]],\n                                              dtype=np.float32)\n    gt_masks = np.array([[0, 1, 1, 0], [0, 0, 1, 0]],\n                        dtype=np.uint8)[None, :, :]\n    results_gt['gt_masks'] = BitmapMasks(gt_masks, 2, 4)\n    results_gt['gt_semantic_seg'] = np.array(\n        [[1, 2, 3, 4], [255, 5, 6, 7]], dtype=results['gt_semantic_seg'].dtype)\n    check_result_same(results_gt, results_sheared)\n\n    # test PolygonMasks with shear 
horizontally, magnitude=1\n    results = construct_toy_data(poly2mask=False)\n    results_sheared = shear_module(copy.deepcopy(results))\n    gt_masks = [[np.array([0, 0, 2, 0, 3, 1, 1, 1], dtype=np.float64)]]\n    results_gt['gt_masks'] = PolygonMasks(gt_masks, 2, 4)\n    check_result_same(results_gt, results_sheared)\n\n    # test shear vertically, magnitude=-1\n    img_fill_val = 128\n    results = construct_toy_data()\n    transform = dict(\n        type='Shear',\n        level=10,\n        prob=1.,\n        img_fill_val=img_fill_val,\n        direction='vertical',\n        max_shear_magnitude=1.,\n        random_negative_prob=1.)\n    shear_module = build_from_cfg(transform, PIPELINES)\n    results_sheared = shear_module(copy.deepcopy(results))\n    results_gt = copy.deepcopy(results)\n    img_s = np.array([[1, 6, img_fill_val, img_fill_val],\n                      [5, img_fill_val, img_fill_val, img_fill_val]],\n                     dtype=np.uint8)\n    img_s = np.stack([img_s, img_s, img_s], axis=-1)\n    results_gt['img'] = img_s\n    results_gt['gt_bboxes'] = np.empty((0, 4), dtype=np.float32)\n    results_gt['gt_labels'] = np.empty((0, ), dtype=np.int64)\n    results_gt['gt_bboxes_ignore'] = np.empty((0, 4), dtype=np.float32)\n    gt_masks = np.array([[0, 1, 0, 0], [0, 0, 0, 0]],\n                        dtype=np.uint8)[None, :, :]\n    results_gt['gt_masks'] = BitmapMasks(gt_masks, 2, 4)\n    results_gt['gt_semantic_seg'] = np.array(\n        [[1, 6, 255, 255], [5, 255, 255, 255]],\n        dtype=results['gt_semantic_seg'].dtype)\n    check_result_same(results_gt, results_sheared)\n\n    # test PolygonMasks with shear vertically, magnitude=-1\n    results = construct_toy_data(poly2mask=False)\n    results_sheared = shear_module(copy.deepcopy(results))\n    gt_masks = [[np.array([0, 0, 2, 0, 2, 0, 0, 1], dtype=np.float64)]]\n    results_gt['gt_masks'] = PolygonMasks(gt_masks, 2, 4)\n    check_result_same(results_gt, results_sheared)\n\n    results = construct_toy_data()\n    # same mask for BitmapMasks and PolygonMasks\n    results['gt_masks'] = BitmapMasks(\n        np.array([[0, 1, 1, 0], [0, 1, 1, 0]], dtype=np.uint8)[None, :, :], 2,\n        4)\n    results['gt_bboxes'] = np.array([[1., 0., 2., 1.]], dtype=np.float32)\n    results_sheared_bitmap = shear_module(copy.deepcopy(results))\n    check_result_same(results_sheared_bitmap, results_sheared)\n\n    # test AutoAugment equipped with Shear\n    policies = [[dict(type='Shear', level=10, prob=1.)]]\n    autoaug = dict(type='AutoAugment', policies=policies)\n    autoaug_module = build_from_cfg(autoaug, PIPELINES)\n    autoaug_module(copy.deepcopy(results))\n\n    policies = [[\n        dict(type='Shear', level=10, prob=1.),\n        dict(\n            type='Shear',\n            level=8,\n            img_fill_val=img_fill_val,\n            direction='vertical',\n            max_shear_magnitude=1.)\n    ]]\n    autoaug = dict(type='AutoAugment', policies=policies)\n    autoaug_module = build_from_cfg(autoaug, PIPELINES)\n    autoaug_module(copy.deepcopy(results))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_transform/test_transform.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport os.path as osp\n\nimport mmcv\nimport numpy as np\nimport pytest\nimport torch\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.core.evaluation.bbox_overlaps import bbox_overlaps\nfrom mmdet.datasets.builder import PIPELINES\nfrom .utils import create_full_masks, create_random_bboxes\n\n\ndef test_resize():\n    # test assertion if img_scale is a list\n    with pytest.raises(AssertionError):\n        transform = dict(type='Resize', img_scale=[1333, 800], keep_ratio=True)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion if len(img_scale) while ratio_range is not None\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='Resize',\n            img_scale=[(1333, 800), (1333, 600)],\n            ratio_range=(0.9, 1.1),\n            keep_ratio=True)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid multiscale_mode\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='Resize',\n            img_scale=[(1333, 800), (1333, 600)],\n            keep_ratio=True,\n            multiscale_mode='2333')\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion if both scale and scale_factor are set\n    with pytest.raises(AssertionError):\n        results = dict(\n            img_prefix=osp.join(osp.dirname(__file__), '../../../data'),\n            img_info=dict(filename='color.jpg'))\n        load = dict(type='LoadImageFromFile')\n        load = build_from_cfg(load, PIPELINES)\n        transform = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True)\n        transform = build_from_cfg(transform, PIPELINES)\n        results = load(results)\n        results['scale'] = (1333, 800)\n        results['scale_factor'] = 1.0\n        results = transform(results)\n\n    transform = dict(type='Resize', img_scale=(1333, 800), keep_ratio=True)\n    resize_module = build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    results['img'] = img\n    results['img2'] = copy.deepcopy(img)\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['img_fields'] = ['img', 'img2']\n\n    results = resize_module(results)\n    assert np.equal(results['img'], results['img2']).all()\n\n    results.pop('scale')\n    results.pop('scale_factor')\n    transform = dict(\n        type='Resize',\n        img_scale=(1280, 800),\n        multiscale_mode='value',\n        keep_ratio=False)\n    resize_module = build_from_cfg(transform, PIPELINES)\n    results = resize_module(results)\n    assert np.equal(results['img'], results['img2']).all()\n    assert results['img_shape'] == (800, 1280, 3)\n    assert results['img'].dtype == results['img'].dtype == np.uint8\n\n    results_seg = {\n        'img': img,\n        'img_shape': img.shape,\n        'ori_shape': img.shape,\n        'gt_semantic_seg': copy.deepcopy(img),\n        'gt_seg': copy.deepcopy(img),\n        'seg_fields': ['gt_semantic_seg', 'gt_seg']\n    }\n    transform = dict(\n        type='Resize',\n        img_scale=(640, 400),\n        multiscale_mode='value',\n        keep_ratio=False)\n    resize_module = build_from_cfg(transform, PIPELINES)\n    results_seg = resize_module(results_seg)\n    assert 
results_seg['gt_semantic_seg'].shape == results_seg['gt_seg'].shape\n    assert results_seg['img_shape'] == (400, 640, 3)\n    assert results_seg['img_shape'] != results_seg['ori_shape']\n    assert results_seg['gt_semantic_seg'].shape == results_seg['img_shape']\n    assert np.equal(results_seg['gt_semantic_seg'],\n                    results_seg['gt_seg']).all()\n\n\ndef test_flip():\n    # test assertion for invalid flip_ratio\n    with pytest.raises(AssertionError):\n        transform = dict(type='RandomFlip', flip_ratio=1.5)\n        build_from_cfg(transform, PIPELINES)\n    # test assertion for 0 <= sum(flip_ratio) <= 1\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomFlip',\n            flip_ratio=[0.7, 0.8],\n            direction=['horizontal', 'vertical'])\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for mismatch between number of flip_ratio and direction\n    with pytest.raises(AssertionError):\n        transform = dict(type='RandomFlip', flip_ratio=[0.4, 0.5])\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid direction\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomFlip', flip_ratio=1., direction='horizonta')\n        build_from_cfg(transform, PIPELINES)\n\n    transform = dict(type='RandomFlip', flip_ratio=1.)\n    flip_module = build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    original_img = copy.deepcopy(img)\n    results['img'] = img\n    results['img2'] = copy.deepcopy(img)\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['scale_factor'] = 1.0\n    results['img_fields'] = ['img', 'img2']\n\n    results = flip_module(results)\n    assert np.equal(results['img'], results['img2']).all()\n\n    flip_module = build_from_cfg(transform, PIPELINES)\n    results = flip_module(results)\n    assert np.equal(results['img'], results['img2']).all()\n    assert np.equal(original_img, results['img']).all()\n\n    # test flip_ratio is float, direction is list\n    transform = dict(\n        type='RandomFlip',\n        flip_ratio=0.9,\n        direction=['horizontal', 'vertical', 'diagonal'])\n    flip_module = build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    original_img = copy.deepcopy(img)\n    results['img'] = img\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['scale_factor'] = 1.0\n    results['img_fields'] = ['img']\n    results = flip_module(results)\n    if results['flip']:\n        assert np.array_equal(\n            mmcv.imflip(original_img, results['flip_direction']),\n            results['img'])\n    else:\n        assert np.array_equal(original_img, results['img'])\n\n    # test flip_ratio is list, direction is list\n    transform = dict(\n        type='RandomFlip',\n        flip_ratio=[0.3, 0.3, 0.2],\n        direction=['horizontal', 'vertical', 'diagonal'])\n    flip_module = build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    
original_img = copy.deepcopy(img)\n    results['img'] = img\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['scale_factor'] = 1.0\n    results['img_fields'] = ['img']\n    results = flip_module(results)\n    if results['flip']:\n        assert np.array_equal(\n            mmcv.imflip(original_img, results['flip_direction']),\n            results['img'])\n    else:\n        assert np.array_equal(original_img, results['img'])\n\n\ndef test_random_crop():\n    # test assertion for invalid random crop\n    with pytest.raises(AssertionError):\n        transform = dict(type='RandomCrop', crop_size=(-1, 0))\n        build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    results['img'] = img\n\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    # TODO: add img_fields test\n    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['scale_factor'] = 1.0\n\n    h, w, _ = img.shape\n    gt_bboxes = create_random_bboxes(8, w, h)\n    gt_bboxes_ignore = create_random_bboxes(2, w, h)\n    results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n    results['gt_bboxes'] = gt_bboxes\n    results['gt_bboxes_ignore'] = gt_bboxes_ignore\n    transform = dict(type='RandomCrop', crop_size=(h - 20, w - 20))\n    crop_module = build_from_cfg(transform, PIPELINES)\n    results = crop_module(results)\n    assert results['img'].shape[:2] == (h - 20, w - 20)\n    # All bboxes should be reserved after crop\n    assert results['img_shape'][:2] == (h - 20, w - 20)\n    assert results['gt_labels'].shape[0] == results['gt_bboxes'].shape[0]\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes'].shape[0] == 8\n    assert results['gt_bboxes_ignore'].shape[0] == 2\n\n    def area(bboxes):\n        return np.prod(bboxes[:, 2:4] - bboxes[:, 0:2], axis=1)\n\n    assert (area(results['gt_bboxes']) <= area(gt_bboxes)).all()\n    assert (area(results['gt_bboxes_ignore']) <= area(gt_bboxes_ignore)).all()\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes_ignore'].dtype == np.float32\n\n    # test assertion for invalid crop_type\n    with pytest.raises(ValueError):\n        transform = dict(\n            type='RandomCrop', crop_size=(1, 1), crop_type='unknown')\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid crop_size\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomCrop', crop_type='relative', crop_size=(0, 0))\n        build_from_cfg(transform, PIPELINES)\n\n    def _construct_toy_data():\n        img = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.uint8)\n        img = np.stack([img, img, img], axis=-1)\n        results = dict()\n        # image\n        results['img'] = img\n        results['img_shape'] = img.shape\n        results['img_fields'] = ['img']\n        # bboxes\n        results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']\n        results['gt_bboxes'] = np.array([[0., 0., 2., 1.]], dtype=np.float32)\n        results['gt_bboxes_ignore'] = np.array([[2., 0., 3., 1.]],\n                                               dtype=np.float32)\n        # 
labels\n        results['gt_labels'] = np.array([1], dtype=np.int64)\n        return results\n\n    # test crop_type \"relative_range\"\n    results = _construct_toy_data()\n    transform = dict(\n        type='RandomCrop',\n        crop_type='relative_range',\n        crop_size=(0.3, 0.7),\n        allow_negative_crop=True)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    h, w = results_transformed['img_shape'][:2]\n    assert int(2 * 0.3 + 0.5) <= h <= int(2 * 1 + 0.5)\n    assert int(4 * 0.7 + 0.5) <= w <= int(4 * 1 + 0.5)\n    assert results_transformed['gt_bboxes'].dtype == np.float32\n    assert results_transformed['gt_bboxes_ignore'].dtype == np.float32\n\n    # test crop_type \"relative\"\n    transform = dict(\n        type='RandomCrop',\n        crop_type='relative',\n        crop_size=(0.3, 0.7),\n        allow_negative_crop=True)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    h, w = results_transformed['img_shape'][:2]\n    assert h == int(2 * 0.3 + 0.5) and w == int(4 * 0.7 + 0.5)\n    assert results_transformed['gt_bboxes'].dtype == np.float32\n    assert results_transformed['gt_bboxes_ignore'].dtype == np.float32\n\n    # test crop_type \"absolute\"\n    transform = dict(\n        type='RandomCrop',\n        crop_type='absolute',\n        crop_size=(1, 2),\n        allow_negative_crop=True)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    h, w = results_transformed['img_shape'][:2]\n    assert h == 1 and w == 2\n    assert results_transformed['gt_bboxes'].dtype == np.float32\n    assert results_transformed['gt_bboxes_ignore'].dtype == np.float32\n\n    # test crop_type \"absolute_range\"\n    transform = dict(\n        type='RandomCrop',\n        crop_type='absolute_range',\n        crop_size=(1, 20),\n        allow_negative_crop=True)\n    transform_module = build_from_cfg(transform, PIPELINES)\n    results_transformed = transform_module(copy.deepcopy(results))\n    h, w = results_transformed['img_shape'][:2]\n    assert 1 <= h <= 2 and 1 <= w <= 4\n    assert results_transformed['gt_bboxes'].dtype == np.float32\n    assert results_transformed['gt_bboxes_ignore'].dtype == np.float32\n\n\ndef test_min_iou_random_crop():\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    results['img'] = img\n\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['scale_factor'] = 1.0\n    h, w, _ = img.shape\n    gt_bboxes = create_random_bboxes(1, w, h)\n    gt_bboxes_ignore = create_random_bboxes(1, w, h)\n    results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n    results['gt_bboxes'] = gt_bboxes\n    results['gt_bboxes_ignore'] = gt_bboxes_ignore\n    transform = dict(type='MinIoURandomCrop')\n    crop_module = build_from_cfg(transform, PIPELINES)\n\n    # Test for img_fields\n    results_test = copy.deepcopy(results)\n    results_test['img1'] = results_test['img']\n    results_test['img_fields'] = ['img', 'img1']\n    with pytest.raises(AssertionError):\n        crop_module(results_test)\n    results = crop_module(results)\n    assert 
results['gt_labels'].shape[0] == results['gt_bboxes'].shape[0]\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes_ignore'].dtype == np.float32\n\n    patch = np.array([0, 0, results['img_shape'][1], results['img_shape'][0]])\n    ious = bbox_overlaps(patch.reshape(-1, 4),\n                         results['gt_bboxes']).reshape(-1)\n    ious_ignore = bbox_overlaps(\n        patch.reshape(-1, 4), results['gt_bboxes_ignore']).reshape(-1)\n    mode = crop_module.mode\n    if mode == 1:\n        assert np.equal(results['gt_bboxes'], gt_bboxes).all()\n        assert np.equal(results['gt_bboxes_ignore'], gt_bboxes_ignore).all()\n    else:\n        assert (ious >= mode).all()\n        assert (ious_ignore >= mode).all()\n\n\ndef test_pad():\n    # test assertion if both size_divisor and size is None\n    with pytest.raises(AssertionError):\n        transform = dict(type='Pad')\n        build_from_cfg(transform, PIPELINES)\n\n    transform = dict(type='Pad', size_divisor=32)\n    transform = build_from_cfg(transform, PIPELINES)\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    original_img = copy.deepcopy(img)\n    results['img'] = img\n    results['img2'] = copy.deepcopy(img)\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['scale_factor'] = 1.0\n    results['img_fields'] = ['img', 'img2']\n\n    results = transform(results)\n    assert np.equal(results['img'], results['img2']).all()\n    # original img already divisible by 32\n    assert np.equal(results['img'], original_img).all()\n    img_shape = results['img'].shape\n    assert img_shape[0] % 32 == 0\n    assert img_shape[1] % 32 == 0\n\n    resize_transform = dict(\n        type='Resize', img_scale=(1333, 800), keep_ratio=True)\n    resize_module = build_from_cfg(resize_transform, PIPELINES)\n    results = resize_module(results)\n    results = transform(results)\n    img_shape = results['img'].shape\n    assert np.equal(results['img'], results['img2']).all()\n    assert img_shape[0] % 32 == 0\n    assert img_shape[1] % 32 == 0\n\n    # test the size and size_divisor must be None when pad2square is True\n    with pytest.raises(AssertionError):\n        transform = dict(type='Pad', size_divisor=32, pad_to_square=True)\n        build_from_cfg(transform, PIPELINES)\n\n    transform = dict(type='Pad', pad_to_square=True)\n    transform = build_from_cfg(transform, PIPELINES)\n    results['img'] = img\n    results = transform(results)\n    assert results['img'].shape[0] == results['img'].shape[1]\n\n    # test the pad_val is converted to a dict\n    transform = dict(type='Pad', size_divisor=32, pad_val=0)\n    with pytest.deprecated_call():\n        transform = build_from_cfg(transform, PIPELINES)\n\n    assert isinstance(transform.pad_val, dict)\n    results = transform(results)\n    img_shape = results['img'].shape\n    assert img_shape[0] % 32 == 0\n    assert img_shape[1] % 32 == 0\n\n\ndef test_normalize():\n    img_norm_cfg = dict(\n        mean=[123.675, 116.28, 103.53],\n        std=[58.395, 57.12, 57.375],\n        to_rgb=True)\n    transform = dict(type='Normalize', **img_norm_cfg)\n    transform = build_from_cfg(transform, PIPELINES)\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 
'color')\n    original_img = copy.deepcopy(img)\n    results['img'] = img\n    results['img2'] = copy.deepcopy(img)\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['scale_factor'] = 1.0\n    results['img_fields'] = ['img', 'img2']\n\n    results = transform(results)\n    assert np.equal(results['img'], results['img2']).all()\n\n    mean = np.array(img_norm_cfg['mean'])\n    std = np.array(img_norm_cfg['std'])\n    converted_img = (original_img[..., ::-1] - mean) / std\n    assert np.allclose(results['img'], converted_img)\n\n\ndef test_albu_transform():\n    results = dict(\n        img_prefix=osp.join(osp.dirname(__file__), '../../../data'),\n        img_info=dict(filename='color.jpg'))\n\n    # Define simple pipeline\n    load = dict(type='LoadImageFromFile')\n    load = build_from_cfg(load, PIPELINES)\n\n    albu_transform = dict(\n        type='Albu', transforms=[dict(type='ChannelShuffle', p=1)])\n    albu_transform = build_from_cfg(albu_transform, PIPELINES)\n\n    normalize = dict(type='Normalize', mean=[0] * 3, std=[0] * 3, to_rgb=True)\n    normalize = build_from_cfg(normalize, PIPELINES)\n\n    # Execute transforms\n    results = load(results)\n    results = albu_transform(results)\n    results = normalize(results)\n\n    assert results['img'].dtype == np.float32\n\n\ndef test_random_center_crop_pad():\n    # test assertion for invalid crop_size while test_mode=False\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomCenterCropPad',\n            crop_size=(-1, 0),\n            test_mode=False,\n            test_pad_mode=None)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid ratios while test_mode=False\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomCenterCropPad',\n            crop_size=(511, 511),\n            ratios=(1.0),\n            test_mode=False,\n            test_pad_mode=None)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid mean, std and to_rgb\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomCenterCropPad',\n            crop_size=(511, 511),\n            mean=None,\n            std=None,\n            to_rgb=None,\n            test_mode=False,\n            test_pad_mode=None)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid crop_size while test_mode=True\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomCenterCropPad',\n            crop_size=(511, 511),\n            ratios=None,\n            border=None,\n            mean=[123.675, 116.28, 103.53],\n            std=[58.395, 57.12, 57.375],\n            to_rgb=True,\n            test_mode=True,\n            test_pad_mode=('logical_or', 127))\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid ratios while test_mode=True\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomCenterCropPad',\n            crop_size=None,\n            ratios=(0.9, 1.0, 1.1),\n            border=None,\n            mean=[123.675, 116.28, 103.53],\n            std=[58.395, 57.12, 57.375],\n            to_rgb=True,\n            test_mode=True,\n            test_pad_mode=('logical_or', 127))\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid border while 
test_mode=True\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomCenterCropPad',\n            crop_size=None,\n            ratios=None,\n            border=128,\n            mean=[123.675, 116.28, 103.53],\n            std=[58.395, 57.12, 57.375],\n            to_rgb=True,\n            test_mode=True,\n            test_pad_mode=('logical_or', 127))\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid test_pad_mode while test_mode=True\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='RandomCenterCropPad',\n            crop_size=None,\n            ratios=None,\n            border=None,\n            mean=[123.675, 116.28, 103.53],\n            std=[58.395, 57.12, 57.375],\n            to_rgb=True,\n            test_mode=True,\n            test_pad_mode=('do_nothing', 100))\n        build_from_cfg(transform, PIPELINES)\n\n    results = dict(\n        img_prefix=osp.join(osp.dirname(__file__), '../../../data'),\n        img_info=dict(filename='color.jpg'))\n\n    load = dict(type='LoadImageFromFile', to_float32=True)\n    load = build_from_cfg(load, PIPELINES)\n    results = load(results)\n    test_results = copy.deepcopy(results)\n\n    h, w, _ = results['img_shape']\n    gt_bboxes = create_random_bboxes(8, w, h)\n    gt_bboxes_ignore = create_random_bboxes(2, w, h)\n    results['gt_bboxes'] = gt_bboxes\n    results['gt_bboxes_ignore'] = gt_bboxes_ignore\n    train_transform = dict(\n        type='RandomCenterCropPad',\n        crop_size=(h - 20, w - 20),\n        ratios=(1.0, ),\n        border=128,\n        mean=[123.675, 116.28, 103.53],\n        std=[58.395, 57.12, 57.375],\n        to_rgb=True,\n        test_mode=False,\n        test_pad_mode=None)\n    crop_module = build_from_cfg(train_transform, PIPELINES)\n    train_results = crop_module(results)\n    assert train_results['img'].shape[:2] == (h - 20, w - 20)\n    # All bboxes should be reserved after crop\n    assert train_results['pad_shape'][:2] == (h - 20, w - 20)\n    assert train_results['gt_bboxes'].shape[0] == 8\n    assert train_results['gt_bboxes_ignore'].shape[0] == 2\n    assert train_results['gt_bboxes'].dtype == np.float32\n    assert train_results['gt_bboxes_ignore'].dtype == np.float32\n\n    test_transform = dict(\n        type='RandomCenterCropPad',\n        crop_size=None,\n        ratios=None,\n        border=None,\n        mean=[123.675, 116.28, 103.53],\n        std=[58.395, 57.12, 57.375],\n        to_rgb=True,\n        test_mode=True,\n        test_pad_mode=('logical_or', 127))\n    crop_module = build_from_cfg(test_transform, PIPELINES)\n\n    test_results = crop_module(test_results)\n    assert test_results['img'].shape[:2] == (h | 127, w | 127)\n    assert test_results['pad_shape'][:2] == (h | 127, w | 127)\n    assert 'border' in test_results\n\n\ndef test_multi_scale_flip_aug():\n    # test assertion if give both scale_factor and img_scale\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='MultiScaleFlipAug',\n            scale_factor=1.0,\n            img_scale=[(1333, 800)],\n            transforms=[dict(type='Resize')])\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion if both scale_factor and img_scale are None\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='MultiScaleFlipAug',\n            scale_factor=None,\n            img_scale=None,\n            transforms=[dict(type='Resize')])\n        
build_from_cfg(transform, PIPELINES)\n\n    # test assertion if img_scale is not tuple or list of tuple\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='MultiScaleFlipAug',\n            img_scale=[1333, 800],\n            transforms=[dict(type='Resize')])\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion if flip_direction is not str or list of str\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='MultiScaleFlipAug',\n            img_scale=[(1333, 800)],\n            flip_direction=1,\n            transforms=[dict(type='Resize')])\n        build_from_cfg(transform, PIPELINES)\n\n    scale_transform = dict(\n        type='MultiScaleFlipAug',\n        img_scale=[(1333, 800), (1333, 640)],\n        transforms=[dict(type='Resize', keep_ratio=True)])\n    transform = build_from_cfg(scale_transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    results['img'] = img\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    # Set initial values for default meta_keys\n    results['pad_shape'] = img.shape\n    results['img_fields'] = ['img']\n\n    scale_results = transform(copy.deepcopy(results))\n    assert len(scale_results['img']) == 2\n    assert scale_results['img'][0].shape == (750, 1333, 3)\n    assert scale_results['img_shape'][0] == (750, 1333, 3)\n    assert scale_results['img'][1].shape == (640, 1138, 3)\n    assert scale_results['img_shape'][1] == (640, 1138, 3)\n\n    scale_factor_transform = dict(\n        type='MultiScaleFlipAug',\n        scale_factor=[0.8, 1.0, 1.2],\n        transforms=[dict(type='Resize', keep_ratio=False)])\n    transform = build_from_cfg(scale_factor_transform, PIPELINES)\n    scale_factor_results = transform(copy.deepcopy(results))\n    assert len(scale_factor_results['img']) == 3\n    assert scale_factor_results['img'][0].shape == (230, 409, 3)\n    assert scale_factor_results['img_shape'][0] == (230, 409, 3)\n    assert scale_factor_results['img'][1].shape == (288, 512, 3)\n    assert scale_factor_results['img_shape'][1] == (288, 512, 3)\n    assert scale_factor_results['img'][2].shape == (345, 614, 3)\n    assert scale_factor_results['img_shape'][2] == (345, 614, 3)\n\n    # test pipeline of coco_detection\n    results = dict(\n        img_prefix=osp.join(osp.dirname(__file__), '../../../data'),\n        img_info=dict(filename='color.jpg'))\n    load_cfg, multi_scale_cfg = mmcv.Config.fromfile(\n        'configs/_base_/datasets/coco_detection.py').test_pipeline\n    load = build_from_cfg(load_cfg, PIPELINES)\n    transform = build_from_cfg(multi_scale_cfg, PIPELINES)\n    results = transform(load(results))\n    assert len(results['img']) == 1\n    assert len(results['img_metas']) == 1\n    assert isinstance(results['img'][0], torch.Tensor)\n    assert isinstance(results['img_metas'][0], mmcv.parallel.DataContainer)\n    assert results['img_metas'][0].data['ori_shape'] == (288, 512, 3)\n    assert results['img_metas'][0].data['img_shape'] == (750, 1333, 3)\n    assert results['img_metas'][0].data['pad_shape'] == (768, 1344, 3)\n    assert results['img_metas'][0].data['scale_factor'].tolist() == [\n        2.603515625, 2.6041667461395264, 2.603515625, 2.6041667461395264\n    ]\n\n\ndef test_cutout():\n    # test n_holes\n    with pytest.raises(AssertionError):\n        transform = dict(type='CutOut', n_holes=(5, 3), cutout_shape=(8, 8))\n        
build_from_cfg(transform, PIPELINES)\n    with pytest.raises(AssertionError):\n        transform = dict(type='CutOut', n_holes=(3, 4, 5), cutout_shape=(8, 8))\n        build_from_cfg(transform, PIPELINES)\n    # test cutout_shape and cutout_ratio\n    with pytest.raises(AssertionError):\n        transform = dict(type='CutOut', n_holes=1, cutout_shape=8)\n        build_from_cfg(transform, PIPELINES)\n    with pytest.raises(AssertionError):\n        transform = dict(type='CutOut', n_holes=1, cutout_ratio=0.2)\n        build_from_cfg(transform, PIPELINES)\n    # either of cutout_shape and cutout_ratio should be given\n    with pytest.raises(AssertionError):\n        transform = dict(type='CutOut', n_holes=1)\n        build_from_cfg(transform, PIPELINES)\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='CutOut',\n            n_holes=1,\n            cutout_shape=(2, 2),\n            cutout_ratio=(0.4, 0.4))\n        build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n\n    results['img'] = img\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    results['pad_shape'] = img.shape\n    results['img_fields'] = ['img']\n\n    transform = dict(type='CutOut', n_holes=1, cutout_shape=(10, 10))\n    cutout_module = build_from_cfg(transform, PIPELINES)\n    cutout_result = cutout_module(copy.deepcopy(results))\n    assert cutout_result['img'].sum() < img.sum()\n\n    transform = dict(type='CutOut', n_holes=1, cutout_ratio=(0.8, 0.8))\n    cutout_module = build_from_cfg(transform, PIPELINES)\n    cutout_result = cutout_module(copy.deepcopy(results))\n    assert cutout_result['img'].sum() < img.sum()\n\n    transform = dict(\n        type='CutOut',\n        n_holes=(2, 4),\n        cutout_shape=[(10, 10), (15, 15)],\n        fill_in=(255, 255, 255))\n    cutout_module = build_from_cfg(transform, PIPELINES)\n    cutout_result = cutout_module(copy.deepcopy(results))\n    assert cutout_result['img'].sum() > img.sum()\n\n    transform = dict(\n        type='CutOut',\n        n_holes=1,\n        cutout_ratio=(0.8, 0.8),\n        fill_in=(255, 255, 255))\n    cutout_module = build_from_cfg(transform, PIPELINES)\n    cutout_result = cutout_module(copy.deepcopy(results))\n    assert cutout_result['img'].sum() > img.sum()\n\n\ndef test_random_shift():\n    # test assertion for invalid shift_ratio\n    with pytest.raises(AssertionError):\n        transform = dict(type='RandomShift', shift_ratio=1.5)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid max_shift_px\n    with pytest.raises(AssertionError):\n        transform = dict(type='RandomShift', max_shift_px=-1)\n        build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    results['img'] = img\n    # TODO: add img_fields test\n    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']\n\n    h, w, _ = img.shape\n    gt_bboxes = create_random_bboxes(8, w, h)\n    gt_bboxes_ignore = create_random_bboxes(2, w, h)\n    results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n    results['gt_bboxes'] = gt_bboxes\n    results['gt_bboxes_ignore'] = gt_bboxes_ignore\n    transform = dict(type='RandomShift', shift_ratio=1.0)\n    random_shift_module = build_from_cfg(transform, PIPELINES)\n    results = random_shift_module(results)\n\n    
assert results['img'].shape[:2] == (h, w)\n    assert results['gt_labels'].shape[0] == results['gt_bboxes'].shape[0]\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes_ignore'].dtype == np.float32\n\n\ndef test_random_affine():\n    # test assertion for invalid translate_ratio\n    with pytest.raises(AssertionError):\n        transform = dict(type='RandomAffine', max_translate_ratio=1.5)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid scaling_ratio_range\n    with pytest.raises(AssertionError):\n        transform = dict(type='RandomAffine', scaling_ratio_range=(1.5, 0.5))\n        build_from_cfg(transform, PIPELINES)\n\n    with pytest.raises(AssertionError):\n        transform = dict(type='RandomAffine', scaling_ratio_range=(0, 0.5))\n        build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    results['img'] = img\n    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']\n\n    h, w, _ = img.shape\n    gt_bboxes = create_random_bboxes(8, w, h)\n    gt_bboxes_ignore = create_random_bboxes(2, w, h)\n    results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n    results['gt_bboxes'] = gt_bboxes\n    results['gt_bboxes_ignore'] = gt_bboxes_ignore\n    transform = dict(type='RandomAffine')\n    random_affine_module = build_from_cfg(transform, PIPELINES)\n    results = random_affine_module(results)\n\n    assert results['img'].shape[:2] == (h, w)\n    assert results['gt_labels'].shape[0] == results['gt_bboxes'].shape[0]\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes_ignore'].dtype == np.float32\n\n    # test filter bbox\n    gt_bboxes = np.array([[0, 0, 1, 1], [0, 0, 3, 100]], dtype=np.float32)\n    results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n    results['gt_bboxes'] = gt_bboxes\n    transform = dict(\n        type='RandomAffine',\n        max_rotate_degree=0.,\n        max_translate_ratio=0.,\n        scaling_ratio_range=(1., 1.),\n        max_shear_degree=0.,\n        border=(0, 0),\n        min_bbox_size=2,\n        max_aspect_ratio=20,\n        skip_filter=False)\n    random_affine_module = build_from_cfg(transform, PIPELINES)\n\n    results = random_affine_module(results)\n\n    assert results['gt_bboxes'].shape[0] == 0\n    assert results['gt_labels'].shape[0] == 0\n    assert results['gt_labels'].shape[0] == results['gt_bboxes'].shape[0]\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes_ignore'].dtype == np.float32\n\n\ndef test_mosaic():\n    # test assertion for invalid img_scale\n    with pytest.raises(AssertionError):\n        transform = dict(type='Mosaic', img_scale=640)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid probability\n    with pytest.raises(AssertionError):\n        transform = dict(type='Mosaic', prob=1.5)\n        build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    results['img'] = img\n    # TODO: add img_fields test\n    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']\n\n    h, w, _ = img.shape\n    gt_bboxes = create_random_bboxes(8, w, h)\n    gt_bboxes_ignore = 
create_random_bboxes(2, w, h)\n    results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n    results['gt_bboxes'] = gt_bboxes\n    results['gt_bboxes_ignore'] = gt_bboxes_ignore\n    transform = dict(type='Mosaic', img_scale=(10, 12))\n    mosaic_module = build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid mix_results\n    with pytest.raises(AssertionError):\n        mosaic_module(results)\n\n    results['mix_results'] = [copy.deepcopy(results)] * 3\n    results = mosaic_module(results)\n    assert results['img'].shape[:2] == (20, 24)\n    assert results['gt_labels'].shape[0] == results['gt_bboxes'].shape[0]\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes_ignore'].dtype == np.float32\n\n\ndef test_mixup():\n    # test assertion for invalid img_scale\n    with pytest.raises(AssertionError):\n        transform = dict(type='MixUp', img_scale=640)\n        build_from_cfg(transform, PIPELINES)\n\n    results = dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    results['img'] = img\n    # TODO: add img_fields test\n    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']\n\n    h, w, _ = img.shape\n    gt_bboxes = create_random_bboxes(8, w, h)\n    gt_bboxes_ignore = create_random_bboxes(2, w, h)\n    results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n    results['gt_bboxes'] = gt_bboxes\n    results['gt_bboxes_ignore'] = gt_bboxes_ignore\n    transform = dict(type='MixUp', img_scale=(10, 12))\n    mixup_module = build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid mix_results\n    with pytest.raises(AssertionError):\n        mixup_module(results)\n\n    with pytest.raises(AssertionError):\n        results['mix_results'] = [copy.deepcopy(results)] * 2\n        mixup_module(results)\n\n    results['mix_results'] = [copy.deepcopy(results)]\n    results = mixup_module(results)\n    assert results['img'].shape[:2] == (288, 512)\n    assert results['gt_labels'].shape[0] == results['gt_bboxes'].shape[0]\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes_ignore'].dtype == np.float32\n\n    # test filter bbox :\n    # 2 boxes with sides 1 and 3 are filtered as min_bbox_size=5\n    gt_bboxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3]], dtype=np.float32)\n    results['gt_labels'] = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n    results['gt_bboxes'] = gt_bboxes\n    results['gt_bboxes_ignore'] = np.array([], dtype=np.float32)\n    mixresults = results['mix_results'][0]\n    mixresults['gt_labels'] = copy.deepcopy(results['gt_labels'])\n    mixresults['gt_bboxes'] = copy.deepcopy(results['gt_bboxes'])\n    mixresults['gt_bboxes_ignore'] = copy.deepcopy(results['gt_bboxes_ignore'])\n    transform = dict(\n        type='MixUp',\n        img_scale=(10, 12),\n        ratio_range=(1.5, 1.5),\n        min_bbox_size=5,\n        skip_filter=False)\n    mixup_module = build_from_cfg(transform, PIPELINES)\n\n    results = mixup_module(results)\n\n    assert results['gt_bboxes'].shape[0] == 2\n    assert results['gt_labels'].shape[0] == 2\n    assert results['gt_labels'].shape[0] == results['gt_bboxes'].shape[0]\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    assert results['gt_bboxes_ignore'].dtype == np.float32\n\n\ndef 
test_photo_metric_distortion():\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    transform = dict(type='PhotoMetricDistortion')\n    distortion_module = build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid img_fields\n    with pytest.raises(AssertionError):\n        results = dict()\n        results['img'] = img\n        results['img2'] = img\n        results['img_fields'] = ['img', 'img2']\n        distortion_module(results)\n\n    # test uint8 input\n    results = dict()\n    results['img'] = img\n    results = distortion_module(results)\n    assert results['img'].dtype == np.float32\n\n    # test float32 input\n    results = dict()\n    results['img'] = img.astype(np.float32)\n    results = distortion_module(results)\n    assert results['img'].dtype == np.float32\n\n\ndef test_copypaste():\n    dst_results, src_results = dict(), dict()\n    img = mmcv.imread(\n        osp.join(osp.dirname(__file__), '../../../data/color.jpg'), 'color')\n    dst_results['img'] = img.copy()\n    src_results['img'] = img.copy()\n\n    h, w, _ = img.shape\n\n    dst_bboxes = np.array([[0.2 * w, 0.2 * h, 0.4 * w, 0.4 * h],\n                           [0.5 * w, 0.5 * h, 0.6 * w, 0.6 * h]],\n                          dtype=np.float32)\n    src_bboxes = np.array([[0.1 * w, 0.1 * h, 0.3 * w, 0.5 * h],\n                           [0.4 * w, 0.4 * h, 0.7 * w, 0.7 * h],\n                           [0.8 * w, 0.8 * h, 0.9 * w, 0.9 * h]],\n                          dtype=np.float32)\n    dst_labels = np.ones(dst_bboxes.shape[0], dtype=np.int64)\n    src_labels = np.ones(src_bboxes.shape[0], dtype=np.int64) * 2\n    dst_masks = create_full_masks(dst_bboxes, w, h)\n    src_masks = create_full_masks(src_bboxes, w, h)\n    dst_results['gt_bboxes'] = dst_bboxes.copy()\n    src_results['gt_bboxes'] = src_bboxes.copy()\n    dst_results['gt_labels'] = dst_labels.copy()\n    src_results['gt_labels'] = src_labels.copy()\n    dst_results['gt_masks'] = copy.deepcopy(dst_masks)\n    src_results['gt_masks'] = copy.deepcopy(src_masks)\n\n    results = copy.deepcopy(dst_results)\n\n    transform = dict(type='CopyPaste', selected=False)\n    copypaste_module = build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid mix_results\n    with pytest.raises(AssertionError):\n        copypaste_module(results)\n\n    results['mix_results'] = [copy.deepcopy(src_results)]\n    results = copypaste_module(results)\n    assert results['img'].shape[:2] == (h, w)\n    # one object of destination image is totally occluded\n    assert results['gt_bboxes'].shape[0] == \\\n           dst_bboxes.shape[0] + src_bboxes.shape[0] - 1\n    assert results['gt_labels'].shape[0] == \\\n           dst_labels.shape[0] + src_labels.shape[0] - 1\n    assert results['gt_masks'].masks.shape[0] == \\\n           dst_masks.masks.shape[0] + src_masks.masks.shape[0] - 1\n\n    assert results['gt_labels'].dtype == np.int64\n    assert results['gt_bboxes'].dtype == np.float32\n    # the object of destination image is partially occluded\n    ori_bbox = dst_bboxes[0]\n    occ_bbox = results['gt_bboxes'][0]\n    ori_mask = dst_masks.masks[0]\n    occ_mask = results['gt_masks'].masks[0]\n    assert ori_mask.sum() > occ_mask.sum()\n    assert np.all(np.abs(occ_bbox - ori_bbox) <=\n                  copypaste_module.bbox_occluded_thr) or \\\n        occ_mask.sum() > copypaste_module.mask_occluded_thr\n    # test copypaste with selected objects\n    transform = dict(type='CopyPaste')\n    
copypaste_module = build_from_cfg(transform, PIPELINES)\n    results = copy.deepcopy(dst_results)\n    results['mix_results'] = [copy.deepcopy(src_results)]\n    copypaste_module(results)\n    # test copypaste with an empty source image\n    results = copy.deepcopy(dst_results)\n    valid_inds = [False] * src_bboxes.shape[0]\n    src_results['gt_bboxes'] = src_bboxes[valid_inds]\n    src_results['gt_labels'] = src_labels[valid_inds]\n    src_results['gt_masks'] = src_masks[valid_inds]\n    results['mix_results'] = [copy.deepcopy(src_results)]\n    copypaste_module(results)\n    # test copy_paste based on bbox\n    dst_results.pop('gt_masks')\n    src_results.pop('gt_masks')\n    dst_bboxes = dst_results['gt_bboxes']\n    src_bboxes = src_results['gt_bboxes']\n    dst_masks = create_full_masks(dst_bboxes, w, h)\n    src_masks = create_full_masks(src_bboxes, w, h)\n    results = copy.deepcopy(dst_results)\n    results['mix_results'] = [copy.deepcopy(src_results)]\n    results = copypaste_module(results)\n    result_masks = create_full_masks(results['gt_bboxes'], w, h)\n    result_masks_np = np.where(result_masks.to_ndarray().sum(0) > 0, 1, 0)\n    masks_np = np.where(\n        (src_masks.to_ndarray().sum(0) + dst_masks.to_ndarray().sum(0)) > 0, 1,\n        0)\n    assert np.all(result_masks_np == masks_np)\n    assert 'gt_masks' not in results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_transform/test_translate.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\n\nimport numpy as np\nimport pycocotools.mask as maskUtils\nimport pytest\nfrom mmcv.utils import build_from_cfg\n\nfrom mmdet.core.mask import BitmapMasks, PolygonMasks\nfrom mmdet.datasets.builder import PIPELINES\n\n\ndef _check_keys(results, results_translated):\n    assert len(set(results.keys()).difference(set(\n        results_translated.keys()))) == 0\n    assert len(set(results_translated.keys()).difference(set(\n        results.keys()))) == 0\n\n\ndef _pad(h, w, c, pad_val, axis=-1, dtype=np.float32):\n    assert isinstance(pad_val, (int, float, tuple))\n    if isinstance(pad_val, (int, float)):\n        pad_val = tuple([pad_val] * c)\n    assert len(pad_val) == c\n    pad_data = np.stack([np.ones((h, w)) * pad_val[i] for i in range(c)],\n                        axis=axis).astype(dtype)\n    return pad_data\n\n\ndef _construct_img(results):\n    h, w = results['img_info']['height'], results['img_info']['width']\n    img = np.random.uniform(0, 1, (h, w, 3)) * 255\n    img = img.astype(np.uint8)\n    results['img'] = img\n    results['img_shape'] = img.shape\n    results['ori_shape'] = img.shape\n    results['img_fields'] = ['img']\n\n\ndef _construct_ann_info(h=427, w=640, c=3):\n    bboxes = np.array(\n        [[222.62, 217.82, 241.81, 238.93], [50.5, 329.7, 130.23, 384.96],\n         [175.47, 331.97, 254.8, 389.26]],\n        dtype=np.float32)\n    labels = np.array([9, 2, 2], dtype=np.int64)\n    bboxes_ignore = np.array([[59., 253., 311., 337.]], dtype=np.float32)\n    masks = [\n        [[222.62, 217.82, 222.62, 238.93, 241.81, 238.93, 240.85, 218.78]],\n        [[\n            69.19, 332.17, 82.39, 330.25, 97.24, 329.7, 114.01, 331.35, 116.76,\n            337.39, 119.78, 343.17, 128.03, 344.54, 128.86, 347.84, 124.18,\n            350.59, 129.96, 358.01, 130.23, 366.54, 129.13, 377.81, 125.28,\n            382.48, 119.78, 381.93, 117.31, 377.54, 116.21, 379.46, 114.83,\n            382.21, 107.14, 383.31, 105.49, 378.36, 77.99, 377.54, 75.79,\n            381.11, 69.74, 381.93, 66.72, 378.91, 65.07, 377.81, 63.15, 379.19,\n            62.32, 383.31, 52.7, 384.96, 50.5, 379.46, 51.32, 375.61, 51.6,\n            370.11, 51.6, 364.06, 53.52, 354.99, 56.27, 344.54, 59.57, 336.29,\n            66.45, 332.72\n        ]],\n        [[\n            175.47, 386.86, 175.87, 376.44, 177.08, 351.2, 189.1, 332.77,\n            194.31, 331.97, 236.37, 332.77, 244.79, 342.39, 246.79, 346.79,\n            248.39, 345.99, 251.6, 345.59, 254.8, 348.0, 254.8, 351.6, 250.0,\n            352.0, 250.0, 354.81, 251.6, 358.41, 251.6, 364.42, 251.6, 370.03,\n            252.8, 378.04, 252.8, 384.05, 250.8, 387.26, 246.39, 387.66,\n            245.19, 386.46, 242.38, 388.86, 233.97, 389.26, 232.77, 388.06,\n            232.77, 383.65, 195.91, 381.25, 195.91, 384.86, 191.1, 384.86,\n            187.49, 385.26, 186.69, 382.85, 184.29, 382.45, 183.09, 387.26,\n            178.68, 388.46, 176.28, 387.66\n        ]]\n    ]\n    return dict(\n        bboxes=bboxes, labels=labels, bboxes_ignore=bboxes_ignore, masks=masks)\n\n\ndef _load_bboxes(results):\n    ann_info = results['ann_info']\n    results['gt_bboxes'] = ann_info['bboxes'].copy()\n    results['bbox_fields'] = ['gt_bboxes']\n    gt_bboxes_ignore = ann_info.get('bboxes_ignore', None)\n    if gt_bboxes_ignore is not None:\n        results['gt_bboxes_ignore'] = gt_bboxes_ignore.copy()\n        results['bbox_fields'].append('gt_bboxes_ignore')\n\n\ndef 
_load_labels(results):\n    results['gt_labels'] = results['ann_info']['labels'].copy()\n\n\ndef _poly2mask(mask_ann, img_h, img_w):\n    if isinstance(mask_ann, list):\n        # polygon -- a single object might consist of multiple parts\n        # we merge all parts into one mask rle code\n        rles = maskUtils.frPyObjects(mask_ann, img_h, img_w)\n        rle = maskUtils.merge(rles)\n    elif isinstance(mask_ann['counts'], list):\n        # uncompressed RLE\n        rle = maskUtils.frPyObjects(mask_ann, img_h, img_w)\n    else:\n        # rle\n        rle = mask_ann\n    mask = maskUtils.decode(rle)\n    return mask\n\n\ndef _process_polygons(polygons):\n    polygons = [np.array(p) for p in polygons]\n    valid_polygons = []\n    for polygon in polygons:\n        if len(polygon) % 2 == 0 and len(polygon) >= 6:\n            valid_polygons.append(polygon)\n    return valid_polygons\n\n\ndef _load_masks(results, poly2mask=True):\n    h, w = results['img_info']['height'], results['img_info']['width']\n    gt_masks = results['ann_info']['masks']\n    if poly2mask:\n        gt_masks = BitmapMasks([_poly2mask(mask, h, w) for mask in gt_masks],\n                               h, w)\n    else:\n        gt_masks = PolygonMasks(\n            [_process_polygons(polygons) for polygons in gt_masks], h, w)\n    results['gt_masks'] = gt_masks\n    results['mask_fields'] = ['gt_masks']\n\n\ndef _construct_semantic_seg(results):\n    h, w = results['img_info']['height'], results['img_info']['width']\n    seg_toy = (np.random.uniform(0, 1, (h, w)) * 255).astype(np.uint8)\n    results['gt_semantic_seg'] = seg_toy\n    results['seg_fields'] = ['gt_semantic_seg']\n\n\ndef construct_toy_data(poly2mask=True):\n    img_info = dict(height=427, width=640)\n    ann_info = _construct_ann_info(h=img_info['height'], w=img_info['width'])\n    results = dict(img_info=img_info, ann_info=ann_info)\n    # construct image, similar to 'LoadImageFromFile'\n    _construct_img(results)\n    # 'LoadAnnotations' (bboxes, labels, masks, semantic_seg)\n    _load_bboxes(results)\n    _load_labels(results)\n    _load_masks(results, poly2mask)\n    _construct_semantic_seg(results)\n    return results\n\n\ndef test_translate():\n    # test assertion for invalid value of level\n    with pytest.raises(AssertionError):\n        transform = dict(type='Translate', level=-1)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid type of level\n    with pytest.raises(AssertionError):\n        transform = dict(type='Translate', level=[1])\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid prob\n    with pytest.raises(AssertionError):\n        transform = dict(type='Translate', level=1, prob=-0.5)\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for the num of elements in tuple img_fill_val\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='Translate', level=1, img_fill_val=(128, 128, 128, 128))\n        build_from_cfg(transform, PIPELINES)\n\n    # test ValueError for invalid type of img_fill_val\n    with pytest.raises(ValueError):\n        transform = dict(\n            type='Translate', level=1, img_fill_val=[128, 128, 128])\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid value of img_fill_val\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='Translate', level=1, img_fill_val=(128, -1, 256))\n        build_from_cfg(transform, PIPELINES)\n\n    # test 
assertion for invalid value of direction\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='Translate', level=1, img_fill_val=128, direction='diagonal')\n        build_from_cfg(transform, PIPELINES)\n\n    # test assertion for invalid type of max_translate_offset\n    with pytest.raises(AssertionError):\n        transform = dict(\n            type='Translate',\n            level=1,\n            img_fill_val=128,\n            max_translate_offset=(250., ))\n        build_from_cfg(transform, PIPELINES)\n\n    # construct toy data example for unit test\n    results = construct_toy_data()\n\n    def _check_bbox_mask(results,\n                         results_translated,\n                         offset,\n                         direction,\n                         min_size=0.):\n        # The key correspondence from bboxes to labels and masks.\n        bbox2label = {\n            'gt_bboxes': 'gt_labels',\n            'gt_bboxes_ignore': 'gt_labels_ignore'\n        }\n        bbox2mask = {\n            'gt_bboxes': 'gt_masks',\n            'gt_bboxes_ignore': 'gt_masks_ignore'\n        }\n\n        def _translate_bbox(bboxes, offset, direction, max_h, max_w):\n            if direction == 'horizontal':\n                bboxes[:, 0::2] = bboxes[:, 0::2] + offset\n            elif direction == 'vertical':\n                bboxes[:, 1::2] = bboxes[:, 1::2] + offset\n            else:\n                raise ValueError\n            bboxes[:, 0::2] = np.clip(bboxes[:, 0::2], 0, max_w)\n            bboxes[:, 1::2] = np.clip(bboxes[:, 1::2], 0, max_h)\n            return bboxes\n\n        h, w, c = results_translated['img'].shape\n        for key in results_translated.get('bbox_fields', []):\n            label_key, mask_key = bbox2label[key], bbox2mask[key]\n            # check length of key\n            if label_key in results:\n                assert len(results_translated[key]) == len(\n                    results_translated[label_key])\n            if mask_key in results:\n                assert len(results_translated[key]) == len(\n                    results_translated[mask_key])\n            # construct gt_bboxes\n            gt_bboxes = _translate_bbox(\n                copy.deepcopy(results[key]), offset, direction, h, w)\n            valid_inds = (gt_bboxes[:, 2] - gt_bboxes[:, 0] > min_size) & (\n                gt_bboxes[:, 3] - gt_bboxes[:, 1] > min_size)\n            gt_bboxes = gt_bboxes[valid_inds]\n            # check bbox\n            assert np.equal(gt_bboxes, results_translated[key]).all()\n\n            # construct gt_masks\n            if mask_key not in results:\n                # e.g. 
'gt_masks_ignore'\n                continue\n            masks, masks_translated = results[mask_key].to_ndarray(\n            ), results_translated[mask_key].to_ndarray()\n            assert masks.dtype == masks_translated.dtype\n            if direction == 'horizontal':\n                masks_pad = _pad(\n                    h,\n                    abs(offset),\n                    masks.shape[0],\n                    0,\n                    axis=0,\n                    dtype=masks.dtype)\n                if offset <= 0:\n                    # left shift\n                    gt_masks = np.concatenate(\n                        (masks[:, :, -offset:], masks_pad), axis=-1)\n                else:\n                    # right shift\n                    gt_masks = np.concatenate(\n                        (masks_pad, masks[:, :, :-offset]), axis=-1)\n            else:\n                masks_pad = _pad(\n                    abs(offset),\n                    w,\n                    masks.shape[0],\n                    0,\n                    axis=0,\n                    dtype=masks.dtype)\n                if offset <= 0:\n                    # top shift\n                    gt_masks = np.concatenate(\n                        (masks[:, -offset:, :], masks_pad), axis=1)\n                else:\n                    # bottom shift\n                    gt_masks = np.concatenate(\n                        (masks_pad, masks[:, :-offset, :]), axis=1)\n            gt_masks = gt_masks[valid_inds]\n            # check masks\n            assert np.equal(gt_masks, masks_translated).all()\n\n    def _check_img_seg(results, results_translated, keys, offset, fill_val,\n                       direction):\n        for key in keys:\n            assert isinstance(results_translated[key], type(results[key]))\n            # assert type(results[key]) == type(results_translated[key])\n            data, data_translated = results[key], results_translated[key]\n            if 'mask' in key:\n                data, data_translated = data.to_ndarray(\n                ), data_translated.to_ndarray()\n            assert data.dtype == data_translated.dtype\n            if 'img' in key:\n                data, data_translated = data.transpose(\n                    (2, 0, 1)), data_translated.transpose((2, 0, 1))\n            elif 'seg' in key:\n                data, data_translated = data[None, :, :], data_translated[\n                    None, :, :]\n            c, h, w = data.shape\n            if direction == 'horizontal':\n                data_pad = _pad(\n                    h, abs(offset), c, fill_val, axis=0, dtype=data.dtype)\n                if offset <= 0:\n                    # left shift\n                    data_gt = np.concatenate((data[:, :, -offset:], data_pad),\n                                             axis=-1)\n                else:\n                    # right shift\n                    data_gt = np.concatenate((data_pad, data[:, :, :-offset]),\n                                             axis=-1)\n            else:\n                data_pad = _pad(\n                    abs(offset), w, c, fill_val, axis=0, dtype=data.dtype)\n                if offset <= 0:\n                    # top shift\n                    data_gt = np.concatenate((data[:, -offset:, :], data_pad),\n                                             axis=1)\n                else:\n                    # bottom shift\n                    data_gt = np.concatenate((data_pad, data[:, :-offset, :]),\n                                             
axis=1)\n            if 'mask' in key:\n                # TODO assertion here. ``data_translated`` must be a subset\n                # (or equal) of ``data_gt``\n                pass\n            else:\n                assert np.equal(data_gt, data_translated).all()\n\n    def check_translate(results,\n                        results_translated,\n                        offset,\n                        img_fill_val,\n                        seg_ignore_label,\n                        direction,\n                        min_size=0):\n        # check keys\n        _check_keys(results, results_translated)\n        # check image\n        _check_img_seg(results, results_translated,\n                       results.get('img_fields', ['img']), offset,\n                       img_fill_val, direction)\n        # check segmentation map\n        _check_img_seg(results, results_translated,\n                       results.get('seg_fields', []), offset, seg_ignore_label,\n                       direction)\n        # check masks and bboxes\n        _check_bbox_mask(results, results_translated, offset, direction,\n                         min_size)\n\n    # test case when level=0 (without translate aug)\n    img_fill_val = (104, 116, 124)\n    seg_ignore_label = 255\n    transform = dict(\n        type='Translate',\n        level=0,\n        prob=1.0,\n        img_fill_val=img_fill_val,\n        seg_ignore_label=seg_ignore_label)\n    translate_module = build_from_cfg(transform, PIPELINES)\n    results_wo_translate = translate_module(copy.deepcopy(results))\n    check_translate(\n        copy.deepcopy(results),\n        results_wo_translate,\n        0,\n        img_fill_val,\n        seg_ignore_label,\n        'horizontal',\n    )\n\n    # test case when level>0 and translate horizontally (left shift).\n    transform = dict(\n        type='Translate',\n        level=8,\n        prob=1.0,\n        img_fill_val=img_fill_val,\n        random_negative_prob=1.0,\n        seg_ignore_label=seg_ignore_label)\n    translate_module = build_from_cfg(transform, PIPELINES)\n    offset = translate_module.offset\n    results_translated = translate_module(copy.deepcopy(results))\n    check_translate(\n        copy.deepcopy(results),\n        results_translated,\n        -offset,\n        img_fill_val,\n        seg_ignore_label,\n        'horizontal',\n    )\n\n    # test case when level>0 and translate horizontally (right shift).\n    translate_module.random_negative_prob = 0.0\n    results_translated = translate_module(copy.deepcopy(results))\n    check_translate(\n        copy.deepcopy(results),\n        results_translated,\n        offset,\n        img_fill_val,\n        seg_ignore_label,\n        'horizontal',\n    )\n\n    # test case when level>0 and translate vertically (top shift).\n    transform = dict(\n        type='Translate',\n        level=10,\n        prob=1.0,\n        img_fill_val=img_fill_val,\n        seg_ignore_label=seg_ignore_label,\n        random_negative_prob=1.0,\n        direction='vertical')\n    translate_module = build_from_cfg(transform, PIPELINES)\n    offset = translate_module.offset\n    results_translated = translate_module(copy.deepcopy(results))\n    check_translate(\n        copy.deepcopy(results), results_translated, -offset, img_fill_val,\n        seg_ignore_label, 'vertical')\n\n    # test case when level>0 and translate vertically (bottom shift).\n    translate_module.random_negative_prob = 0.0\n    results_translated = translate_module(copy.deepcopy(results))\n    
check_translate(\n        copy.deepcopy(results), results_translated, offset, img_fill_val,\n        seg_ignore_label, 'vertical')\n\n    # test case when no translation is called (prob<=0)\n    transform = dict(\n        type='Translate',\n        level=8,\n        prob=0.0,\n        img_fill_val=img_fill_val,\n        random_negative_prob=0.0,\n        seg_ignore_label=seg_ignore_label)\n    translate_module = build_from_cfg(transform, PIPELINES)\n    results_translated = translate_module(copy.deepcopy(results))\n\n    # test translate vertically with PolygonMasks (top shift)\n    results = construct_toy_data(False)\n    transform = dict(\n        type='Translate',\n        level=10,\n        prob=1.0,\n        img_fill_val=img_fill_val,\n        seg_ignore_label=seg_ignore_label,\n        direction='vertical')\n    translate_module = build_from_cfg(transform, PIPELINES)\n    offset = translate_module.offset\n    translate_module.random_negative_prob = 1.0\n    results_translated = translate_module(copy.deepcopy(results))\n\n    def _translated_gt(masks, direction, offset, out_shape):\n        translated_masks = []\n        for poly_per_obj in masks:\n            translated_poly_per_obj = []\n            for p in poly_per_obj:\n                p = p.copy()\n                if direction == 'horizontal':\n                    p[0::2] = np.clip(p[0::2] + offset, 0, out_shape[1])\n                elif direction == 'vertical':\n                    p[1::2] = np.clip(p[1::2] + offset, 0, out_shape[0])\n                if PolygonMasks([[p]], *out_shape).areas[0] > 0:\n                    # filter invalid (area=0)\n                    translated_poly_per_obj.append(p)\n            if len(translated_poly_per_obj):\n                translated_masks.append(translated_poly_per_obj)\n        translated_masks = PolygonMasks(translated_masks, *out_shape)\n        return translated_masks\n\n    h, w = results['img_shape'][:2]\n    for key in results.get('mask_fields', []):\n        masks = results[key]\n        translated_gt = _translated_gt(masks, 'vertical', -offset, (h, w))\n        assert np.equal(results_translated[key].to_ndarray(),\n                        translated_gt.to_ndarray()).all()\n\n    # test translate horizontally with PolygonMasks (right shift)\n    results = construct_toy_data(False)\n    transform = dict(\n        type='Translate',\n        level=8,\n        prob=1.0,\n        img_fill_val=img_fill_val,\n        random_negative_prob=0.0,\n        seg_ignore_label=seg_ignore_label)\n    translate_module = build_from_cfg(transform, PIPELINES)\n    offset = translate_module.offset\n    results_translated = translate_module(copy.deepcopy(results))\n    h, w = results['img_shape'][:2]\n    for key in results.get('mask_fields', []):\n        masks = results[key]\n        translated_gt = _translated_gt(masks, 'horizontal', offset, (h, w))\n        assert np.equal(results_translated[key].to_ndarray(),\n                        translated_gt.to_ndarray()).all()\n\n    # test AutoAugment equipped with Translate\n    policies = [[dict(type='Translate', level=10, prob=1.)]]\n    autoaug = dict(type='AutoAugment', policies=policies)\n    autoaug_module = build_from_cfg(autoaug, PIPELINES)\n    autoaug_module(copy.deepcopy(results))\n\n    policies = [[\n        dict(type='Translate', level=10, prob=1.),\n        dict(\n            type='Translate',\n            level=8,\n            img_fill_val=img_fill_val,\n            direction='vertical')\n    ]]\n    autoaug = dict(type='AutoAugment', 
policies=policies)\n    autoaug_module = build_from_cfg(autoaug, PIPELINES)\n    autoaug_module(copy.deepcopy(results))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_pipelines/test_transform/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\n\nfrom mmdet.core.mask import BitmapMasks, PolygonMasks\n\n\ndef _check_fields(results, pipeline_results, keys):\n    \"\"\"Check data in fields from two results are same.\"\"\"\n    for key in keys:\n        if isinstance(results[key], (BitmapMasks, PolygonMasks)):\n            assert np.equal(results[key].to_ndarray(),\n                            pipeline_results[key].to_ndarray()).all()\n        else:\n            assert np.equal(results[key], pipeline_results[key]).all()\n            assert results[key].dtype == pipeline_results[key].dtype\n\n\ndef check_result_same(results, pipeline_results):\n    \"\"\"Check whether the `pipeline_results` is the same with the predefined\n    `results`.\n\n    Args:\n        results (dict): Predefined results which should be the standard output\n            of the transform pipeline.\n        pipeline_results (dict): Results processed by the transform pipeline.\n    \"\"\"\n    # check image\n    _check_fields(results, pipeline_results,\n                  results.get('img_fields', ['img']))\n    # check bboxes\n    _check_fields(results, pipeline_results, results.get('bbox_fields', []))\n    # check masks\n    _check_fields(results, pipeline_results, results.get('mask_fields', []))\n    # check segmentations\n    _check_fields(results, pipeline_results, results.get('seg_fields', []))\n    # check gt_labels\n    if 'gt_labels' in results:\n        assert np.equal(results['gt_labels'],\n                        pipeline_results['gt_labels']).all()\n\n\ndef construct_toy_data(poly2mask=True):\n    img = np.array([[1, 2, 3, 4], [5, 6, 7, 8]], dtype=np.uint8)\n    img = np.stack([img, img, img], axis=-1)\n    results = dict()\n    # image\n    results['img'] = img\n    results['img_shape'] = img.shape\n    results['img_fields'] = ['img']\n    # bboxes\n    results['bbox_fields'] = ['gt_bboxes', 'gt_bboxes_ignore']\n    results['gt_bboxes'] = np.array([[0., 0., 2., 1.]], dtype=np.float32)\n    results['gt_bboxes_ignore'] = np.array([[2., 0., 3., 1.]],\n                                           dtype=np.float32)\n    # labels\n    results['gt_labels'] = np.array([1], dtype=np.int64)\n    # masks\n    results['mask_fields'] = ['gt_masks']\n    if poly2mask:\n        gt_masks = np.array([[0, 1, 1, 0], [0, 1, 0, 0]],\n                            dtype=np.uint8)[None, :, :]\n        results['gt_masks'] = BitmapMasks(gt_masks, 2, 4)\n    else:\n        raw_masks = [[np.array([0, 0, 2, 0, 2, 1, 0, 1], dtype=np.float)]]\n        results['gt_masks'] = PolygonMasks(raw_masks, 2, 4)\n    # segmentations\n    results['seg_fields'] = ['gt_semantic_seg']\n    results['gt_semantic_seg'] = img[..., 0]\n    return results\n\n\ndef create_random_bboxes(num_bboxes, img_w, img_h):\n    bboxes_left_top = np.random.uniform(0, 0.5, size=(num_bboxes, 2))\n    bboxes_right_bottom = np.random.uniform(0.5, 1, size=(num_bboxes, 2))\n    bboxes = np.concatenate((bboxes_left_top, bboxes_right_bottom), 1)\n    bboxes = (bboxes * np.array([img_w, img_h, img_w, img_h])).astype(\n        np.float32)\n    return bboxes\n\n\ndef create_full_masks(gt_bboxes, img_w, img_h):\n    xmin, ymin = gt_bboxes[:, 0:1], gt_bboxes[:, 1:2]\n    xmax, ymax = gt_bboxes[:, 2:3], gt_bboxes[:, 3:4]\n    gt_masks = np.zeros((len(gt_bboxes), img_h, img_w), dtype=np.uint8)\n    for i in range(len(gt_bboxes)):\n        gt_masks[i, int(ymin[i]):int(ymax[i]), int(xmin[i]):int(xmax[i])] = 1\n    gt_masks = BitmapMasks(gt_masks, 
img_h, img_w)\n    return gt_masks\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_data/test_utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\n\nfrom mmdet.datasets import get_loading_pipeline, replace_ImageToTensor\n\n\ndef test_replace_ImageToTensor():\n    # with MultiScaleFlipAug\n    pipelines = [\n        dict(type='LoadImageFromFile'),\n        dict(\n            type='MultiScaleFlipAug',\n            img_scale=(1333, 800),\n            flip=False,\n            transforms=[\n                dict(type='Resize', keep_ratio=True),\n                dict(type='RandomFlip'),\n                dict(type='Normalize'),\n                dict(type='Pad', size_divisor=32),\n                dict(type='ImageToTensor', keys=['img']),\n                dict(type='Collect', keys=['img']),\n            ])\n    ]\n    expected_pipelines = [\n        dict(type='LoadImageFromFile'),\n        dict(\n            type='MultiScaleFlipAug',\n            img_scale=(1333, 800),\n            flip=False,\n            transforms=[\n                dict(type='Resize', keep_ratio=True),\n                dict(type='RandomFlip'),\n                dict(type='Normalize'),\n                dict(type='Pad', size_divisor=32),\n                dict(type='DefaultFormatBundle'),\n                dict(type='Collect', keys=['img']),\n            ])\n    ]\n    with pytest.warns(UserWarning):\n        assert expected_pipelines == replace_ImageToTensor(pipelines)\n\n    # without MultiScaleFlipAug\n    pipelines = [\n        dict(type='LoadImageFromFile'),\n        dict(type='Resize', keep_ratio=True),\n        dict(type='RandomFlip'),\n        dict(type='Normalize'),\n        dict(type='Pad', size_divisor=32),\n        dict(type='ImageToTensor', keys=['img']),\n        dict(type='Collect', keys=['img']),\n    ]\n    expected_pipelines = [\n        dict(type='LoadImageFromFile'),\n        dict(type='Resize', keep_ratio=True),\n        dict(type='RandomFlip'),\n        dict(type='Normalize'),\n        dict(type='Pad', size_divisor=32),\n        dict(type='DefaultFormatBundle'),\n        dict(type='Collect', keys=['img']),\n    ]\n    with pytest.warns(UserWarning):\n        assert expected_pipelines == replace_ImageToTensor(pipelines)\n\n\ndef test_get_loading_pipeline():\n    pipelines = [\n        dict(type='LoadImageFromFile'),\n        dict(type='LoadAnnotations', with_bbox=True),\n        dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),\n        dict(type='RandomFlip', flip_ratio=0.5),\n        dict(type='Pad', size_divisor=32),\n        dict(type='DefaultFormatBundle'),\n        dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])\n    ]\n    expected_pipelines = [\n        dict(type='LoadImageFromFile'),\n        dict(type='LoadAnnotations', with_bbox=True)\n    ]\n    assert expected_pipelines == \\\n           get_loading_pipeline(pipelines)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_downstream/test_mmtrack.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nfrom collections import defaultdict\n\nimport numpy as np\nimport pytest\nimport torch\nfrom mmcv import Config\n\n\n@pytest.mark.parametrize(\n    'cfg_file',\n    ['./tests/data/configs_mmtrack/selsa_faster_rcnn_r101_dc5_1x.py'])\ndef test_vid_fgfa_style_forward(cfg_file):\n    config = Config.fromfile(cfg_file)\n    model = copy.deepcopy(config.model)\n    model.pretrains = None\n    model.detector.pretrained = None\n\n    from mmtrack.models import build_model\n    detector = build_model(model)\n\n    # Test forward train with a non-empty truth batch\n    input_shape = (1, 3, 256, 256)\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    img_metas[0]['is_video_data'] = True\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    gt_masks = mm_inputs['gt_masks']\n\n    ref_input_shape = (2, 3, 256, 256)\n    ref_mm_inputs = _demo_mm_inputs(ref_input_shape, num_items=[9, 11])\n    ref_img = ref_mm_inputs.pop('imgs')[None]\n    ref_img_metas = ref_mm_inputs.pop('img_metas')\n    ref_img_metas[0]['is_video_data'] = True\n    ref_img_metas[1]['is_video_data'] = True\n    ref_gt_bboxes = ref_mm_inputs['gt_bboxes']\n    ref_gt_labels = ref_mm_inputs['gt_labels']\n    ref_gt_masks = ref_mm_inputs['gt_masks']\n\n    losses = detector.forward(\n        img=imgs,\n        img_metas=img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        ref_img=ref_img,\n        ref_img_metas=[ref_img_metas],\n        ref_gt_bboxes=ref_gt_bboxes,\n        ref_gt_labels=ref_gt_labels,\n        gt_masks=gt_masks,\n        ref_gt_masks=ref_gt_masks,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    loss.requires_grad_(True)\n    assert float(loss.item()) > 0\n    loss.backward()\n\n    # Test forward train with an empty truth batch\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    img_metas[0]['is_video_data'] = True\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    gt_masks = mm_inputs['gt_masks']\n\n    ref_mm_inputs = _demo_mm_inputs(ref_input_shape, num_items=[0, 0])\n    ref_imgs = ref_mm_inputs.pop('imgs')[None]\n    ref_img_metas = ref_mm_inputs.pop('img_metas')\n    ref_img_metas[0]['is_video_data'] = True\n    ref_img_metas[1]['is_video_data'] = True\n    ref_gt_bboxes = ref_mm_inputs['gt_bboxes']\n    ref_gt_labels = ref_mm_inputs['gt_labels']\n    ref_gt_masks = ref_mm_inputs['gt_masks']\n\n    losses = detector.forward(\n        img=imgs,\n        img_metas=img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        ref_img=ref_imgs,\n        ref_img_metas=[ref_img_metas],\n        ref_gt_bboxes=ref_gt_bboxes,\n        ref_gt_labels=ref_gt_labels,\n        gt_masks=gt_masks,\n        ref_gt_masks=ref_gt_masks,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    loss.requires_grad_(True)\n    assert float(loss.item()) > 0\n    loss.backward()\n\n    # Test forward test with frame_stride=1 and frame_range=[-1,0]\n    with torch.no_grad():\n        imgs = torch.cat([imgs, imgs.clone()], dim=0)\n        img_list = [g[None, :] for g in imgs]\n        img_metas.extend(copy.deepcopy(img_metas))\n        for i in 
range(len(img_metas)):\n            img_metas[i]['frame_id'] = i\n            img_metas[i]['num_left_ref_imgs'] = 1\n            img_metas[i]['frame_stride'] = 1\n        ref_imgs = [ref_imgs.clone(), imgs[[0]][None].clone()]\n        ref_img_metas = [\n            copy.deepcopy(ref_img_metas),\n            copy.deepcopy([img_metas[0]])\n        ]\n        results = defaultdict(list)\n        for one_img, one_meta, ref_img, ref_img_meta in zip(\n                img_list, img_metas, ref_imgs, ref_img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      ref_img=[ref_img],\n                                      ref_img_metas=[[ref_img_meta]],\n                                      return_loss=False)\n            for k, v in result.items():\n                results[k].append(v)\n\n\n@pytest.mark.parametrize('cfg_file', [\n    './tests/data/configs_mmtrack/tracktor_faster-rcnn_r50_fpn_4e.py',\n])\ndef test_tracktor_forward(cfg_file):\n    config = Config.fromfile(cfg_file)\n    model = copy.deepcopy(config.model)\n    model.pretrains = None\n    model.detector.pretrained = None\n\n    from mmtrack.models import build_model\n    mot = build_model(model)\n    mot.eval()\n\n    input_shape = (1, 3, 256, 256)\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10], with_track=True)\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    with torch.no_grad():\n        imgs = torch.cat([imgs, imgs.clone()], dim=0)\n        img_list = [g[None, :] for g in imgs]\n        img2_metas = copy.deepcopy(img_metas)\n        img2_metas[0]['frame_id'] = 1\n        img_metas.extend(img2_metas)\n        results = defaultdict(list)\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = mot.forward([one_img], [[one_meta]], return_loss=False)\n            for k, v in result.items():\n                results[k].append(v)\n\n\ndef _demo_mm_inputs(\n        input_shape=(1, 3, 300, 300),\n        num_items=None,\n        num_classes=10,\n        with_track=False):\n    \"\"\"Create a superset of inputs needed to run test or train batches.\n\n    Args:\n        input_shape (tuple):\n            input batch dimensions\n\n        num_items (None | List[int]):\n            specifies the number of boxes in each batch item\n\n        num_classes (int):\n            number of different labels a box might have\n    \"\"\"\n    from mmdet.core import BitmapMasks\n\n    (N, C, H, W) = input_shape\n\n    rng = np.random.RandomState(0)\n\n    imgs = rng.rand(*input_shape)\n\n    img_metas = [{\n        'img_shape': (H, W, C),\n        'ori_shape': (H, W, C),\n        'pad_shape': (H, W, C),\n        'filename': '<demo>.png',\n        'scale_factor': 1.0,\n        'flip': False,\n        'frame_id': 0,\n        'img_norm_cfg': {\n            'mean': (128.0, 128.0, 128.0),\n            'std': (10.0, 10.0, 10.0)\n        }\n    } for i in range(N)]\n\n    gt_bboxes = []\n    gt_labels = []\n    gt_masks = []\n    gt_match_indices = []\n\n    for batch_idx in range(N):\n        if num_items is None:\n            num_boxes = rng.randint(1, 10)\n        else:\n            num_boxes = num_items[batch_idx]\n\n        cx, cy, bw, bh = rng.rand(num_boxes, 4).T\n\n        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)\n        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)\n        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)\n        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)\n\n        boxes = np.vstack([tl_x, tl_y, br_x, 
br_y]).T\n        class_idxs = rng.randint(1, num_classes, size=num_boxes)\n\n        gt_bboxes.append(torch.FloatTensor(boxes))\n        gt_labels.append(torch.LongTensor(class_idxs))\n        if with_track:\n            gt_match_indices.append(torch.arange(boxes.shape[0]))\n\n    mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)\n    gt_masks.append(BitmapMasks(mask, H, W))\n\n    mm_inputs = {\n        'imgs': torch.FloatTensor(imgs).requires_grad_(True),\n        'img_metas': img_metas,\n        'gt_bboxes': gt_bboxes,\n        'gt_labels': gt_labels,\n        'gt_bboxes_ignore': None,\n        'gt_masks': gt_masks,\n    }\n    if with_track:\n        mm_inputs['gt_match_indices'] = gt_match_indices\n    return mm_inputs\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_metrics/test_box_overlap.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport pytest\nimport torch\n\nfrom mmdet.core import BboxOverlaps2D, bbox_overlaps\nfrom mmdet.core.evaluation.bbox_overlaps import \\\n    bbox_overlaps as recall_overlaps\n\n\ndef test_bbox_overlaps_2d(eps=1e-7):\n\n    def _construct_bbox(num_bbox=None):\n        img_h = int(np.random.randint(3, 1000))\n        img_w = int(np.random.randint(3, 1000))\n        if num_bbox is None:\n            num_bbox = np.random.randint(1, 10)\n        x1y1 = torch.rand((num_bbox, 2))\n        x2y2 = torch.max(torch.rand((num_bbox, 2)), x1y1)\n        bboxes = torch.cat((x1y1, x2y2), -1)\n        bboxes[:, 0::2] *= img_w\n        bboxes[:, 1::2] *= img_h\n        return bboxes, num_bbox\n\n    # is_aligned is True, bboxes.size(-1) == 5 (include score)\n    self = BboxOverlaps2D()\n    bboxes1, num_bbox = _construct_bbox()\n    bboxes2, _ = _construct_bbox(num_bbox)\n    bboxes1 = torch.cat((bboxes1, torch.rand((num_bbox, 1))), 1)\n    bboxes2 = torch.cat((bboxes2, torch.rand((num_bbox, 1))), 1)\n    gious = self(bboxes1, bboxes2, 'giou', True)\n    assert gious.size() == (num_bbox, ), gious.size()\n    assert torch.all(gious >= -1) and torch.all(gious <= 1)\n\n    # is_aligned is True, bboxes1.size(-2) == 0\n    bboxes1 = torch.empty((0, 4))\n    bboxes2 = torch.empty((0, 4))\n    gious = self(bboxes1, bboxes2, 'giou', True)\n    assert gious.size() == (0, ), gious.size()\n    assert torch.all(gious == torch.empty((0, )))\n    assert torch.all(gious >= -1) and torch.all(gious <= 1)\n\n    # is_aligned is True, and bboxes.ndims > 2\n    bboxes1, num_bbox = _construct_bbox()\n    bboxes2, _ = _construct_bbox(num_bbox)\n    bboxes1 = bboxes1.unsqueeze(0).repeat(2, 1, 1)\n    # test assertion when batch dim is not the same\n    with pytest.raises(AssertionError):\n        self(bboxes1, bboxes2.unsqueeze(0).repeat(3, 1, 1), 'giou', True)\n    bboxes2 = bboxes2.unsqueeze(0).repeat(2, 1, 1)\n    gious = self(bboxes1, bboxes2, 'giou', True)\n    assert torch.all(gious >= -1) and torch.all(gious <= 1)\n    assert gious.size() == (2, num_bbox)\n    bboxes1 = bboxes1.unsqueeze(0).repeat(2, 1, 1, 1)\n    bboxes2 = bboxes2.unsqueeze(0).repeat(2, 1, 1, 1)\n    gious = self(bboxes1, bboxes2, 'giou', True)\n    assert torch.all(gious >= -1) and torch.all(gious <= 1)\n    assert gious.size() == (2, 2, num_bbox)\n\n    # is_aligned is False\n    bboxes1, num_bbox1 = _construct_bbox()\n    bboxes2, num_bbox2 = _construct_bbox()\n    gious = self(bboxes1, bboxes2, 'giou')\n    assert torch.all(gious >= -1) and torch.all(gious <= 1)\n    assert gious.size() == (num_bbox1, num_bbox2)\n\n    # is_aligned is False, and bboxes.ndims > 2\n    bboxes1 = bboxes1.unsqueeze(0).repeat(2, 1, 1)\n    bboxes2 = bboxes2.unsqueeze(0).repeat(2, 1, 1)\n    gious = self(bboxes1, bboxes2, 'giou')\n    assert torch.all(gious >= -1) and torch.all(gious <= 1)\n    assert gious.size() == (2, num_bbox1, num_bbox2)\n    bboxes1 = bboxes1.unsqueeze(0)\n    bboxes2 = bboxes2.unsqueeze(0)\n    gious = self(bboxes1, bboxes2, 'giou')\n    assert torch.all(gious >= -1) and torch.all(gious <= 1)\n    assert gious.size() == (1, 2, num_bbox1, num_bbox2)\n\n    # is_aligned is False, bboxes1.size(-2) == 0\n    gious = self(torch.empty(1, 2, 0, 4), bboxes2, 'giou')\n    assert torch.all(gious == torch.empty(1, 2, 0, bboxes2.size(-2)))\n    assert torch.all(gious >= -1) and torch.all(gious <= 1)\n\n    # test allclose between bbox_overlaps and the original official\n    # 
implementation.\n    bboxes1 = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [32, 32, 38, 42],\n    ])\n    bboxes2 = torch.FloatTensor([\n        [0, 0, 10, 20],\n        [0, 10, 10, 19],\n        [10, 10, 20, 20],\n    ])\n    gious = bbox_overlaps(bboxes1, bboxes2, 'giou', is_aligned=True, eps=eps)\n    gious = gious.numpy().round(4)\n    # the gt is got with four decimal precision.\n    expected_gious = np.array([0.5000, -0.0500, -0.8214])\n    assert np.allclose(gious, expected_gious, rtol=0, atol=eps)\n\n    # test mode 'iof'\n    ious = bbox_overlaps(bboxes1, bboxes2, 'iof', is_aligned=True, eps=eps)\n    assert torch.all(ious >= -1) and torch.all(ious <= 1)\n    assert ious.size() == (bboxes1.size(0), )\n    ious = bbox_overlaps(bboxes1, bboxes2, 'iof', eps=eps)\n    assert torch.all(ious >= -1) and torch.all(ious <= 1)\n    assert ious.size() == (bboxes1.size(0), bboxes2.size(0))\n\n\ndef test_voc_recall_overlaps():\n\n    def _construct_bbox(num_bbox=None):\n        img_h = int(np.random.randint(3, 1000))\n        img_w = int(np.random.randint(3, 1000))\n        if num_bbox is None:\n            num_bbox = np.random.randint(1, 10)\n        x1y1 = torch.rand((num_bbox, 2))\n        x2y2 = torch.max(torch.rand((num_bbox, 2)), x1y1)\n        bboxes = torch.cat((x1y1, x2y2), -1)\n        bboxes[:, 0::2] *= img_w\n        bboxes[:, 1::2] *= img_h\n        return bboxes.numpy(), num_bbox\n\n    bboxes1, num_bbox = _construct_bbox()\n    bboxes2, _ = _construct_bbox(num_bbox)\n    ious = recall_overlaps(\n        bboxes1, bboxes2, 'iou', use_legacy_coordinate=False)\n    assert ious.shape == (num_bbox, num_bbox)\n    assert np.all(ious >= -1) and np.all(ious <= 1)\n\n    ious = recall_overlaps(bboxes1, bboxes2, 'iou', use_legacy_coordinate=True)\n    assert ious.shape == (num_bbox, num_bbox)\n    assert np.all(ious >= -1) and np.all(ious <= 1)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_metrics/test_losses.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models import Accuracy, build_loss\n\n\ndef test_ce_loss():\n    # use_mask and use_sigmoid cannot be true at the same time\n    with pytest.raises(AssertionError):\n        loss_cfg = dict(\n            type='CrossEntropyLoss',\n            use_mask=True,\n            use_sigmoid=True,\n            loss_weight=1.0)\n        build_loss(loss_cfg)\n\n    # test loss with class weights\n    loss_cls_cfg = dict(\n        type='CrossEntropyLoss',\n        use_sigmoid=False,\n        class_weight=[0.8, 0.2],\n        loss_weight=1.0)\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[100, -100]])\n    fake_label = torch.Tensor([1]).long()\n    assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(40.))\n\n    loss_cls_cfg = dict(\n        type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0)\n    loss_cls = build_loss(loss_cls_cfg)\n    assert torch.allclose(loss_cls(fake_pred, fake_label), torch.tensor(200.))\n\n\ndef test_varifocal_loss():\n    # only sigmoid version of VarifocalLoss is implemented\n    with pytest.raises(AssertionError):\n        loss_cfg = dict(\n            type='VarifocalLoss', use_sigmoid=False, loss_weight=1.0)\n        build_loss(loss_cfg)\n\n    # test that alpha should be greater than 0\n    with pytest.raises(AssertionError):\n        loss_cfg = dict(\n            type='VarifocalLoss',\n            alpha=-0.75,\n            gamma=2.0,\n            use_sigmoid=True,\n            loss_weight=1.0)\n        build_loss(loss_cfg)\n\n    # test that pred and target should be of the same size\n    loss_cls_cfg = dict(\n        type='VarifocalLoss',\n        use_sigmoid=True,\n        alpha=0.75,\n        gamma=2.0,\n        iou_weighted=True,\n        reduction='mean',\n        loss_weight=1.0)\n    loss_cls = build_loss(loss_cls_cfg)\n    with pytest.raises(AssertionError):\n        fake_pred = torch.Tensor([[100.0, -100.0]])\n        fake_target = torch.Tensor([[1.0]])\n        loss_cls(fake_pred, fake_target)\n\n    # test the calculation\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[100.0, -100.0]])\n    fake_target = torch.Tensor([[1.0, 0.0]])\n    assert torch.allclose(loss_cls(fake_pred, fake_target), torch.tensor(0.0))\n\n    # test the loss with weights\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[0.0, 100.0]])\n    fake_target = torch.Tensor([[1.0, 1.0]])\n    fake_weight = torch.Tensor([0.0, 1.0])\n    assert torch.allclose(\n        loss_cls(fake_pred, fake_target, fake_weight), torch.tensor(0.0))\n\n\ndef test_kd_loss():\n    # test that temperature should be greater than 1\n    with pytest.raises(AssertionError):\n        loss_cfg = dict(\n            type='KnowledgeDistillationKLDivLoss', loss_weight=1.0, T=0.5)\n        build_loss(loss_cfg)\n\n    # test that pred and target should be of the same size\n    loss_cls_cfg = dict(\n        type='KnowledgeDistillationKLDivLoss', loss_weight=1.0, T=1)\n    loss_cls = build_loss(loss_cls_cfg)\n    with pytest.raises(AssertionError):\n        fake_pred = torch.Tensor([[100, -100]])\n        fake_label = torch.Tensor([1]).long()\n        loss_cls(fake_pred, fake_label)\n\n    # test the calculation\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[100.0, 100.0]])\n    fake_target = torch.Tensor([[1.0, 1.0]])\n    assert torch.allclose(loss_cls(fake_pred, fake_target), torch.tensor(0.0))\n\n    
# test the loss with weights\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[100.0, -100.0], [100.0, 100.0]])\n    fake_target = torch.Tensor([[1.0, 0.0], [1.0, 1.0]])\n    fake_weight = torch.Tensor([0.0, 1.0])\n    assert torch.allclose(\n        loss_cls(fake_pred, fake_target, fake_weight), torch.tensor(0.0))\n\n\ndef test_seesaw_loss():\n    # only softmax version of Seesaw Loss is implemented\n    with pytest.raises(AssertionError):\n        loss_cfg = dict(type='SeesawLoss', use_sigmoid=True, loss_weight=1.0)\n        build_loss(loss_cfg)\n\n    # test that cls_score.size(-1) == num_classes + 2\n    loss_cls_cfg = dict(\n        type='SeesawLoss', p=0.0, q=0.0, loss_weight=1.0, num_classes=2)\n    loss_cls = build_loss(loss_cls_cfg)\n    # the length of fake_pred should be num_classes + 2 = 4\n    with pytest.raises(AssertionError):\n        fake_pred = torch.Tensor([[-100, 100]])\n        fake_label = torch.Tensor([1]).long()\n        loss_cls(fake_pred, fake_label)\n    # the length of fake_pred should be num_classes + 2 = 4\n    with pytest.raises(AssertionError):\n        fake_pred = torch.Tensor([[-100, 100, -100]])\n        fake_label = torch.Tensor([1]).long()\n        loss_cls(fake_pred, fake_label)\n\n    # test the calculation without p and q\n    loss_cls_cfg = dict(\n        type='SeesawLoss', p=0.0, q=0.0, loss_weight=1.0, num_classes=2)\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[-100, 100, -100, 100]])\n    fake_label = torch.Tensor([1]).long()\n    loss = loss_cls(fake_pred, fake_label)\n    assert torch.allclose(loss['loss_cls_objectness'], torch.tensor(200.))\n    assert torch.allclose(loss['loss_cls_classes'], torch.tensor(0.))\n\n    # test the calculation with p and without q\n    loss_cls_cfg = dict(\n        type='SeesawLoss', p=1.0, q=0.0, loss_weight=1.0, num_classes=2)\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[-100, 100, -100, 100]])\n    fake_label = torch.Tensor([0]).long()\n    loss_cls.cum_samples[0] = torch.exp(torch.Tensor([20]))\n    loss = loss_cls(fake_pred, fake_label)\n    assert torch.allclose(loss['loss_cls_objectness'], torch.tensor(200.))\n    assert torch.allclose(loss['loss_cls_classes'], torch.tensor(180.))\n\n    # test the calculation with q and without p\n    loss_cls_cfg = dict(\n        type='SeesawLoss', p=0.0, q=1.0, loss_weight=1.0, num_classes=2)\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[-100, 100, -100, 100]])\n    fake_label = torch.Tensor([0]).long()\n    loss = loss_cls(fake_pred, fake_label)\n    assert torch.allclose(loss['loss_cls_objectness'], torch.tensor(200.))\n    assert torch.allclose(loss['loss_cls_classes'],\n                          torch.tensor(200.) 
+ torch.tensor(100.).log())\n\n    # test the others\n    loss_cls_cfg = dict(\n        type='SeesawLoss',\n        p=0.0,\n        q=1.0,\n        loss_weight=1.0,\n        num_classes=2,\n        return_dict=False)\n    loss_cls = build_loss(loss_cls_cfg)\n    fake_pred = torch.Tensor([[100, -100, 100, -100]])\n    fake_label = torch.Tensor([0]).long()\n    loss = loss_cls(fake_pred, fake_label)\n    acc = loss_cls.get_accuracy(fake_pred, fake_label)\n    act = loss_cls.get_activation(fake_pred)\n    assert torch.allclose(loss, torch.tensor(0.))\n    assert torch.allclose(acc['acc_objectness'], torch.tensor(100.))\n    assert torch.allclose(acc['acc_classes'], torch.tensor(100.))\n    assert torch.allclose(act, torch.tensor([1., 0., 0.]))\n\n\ndef test_accuracy():\n    # test for empty pred\n    pred = torch.empty(0, 4)\n    label = torch.empty(0)\n    accuracy = Accuracy(topk=1)\n    acc = accuracy(pred, label)\n    assert acc.item() == 0\n\n    pred = torch.Tensor([[0.2, 0.3, 0.6, 0.5], [0.1, 0.1, 0.2, 0.6],\n                         [0.9, 0.0, 0.0, 0.1], [0.4, 0.7, 0.1, 0.1],\n                         [0.0, 0.0, 0.99, 0]])\n    # test for top1\n    true_label = torch.Tensor([2, 3, 0, 1, 2]).long()\n    accuracy = Accuracy(topk=1)\n    acc = accuracy(pred, true_label)\n    assert acc.item() == 100\n\n    # test for top1 with score thresh=0.8\n    true_label = torch.Tensor([2, 3, 0, 1, 2]).long()\n    accuracy = Accuracy(topk=1, thresh=0.8)\n    acc = accuracy(pred, true_label)\n    assert acc.item() == 40\n\n    # test for top2\n    accuracy = Accuracy(topk=2)\n    label = torch.Tensor([3, 2, 0, 0, 2]).long()\n    acc = accuracy(pred, label)\n    assert acc.item() == 100\n\n    # test for both top1 and top2\n    accuracy = Accuracy(topk=(1, 2))\n    true_label = torch.Tensor([2, 3, 0, 1, 2]).long()\n    acc = accuracy(pred, true_label)\n    for a in acc:\n        assert a.item() == 100\n\n    # topk is larger than pred class number\n    with pytest.raises(AssertionError):\n        accuracy = Accuracy(topk=5)\n        accuracy(pred, true_label)\n\n    # wrong topk type\n    with pytest.raises(AssertionError):\n        accuracy = Accuracy(topk='wrong type')\n        accuracy(pred, true_label)\n\n    # label size is larger than required\n    with pytest.raises(AssertionError):\n        label = torch.Tensor([2, 3, 0, 1, 2, 0]).long()  # size mismatch\n        accuracy = Accuracy()\n        accuracy(pred, label)\n\n    # wrong pred dimension\n    with pytest.raises(AssertionError):\n        accuracy = Accuracy()\n        accuracy(pred[:, :, None], true_label)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_metrics/test_mean_ap.py",
    "content": "import numpy as np\n\nfrom mmdet.core.evaluation.mean_ap import (eval_map, tpfp_default,\n                                           tpfp_imagenet, tpfp_openimages)\n\ndet_bboxes = np.array([\n    [0, 0, 10, 10],\n    [10, 10, 20, 20],\n    [32, 32, 38, 42],\n])\ngt_bboxes = np.array([[0, 0, 10, 20], [0, 10, 10, 19], [10, 10, 20, 20]])\ngt_ignore = np.array([[5, 5, 10, 20], [6, 10, 10, 19]])\n\n\ndef test_tpfp_imagenet():\n\n    result = tpfp_imagenet(\n        det_bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_ignore,\n        use_legacy_coordinate=True)\n    tp = result[0]\n    fp = result[1]\n    assert tp.shape == (1, 3)\n    assert fp.shape == (1, 3)\n    assert (tp == np.array([[1, 1, 0]])).all()\n    assert (fp == np.array([[0, 0, 1]])).all()\n\n    result = tpfp_imagenet(\n        det_bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_ignore,\n        use_legacy_coordinate=False)\n    tp = result[0]\n    fp = result[1]\n    assert tp.shape == (1, 3)\n    assert fp.shape == (1, 3)\n    assert (tp == np.array([[1, 1, 0]])).all()\n    assert (fp == np.array([[0, 0, 1]])).all()\n\n\ndef test_tpfp_default():\n\n    result = tpfp_default(\n        det_bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_ignore,\n        use_legacy_coordinate=True)\n\n    tp = result[0]\n    fp = result[1]\n    assert tp.shape == (1, 3)\n    assert fp.shape == (1, 3)\n    assert (tp == np.array([[1, 1, 0]])).all()\n    assert (fp == np.array([[0, 0, 1]])).all()\n    result = tpfp_default(\n        det_bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_ignore,\n        use_legacy_coordinate=False)\n\n    tp = result[0]\n    fp = result[1]\n    assert tp.shape == (1, 3)\n    assert fp.shape == (1, 3)\n    assert (tp == np.array([[1, 1, 0]])).all()\n    assert (fp == np.array([[0, 0, 1]])).all()\n\n\ndef test_eval_map():\n\n    # 2 image and 2 classes\n    det_results = [[det_bboxes, det_bboxes], [det_bboxes, det_bboxes]]\n\n    labels = np.array([0, 1, 1])\n    labels_ignore = np.array([0, 1])\n    gt_info = {\n        'bboxes': gt_bboxes,\n        'bboxes_ignore': gt_ignore,\n        'labels': labels,\n        'labels_ignore': labels_ignore\n    }\n    annotations = [gt_info, gt_info]\n    mean_ap, eval_results = eval_map(\n        det_results, annotations, use_legacy_coordinate=True)\n    assert 0.291 < mean_ap < 0.293\n    mean_ap, eval_results = eval_map(\n        det_results, annotations, use_legacy_coordinate=False)\n    assert 0.291 < mean_ap < 0.293\n\n    # 1 image and 2 classes\n    det_results = [[det_bboxes, det_bboxes]]\n\n    labels = np.array([0, 1, 1])\n    labels_ignore = np.array([0, 1])\n    gt_info = {\n        'bboxes': gt_bboxes,\n        'bboxes_ignore': gt_ignore,\n        'labels': labels,\n        'labels_ignore': labels_ignore\n    }\n    annotations = [gt_info]\n    mean_ap, eval_results = eval_map(\n        det_results, annotations, use_legacy_coordinate=True)\n    assert 0.291 < mean_ap < 0.293\n    mean_ap, eval_results = eval_map(\n        det_results, annotations, use_legacy_coordinate=False)\n    assert 0.291 < mean_ap < 0.293\n\n\ndef test_tpfp_openimages():\n\n    det_bboxes = np.array([[10, 10, 15, 15, 1.0], [15, 15, 30, 30, 0.98],\n                           [10, 10, 25, 25, 0.98], [28, 28, 35, 35, 0.97],\n                           [30, 30, 51, 51, 0.96], [100, 110, 120, 130, 0.15]])\n    gt_bboxes = np.array([[10., 10., 30., 30.], [30., 30., 50., 50.]])\n    gt_groups_of = np.array([True, False], dtype=np.bool)\n    gt_ignore 
= np.zeros((0, 4))\n\n    # Open Images evaluation using group of.\n    result = tpfp_openimages(\n        det_bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_ignore,\n        gt_bboxes_group_of=gt_groups_of,\n        use_group_of=True,\n        ioa_thr=0.5)\n\n    tp = result[0]\n    fp = result[1]\n    cls_dets = result[2]\n\n    assert tp.shape == (1, 4)\n    assert fp.shape == (1, 4)\n    assert cls_dets.shape == (4, 5)\n\n    assert (tp == np.array([[0, 1, 0, 1]])).all()\n    assert (fp == np.array([[1, 0, 1, 0]])).all()\n    cls_dets_gt = np.array([[28., 28., 35., 35., 0.97],\n                            [30., 30., 51., 51., 0.96],\n                            [100., 110., 120., 130., 0.15],\n                            [10., 10., 15., 15., 1.]])\n    assert (cls_dets == cls_dets_gt).all()\n\n    # Open Images evaluation not using group of.\n    result = tpfp_openimages(\n        det_bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_ignore,\n        gt_bboxes_group_of=gt_groups_of,\n        use_group_of=False,\n        ioa_thr=0.5)\n    tp = result[0]\n    fp = result[1]\n    cls_dets = result[2]\n    assert tp.shape == (1, 6)\n    assert fp.shape == (1, 6)\n    assert cls_dets.shape == (6, 5)\n\n    # Open Images evaluation using group of, and gt is all group of bboxes.\n    gt_groups_of = np.array([True, True], dtype=np.bool)\n    result = tpfp_openimages(\n        det_bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_ignore,\n        gt_bboxes_group_of=gt_groups_of,\n        use_group_of=True,\n        ioa_thr=0.5)\n    tp = result[0]\n    fp = result[1]\n    cls_dets = result[2]\n    assert tp.shape == (1, 3)\n    assert fp.shape == (1, 3)\n    assert cls_dets.shape == (3, 5)\n\n    # Open Images evaluation with empty gt.\n    gt_bboxes = np.zeros((0, 4))\n    gt_groups_of = np.empty((0))\n    result = tpfp_openimages(\n        det_bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_ignore,\n        gt_bboxes_group_of=gt_groups_of,\n        use_group_of=True,\n        ioa_thr=0.5)\n    fp = result[1]\n    assert (fp == np.array([[1, 1, 1, 1, 1, 1]])).all()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_metrics/test_recall.py",
    "content": "import numpy as np\n\nfrom mmdet.core.evaluation.recall import eval_recalls\n\ndet_bboxes = np.array([\n    [0, 0, 10, 10],\n    [10, 10, 20, 20],\n    [32, 32, 38, 42],\n])\ngt_bboxes = np.array([[0, 0, 10, 20], [0, 10, 10, 19], [10, 10, 20, 20]])\ngt_ignore = np.array([[5, 5, 10, 20], [6, 10, 10, 19]])\n\n\ndef test_eval_recalls():\n    gts = [gt_bboxes, gt_bboxes, gt_bboxes]\n    proposals = [det_bboxes, det_bboxes, det_bboxes]\n\n    recall = eval_recalls(\n        gts, proposals, proposal_nums=2, use_legacy_coordinate=True)\n    assert recall.shape == (1, 1)\n    assert 0.66 < recall[0][0] < 0.667\n    recall = eval_recalls(\n        gts, proposals, proposal_nums=2, use_legacy_coordinate=False)\n    assert recall.shape == (1, 1)\n    assert 0.66 < recall[0][0] < 0.667\n\n    recall = eval_recalls(\n        gts, proposals, proposal_nums=2, use_legacy_coordinate=True)\n    assert recall.shape == (1, 1)\n    assert 0.66 < recall[0][0] < 0.667\n    recall = eval_recalls(\n        gts,\n        proposals,\n        iou_thrs=[0.1, 0.9],\n        proposal_nums=2,\n        use_legacy_coordinate=False)\n    assert recall.shape == (1, 2)\n    assert recall[0][1] <= recall[0][0]\n    recall = eval_recalls(\n        gts,\n        proposals,\n        iou_thrs=[0.1, 0.9],\n        proposal_nums=2,\n        use_legacy_coordinate=True)\n    assert recall.shape == (1, 2)\n    assert recall[0][1] <= recall[0][0]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .utils import check_norm_state, is_block, is_norm\n\n__all__ = ['is_block', 'is_norm', 'check_norm_state']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_csp_darknet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom mmdet.models.backbones.csp_darknet import CSPDarknet\nfrom .utils import check_norm_state, is_norm\n\n\ndef test_csp_darknet_backbone():\n    with pytest.raises(ValueError):\n        # frozen_stages must in range(-1, len(arch_setting) + 1)\n        CSPDarknet(frozen_stages=6)\n\n    with pytest.raises(AssertionError):\n        # out_indices in range(len(arch_setting) + 1)\n        CSPDarknet(out_indices=[6])\n\n    # Test CSPDarknet with first stage frozen\n    frozen_stages = 1\n    model = CSPDarknet(frozen_stages=frozen_stages)\n    model.train()\n\n    for mod in model.stem.modules():\n        for param in mod.parameters():\n            assert param.requires_grad is False\n    for i in range(1, frozen_stages + 1):\n        layer = getattr(model, f'stage{i}')\n        for mod in layer.modules():\n            if isinstance(mod, _BatchNorm):\n                assert mod.training is False\n        for param in layer.parameters():\n            assert param.requires_grad is False\n\n    # Test CSPDarknet with norm_eval=True\n    model = CSPDarknet(norm_eval=True)\n    model.train()\n\n    assert check_norm_state(model.modules(), False)\n\n    # Test CSPDarknet-P5 forward with widen_factor=0.5\n    model = CSPDarknet(arch='P5', widen_factor=0.25, out_indices=range(0, 5))\n    model.train()\n\n    imgs = torch.randn(1, 3, 64, 64)\n    feat = model(imgs)\n    assert len(feat) == 5\n    assert feat[0].shape == torch.Size((1, 16, 32, 32))\n    assert feat[1].shape == torch.Size((1, 32, 16, 16))\n    assert feat[2].shape == torch.Size((1, 64, 8, 8))\n    assert feat[3].shape == torch.Size((1, 128, 4, 4))\n    assert feat[4].shape == torch.Size((1, 256, 2, 2))\n\n    # Test CSPDarknet-P6 forward with widen_factor=0.5\n    model = CSPDarknet(\n        arch='P6',\n        widen_factor=0.25,\n        out_indices=range(0, 6),\n        spp_kernal_sizes=(3, 5, 7))\n    model.train()\n\n    imgs = torch.randn(1, 3, 128, 128)\n    feat = model(imgs)\n    assert feat[0].shape == torch.Size((1, 16, 64, 64))\n    assert feat[1].shape == torch.Size((1, 32, 32, 32))\n    assert feat[2].shape == torch.Size((1, 64, 16, 16))\n    assert feat[3].shape == torch.Size((1, 128, 8, 8))\n    assert feat[4].shape == torch.Size((1, 192, 4, 4))\n    assert feat[5].shape == torch.Size((1, 256, 2, 2))\n\n    # Test CSPDarknet forward with dict(type='ReLU')\n    model = CSPDarknet(\n        widen_factor=0.125, act_cfg=dict(type='ReLU'), out_indices=range(0, 5))\n    model.train()\n\n    imgs = torch.randn(1, 3, 64, 64)\n    feat = model(imgs)\n    assert len(feat) == 5\n    assert feat[0].shape == torch.Size((1, 8, 32, 32))\n    assert feat[1].shape == torch.Size((1, 16, 16, 16))\n    assert feat[2].shape == torch.Size((1, 32, 8, 8))\n    assert feat[3].shape == torch.Size((1, 64, 4, 4))\n    assert feat[4].shape == torch.Size((1, 128, 2, 2))\n\n    # Test CSPDarknet with BatchNorm forward\n    model = CSPDarknet(widen_factor=0.125, out_indices=range(0, 5))\n    for m in model.modules():\n        if is_norm(m):\n            assert isinstance(m, _BatchNorm)\n    model.train()\n\n    imgs = torch.randn(1, 3, 64, 64)\n    feat = model(imgs)\n    assert len(feat) == 5\n    assert feat[0].shape == torch.Size((1, 8, 32, 32))\n    assert feat[1].shape == torch.Size((1, 16, 16, 16))\n    assert feat[2].shape == torch.Size((1, 32, 8, 8))\n    assert feat[3].shape == torch.Size((1, 64, 4, 
4))\n    assert feat[4].shape == torch.Size((1, 128, 2, 2))\n\n    # Test CSPDarknet with custom arch forward\n    arch_ovewrite = [[32, 56, 3, True, False], [56, 224, 2, True, False],\n                     [224, 512, 1, True, False]]\n    model = CSPDarknet(\n        arch_ovewrite=arch_ovewrite,\n        widen_factor=0.25,\n        out_indices=(0, 1, 2, 3))\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size((1, 8, 16, 16))\n    assert feat[1].shape == torch.Size((1, 14, 8, 8))\n    assert feat[2].shape == torch.Size((1, 56, 4, 4))\n    assert feat[3].shape == torch.Size((1, 128, 2, 2))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_detectors_resnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\n\nfrom mmdet.models.backbones import DetectoRS_ResNet\n\n\ndef test_detectorrs_resnet_backbone():\n    detectorrs_cfg = dict(\n        depth=50,\n        num_stages=4,\n        out_indices=(0, 1, 2, 3),\n        frozen_stages=1,\n        norm_cfg=dict(type='BN', requires_grad=True),\n        norm_eval=True,\n        style='pytorch',\n        conv_cfg=dict(type='ConvAWS'),\n        sac=dict(type='SAC', use_deform=True),\n        stage_with_sac=(False, True, True, True),\n        output_img=True)\n    \"\"\"Test init_weights config\"\"\"\n    with pytest.raises(AssertionError):\n        # pretrained and init_cfg cannot be specified at the same time\n        DetectoRS_ResNet(\n            **detectorrs_cfg, pretrained='Pretrained', init_cfg='Pretrained')\n\n    with pytest.raises(AssertionError):\n        # init_cfg must be a dict\n        DetectoRS_ResNet(\n            **detectorrs_cfg, pretrained=None, init_cfg=['Pretrained'])\n\n    with pytest.raises(KeyError):\n        # init_cfg must contain the key `type`\n        DetectoRS_ResNet(\n            **detectorrs_cfg,\n            pretrained=None,\n            init_cfg=dict(checkpoint='Pretrained'))\n\n    with pytest.raises(AssertionError):\n        # init_cfg only support initialize pretrained model way\n        DetectoRS_ResNet(\n            **detectorrs_cfg, pretrained=None, init_cfg=dict(type='Trained'))\n\n    with pytest.raises(TypeError):\n        # pretrained mast be a str or None\n        model = DetectoRS_ResNet(\n            **detectorrs_cfg, pretrained=['Pretrained'], init_cfg=None)\n        model.init_weights()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_efficientnet.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet.models.backbones import EfficientNet\n\n\ndef test_efficientnet_backbone():\n    \"\"\"Test EfficientNet backbone.\"\"\"\n    with pytest.raises(AssertionError):\n        # EfficientNet arch should be a key in EfficientNet.arch_settings\n        EfficientNet(arch='c3')\n\n    model = EfficientNet(arch='b0', out_indices=(0, 1, 2, 3, 4, 5, 6))\n    model.train()\n\n    imgs = torch.randn(2, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 7\n    assert feat[0].shape == torch.Size([2, 32, 16, 16])\n    assert feat[1].shape == torch.Size([2, 16, 16, 16])\n    assert feat[2].shape == torch.Size([2, 24, 8, 8])\n    assert feat[3].shape == torch.Size([2, 40, 4, 4])\n    assert feat[4].shape == torch.Size([2, 112, 2, 2])\n    assert feat[5].shape == torch.Size([2, 320, 1, 1])\n    assert feat[6].shape == torch.Size([2, 1280, 1, 1])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_hourglass.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.backbones.hourglass import HourglassNet\n\n\ndef test_hourglass_backbone():\n    with pytest.raises(AssertionError):\n        # HourglassNet's num_stacks should larger than 0\n        HourglassNet(num_stacks=0)\n\n    with pytest.raises(AssertionError):\n        # len(stage_channels) should equal len(stage_blocks)\n        HourglassNet(\n            stage_channels=[256, 256, 384, 384, 384],\n            stage_blocks=[2, 2, 2, 2, 2, 4])\n\n    with pytest.raises(AssertionError):\n        # len(stage_channels) should lagrer than downsample_times\n        HourglassNet(\n            downsample_times=5,\n            stage_channels=[256, 256, 384, 384, 384],\n            stage_blocks=[2, 2, 2, 2, 2])\n\n    # Test HourglassNet-52\n    model = HourglassNet(\n        num_stacks=1,\n        stage_channels=(64, 64, 96, 96, 96, 128),\n        feat_channel=64)\n    model.train()\n\n    imgs = torch.randn(1, 3, 256, 256)\n    feat = model(imgs)\n    assert len(feat) == 1\n    assert feat[0].shape == torch.Size([1, 64, 64, 64])\n\n    # Test HourglassNet-104\n    model = HourglassNet(\n        num_stacks=2,\n        stage_channels=(64, 64, 96, 96, 96, 128),\n        feat_channel=64)\n    model.train()\n\n    imgs = torch.randn(1, 3, 256, 256)\n    feat = model(imgs)\n    assert len(feat) == 2\n    assert feat[0].shape == torch.Size([1, 64, 64, 64])\n    assert feat[1].shape == torch.Size([1, 64, 64, 64])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_hrnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.backbones.hrnet import HRModule, HRNet\nfrom mmdet.models.backbones.resnet import BasicBlock, Bottleneck\n\n\n@pytest.mark.parametrize('block', [BasicBlock, Bottleneck])\ndef test_hrmodule(block):\n    # Test multiscale forward\n    num_channles = (32, 64)\n    in_channels = [c * block.expansion for c in num_channles]\n    hrmodule = HRModule(\n        num_branches=2,\n        blocks=block,\n        in_channels=in_channels,\n        num_blocks=(4, 4),\n        num_channels=num_channles,\n    )\n\n    feats = [\n        torch.randn(1, in_channels[0], 64, 64),\n        torch.randn(1, in_channels[1], 32, 32)\n    ]\n    feats = hrmodule(feats)\n\n    assert len(feats) == 2\n    assert feats[0].shape == torch.Size([1, in_channels[0], 64, 64])\n    assert feats[1].shape == torch.Size([1, in_channels[1], 32, 32])\n\n    # Test single scale forward\n    num_channles = (32, 64)\n    in_channels = [c * block.expansion for c in num_channles]\n    hrmodule = HRModule(\n        num_branches=2,\n        blocks=block,\n        in_channels=in_channels,\n        num_blocks=(4, 4),\n        num_channels=num_channles,\n        multiscale_output=False,\n    )\n\n    feats = [\n        torch.randn(1, in_channels[0], 64, 64),\n        torch.randn(1, in_channels[1], 32, 32)\n    ]\n    feats = hrmodule(feats)\n\n    assert len(feats) == 1\n    assert feats[0].shape == torch.Size([1, in_channels[0], 64, 64])\n\n\ndef test_hrnet_backbone():\n    # only have 3 stages\n    extra = dict(\n        stage1=dict(\n            num_modules=1,\n            num_branches=1,\n            block='BOTTLENECK',\n            num_blocks=(4, ),\n            num_channels=(64, )),\n        stage2=dict(\n            num_modules=1,\n            num_branches=2,\n            block='BASIC',\n            num_blocks=(4, 4),\n            num_channels=(32, 64)),\n        stage3=dict(\n            num_modules=4,\n            num_branches=3,\n            block='BASIC',\n            num_blocks=(4, 4, 4),\n            num_channels=(32, 64, 128)))\n\n    with pytest.raises(AssertionError):\n        # HRNet now only support 4 stages\n        HRNet(extra=extra)\n    extra['stage4'] = dict(\n        num_modules=3,\n        num_branches=3,  # should be 4\n        block='BASIC',\n        num_blocks=(4, 4, 4, 4),\n        num_channels=(32, 64, 128, 256))\n\n    with pytest.raises(AssertionError):\n        # len(num_blocks) should equal num_branches\n        HRNet(extra=extra)\n\n    extra['stage4']['num_branches'] = 4\n\n    # Test hrnetv2p_w32\n    model = HRNet(extra=extra)\n    model.init_weights()\n    model.train()\n\n    imgs = torch.randn(1, 3, 256, 256)\n    feats = model(imgs)\n    assert len(feats) == 4\n    assert feats[0].shape == torch.Size([1, 32, 64, 64])\n    assert feats[3].shape == torch.Size([1, 256, 8, 8])\n\n    # Test single scale output\n    model = HRNet(extra=extra, multiscale_output=False)\n    model.init_weights()\n    model.train()\n\n    imgs = torch.randn(1, 3, 256, 256)\n    feats = model(imgs)\n    assert len(feats) == 1\n    assert feats[0].shape == torch.Size([1, 32, 64, 64])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_mobilenet_v2.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nfrom torch.nn.modules import GroupNorm\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom mmdet.models.backbones.mobilenet_v2 import MobileNetV2\nfrom .utils import check_norm_state, is_block, is_norm\n\n\ndef test_mobilenetv2_backbone():\n    with pytest.raises(ValueError):\n        # frozen_stages must in range(-1, 8)\n        MobileNetV2(frozen_stages=8)\n\n    with pytest.raises(ValueError):\n        # out_indices in range(-1, 8)\n        MobileNetV2(out_indices=[8])\n\n    # Test MobileNetV2 with first stage frozen\n    frozen_stages = 1\n    model = MobileNetV2(frozen_stages=frozen_stages)\n    model.train()\n\n    for mod in model.conv1.modules():\n        for param in mod.parameters():\n            assert param.requires_grad is False\n    for i in range(1, frozen_stages + 1):\n        layer = getattr(model, f'layer{i}')\n        for mod in layer.modules():\n            if isinstance(mod, _BatchNorm):\n                assert mod.training is False\n        for param in layer.parameters():\n            assert param.requires_grad is False\n\n    # Test MobileNetV2 with norm_eval=True\n    model = MobileNetV2(norm_eval=True)\n    model.train()\n\n    assert check_norm_state(model.modules(), False)\n\n    # Test MobileNetV2 forward with widen_factor=1.0\n    model = MobileNetV2(widen_factor=1.0, out_indices=range(0, 8))\n    model.train()\n\n    assert check_norm_state(model.modules(), True)\n\n    imgs = torch.randn(1, 3, 224, 224)\n    feat = model(imgs)\n    assert len(feat) == 8\n    assert feat[0].shape == torch.Size((1, 16, 112, 112))\n    assert feat[1].shape == torch.Size((1, 24, 56, 56))\n    assert feat[2].shape == torch.Size((1, 32, 28, 28))\n    assert feat[3].shape == torch.Size((1, 64, 14, 14))\n    assert feat[4].shape == torch.Size((1, 96, 14, 14))\n    assert feat[5].shape == torch.Size((1, 160, 7, 7))\n    assert feat[6].shape == torch.Size((1, 320, 7, 7))\n    assert feat[7].shape == torch.Size((1, 1280, 7, 7))\n\n    # Test MobileNetV2 forward with widen_factor=0.5\n    model = MobileNetV2(widen_factor=0.5, out_indices=range(0, 7))\n    model.train()\n\n    imgs = torch.randn(1, 3, 224, 224)\n    feat = model(imgs)\n    assert len(feat) == 7\n    assert feat[0].shape == torch.Size((1, 8, 112, 112))\n    assert feat[1].shape == torch.Size((1, 16, 56, 56))\n    assert feat[2].shape == torch.Size((1, 16, 28, 28))\n    assert feat[3].shape == torch.Size((1, 32, 14, 14))\n    assert feat[4].shape == torch.Size((1, 48, 14, 14))\n    assert feat[5].shape == torch.Size((1, 80, 7, 7))\n    assert feat[6].shape == torch.Size((1, 160, 7, 7))\n\n    # Test MobileNetV2 forward with widen_factor=2.0\n    model = MobileNetV2(widen_factor=2.0, out_indices=range(0, 8))\n    model.train()\n\n    imgs = torch.randn(1, 3, 224, 224)\n    feat = model(imgs)\n    assert feat[0].shape == torch.Size((1, 32, 112, 112))\n    assert feat[1].shape == torch.Size((1, 48, 56, 56))\n    assert feat[2].shape == torch.Size((1, 64, 28, 28))\n    assert feat[3].shape == torch.Size((1, 128, 14, 14))\n    assert feat[4].shape == torch.Size((1, 192, 14, 14))\n    assert feat[5].shape == torch.Size((1, 320, 7, 7))\n    assert feat[6].shape == torch.Size((1, 640, 7, 7))\n    assert feat[7].shape == torch.Size((1, 2560, 7, 7))\n\n    # Test MobileNetV2 forward with dict(type='ReLU')\n    model = MobileNetV2(\n        widen_factor=1.0, act_cfg=dict(type='ReLU'), out_indices=range(0, 7))\n    model.train()\n\n   
 imgs = torch.randn(1, 3, 224, 224)\n    feat = model(imgs)\n    assert len(feat) == 7\n    assert feat[0].shape == torch.Size((1, 16, 112, 112))\n    assert feat[1].shape == torch.Size((1, 24, 56, 56))\n    assert feat[2].shape == torch.Size((1, 32, 28, 28))\n    assert feat[3].shape == torch.Size((1, 64, 14, 14))\n    assert feat[4].shape == torch.Size((1, 96, 14, 14))\n    assert feat[5].shape == torch.Size((1, 160, 7, 7))\n    assert feat[6].shape == torch.Size((1, 320, 7, 7))\n\n    # Test MobileNetV2 with BatchNorm forward\n    model = MobileNetV2(widen_factor=1.0, out_indices=range(0, 7))\n    for m in model.modules():\n        if is_norm(m):\n            assert isinstance(m, _BatchNorm)\n    model.train()\n\n    imgs = torch.randn(1, 3, 224, 224)\n    feat = model(imgs)\n    assert len(feat) == 7\n    assert feat[0].shape == torch.Size((1, 16, 112, 112))\n    assert feat[1].shape == torch.Size((1, 24, 56, 56))\n    assert feat[2].shape == torch.Size((1, 32, 28, 28))\n    assert feat[3].shape == torch.Size((1, 64, 14, 14))\n    assert feat[4].shape == torch.Size((1, 96, 14, 14))\n    assert feat[5].shape == torch.Size((1, 160, 7, 7))\n    assert feat[6].shape == torch.Size((1, 320, 7, 7))\n\n    # Test MobileNetV2 with GroupNorm forward\n    model = MobileNetV2(\n        widen_factor=1.0,\n        norm_cfg=dict(type='GN', num_groups=2, requires_grad=True),\n        out_indices=range(0, 7))\n    for m in model.modules():\n        if is_norm(m):\n            assert isinstance(m, GroupNorm)\n    model.train()\n\n    imgs = torch.randn(1, 3, 224, 224)\n    feat = model(imgs)\n    assert len(feat) == 7\n    assert feat[0].shape == torch.Size((1, 16, 112, 112))\n    assert feat[1].shape == torch.Size((1, 24, 56, 56))\n    assert feat[2].shape == torch.Size((1, 32, 28, 28))\n    assert feat[3].shape == torch.Size((1, 64, 14, 14))\n    assert feat[4].shape == torch.Size((1, 96, 14, 14))\n    assert feat[5].shape == torch.Size((1, 160, 7, 7))\n    assert feat[6].shape == torch.Size((1, 320, 7, 7))\n\n    # Test MobileNetV2 with layers 1, 3, 5 out forward\n    model = MobileNetV2(widen_factor=1.0, out_indices=(0, 2, 4))\n    model.train()\n\n    imgs = torch.randn(1, 3, 224, 224)\n    feat = model(imgs)\n    assert len(feat) == 3\n    assert feat[0].shape == torch.Size((1, 16, 112, 112))\n    assert feat[1].shape == torch.Size((1, 32, 28, 28))\n    assert feat[2].shape == torch.Size((1, 96, 14, 14))\n\n    # Test MobileNetV2 with checkpoint forward\n    model = MobileNetV2(\n        widen_factor=1.0, with_cp=True, out_indices=range(0, 7))\n    for m in model.modules():\n        if is_block(m):\n            assert m.with_cp\n    model.train()\n\n    imgs = torch.randn(1, 3, 224, 224)\n    feat = model(imgs)\n    assert len(feat) == 7\n    assert feat[0].shape == torch.Size((1, 16, 112, 112))\n    assert feat[1].shape == torch.Size((1, 24, 56, 56))\n    assert feat[2].shape == torch.Size((1, 32, 28, 28))\n    assert feat[3].shape == torch.Size((1, 64, 14, 14))\n    assert feat[4].shape == torch.Size((1, 96, 14, 14))\n    assert feat[5].shape == torch.Size((1, 160, 7, 7))\n    assert feat[6].shape == torch.Size((1, 320, 7, 7))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_pvt.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet.models.backbones.pvt import (PVTEncoderLayer,\n                                        PyramidVisionTransformer,\n                                        PyramidVisionTransformerV2)\n\n\ndef test_pvt_block():\n    # test PVT structure and forward\n    block = PVTEncoderLayer(\n        embed_dims=64, num_heads=4, feedforward_channels=256)\n    assert block.ffn.embed_dims == 64\n    assert block.attn.num_heads == 4\n    assert block.ffn.feedforward_channels == 256\n    x = torch.randn(1, 56 * 56, 64)\n    x_out = block(x, (56, 56))\n    assert x_out.shape == torch.Size([1, 56 * 56, 64])\n\n\ndef test_pvt():\n    \"\"\"Test PVT backbone.\"\"\"\n\n    with pytest.raises(TypeError):\n        # Pretrained arg must be str or None.\n        PyramidVisionTransformer(pretrained=123)\n\n    # test pretrained image size\n    with pytest.raises(AssertionError):\n        PyramidVisionTransformer(pretrain_img_size=(224, 224, 224))\n\n    # Test absolute position embedding\n    temp = torch.randn((1, 3, 224, 224))\n    model = PyramidVisionTransformer(\n        pretrain_img_size=224, use_abs_pos_embed=True)\n    model.init_weights()\n    model(temp)\n\n    # Test normal inference\n    temp = torch.randn((1, 3, 32, 32))\n    model = PyramidVisionTransformer()\n    outs = model(temp)\n    assert outs[0].shape == (1, 64, 8, 8)\n    assert outs[1].shape == (1, 128, 4, 4)\n    assert outs[2].shape == (1, 320, 2, 2)\n    assert outs[3].shape == (1, 512, 1, 1)\n\n    # Test abnormal inference size\n    temp = torch.randn((1, 3, 33, 33))\n    model = PyramidVisionTransformer()\n    outs = model(temp)\n    assert outs[0].shape == (1, 64, 8, 8)\n    assert outs[1].shape == (1, 128, 4, 4)\n    assert outs[2].shape == (1, 320, 2, 2)\n    assert outs[3].shape == (1, 512, 1, 1)\n\n    # Test abnormal inference size\n    temp = torch.randn((1, 3, 112, 137))\n    model = PyramidVisionTransformer()\n    outs = model(temp)\n    assert outs[0].shape == (1, 64, 28, 34)\n    assert outs[1].shape == (1, 128, 14, 17)\n    assert outs[2].shape == (1, 320, 7, 8)\n    assert outs[3].shape == (1, 512, 3, 4)\n\n\ndef test_pvtv2():\n    \"\"\"Test PVTv2 backbone.\"\"\"\n\n    with pytest.raises(TypeError):\n        # Pretrained arg must be str or None.\n        PyramidVisionTransformerV2(pretrained=123)\n\n    # test pretrained image size\n    with pytest.raises(AssertionError):\n        PyramidVisionTransformerV2(pretrain_img_size=(224, 224, 224))\n\n    # Test normal inference\n    temp = torch.randn((1, 3, 32, 32))\n    model = PyramidVisionTransformerV2()\n    outs = model(temp)\n    assert outs[0].shape == (1, 64, 8, 8)\n    assert outs[1].shape == (1, 128, 4, 4)\n    assert outs[2].shape == (1, 320, 2, 2)\n    assert outs[3].shape == (1, 512, 1, 1)\n\n    # Test abnormal inference size\n    temp = torch.randn((1, 3, 31, 31))\n    model = PyramidVisionTransformerV2()\n    outs = model(temp)\n    assert outs[0].shape == (1, 64, 8, 8)\n    assert outs[1].shape == (1, 128, 4, 4)\n    assert outs[2].shape == (1, 320, 2, 2)\n    assert outs[3].shape == (1, 512, 1, 1)\n\n    # Test abnormal inference size\n    temp = torch.randn((1, 3, 112, 137))\n    model = PyramidVisionTransformerV2()\n    outs = model(temp)\n    assert outs[0].shape == (1, 64, 28, 35)\n    assert outs[1].shape == (1, 128, 14, 18)\n    assert outs[2].shape == (1, 320, 7, 9)\n    assert outs[3].shape == (1, 512, 4, 5)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_regnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.backbones import RegNet\n\nregnet_test_data = [\n    ('regnetx_400mf',\n     dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22,\n          bot_mul=1.0), [32, 64, 160, 384]),\n    ('regnetx_800mf',\n     dict(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16,\n          bot_mul=1.0), [64, 128, 288, 672]),\n    ('regnetx_1.6gf',\n     dict(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18,\n          bot_mul=1.0), [72, 168, 408, 912]),\n    ('regnetx_3.2gf',\n     dict(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25,\n          bot_mul=1.0), [96, 192, 432, 1008]),\n    ('regnetx_4.0gf',\n     dict(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23,\n          bot_mul=1.0), [80, 240, 560, 1360]),\n    ('regnetx_6.4gf',\n     dict(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17,\n          bot_mul=1.0), [168, 392, 784, 1624]),\n    ('regnetx_8.0gf',\n     dict(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23,\n          bot_mul=1.0), [80, 240, 720, 1920]),\n    ('regnetx_12gf',\n     dict(w0=168, wa=73.36, wm=2.37, group_w=112, depth=19,\n          bot_mul=1.0), [224, 448, 896, 2240]),\n]\n\n\n@pytest.mark.parametrize('arch_name,arch,out_channels', regnet_test_data)\ndef test_regnet_backbone(arch_name, arch, out_channels):\n    with pytest.raises(AssertionError):\n        # ResNeXt depth should be in [50, 101, 152]\n        RegNet(arch_name + '233')\n\n    # Test RegNet with arch_name\n    model = RegNet(arch_name)\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, out_channels[0], 8, 8])\n    assert feat[1].shape == torch.Size([1, out_channels[1], 4, 4])\n    assert feat[2].shape == torch.Size([1, out_channels[2], 2, 2])\n    assert feat[3].shape == torch.Size([1, out_channels[3], 1, 1])\n\n    # Test RegNet with arch\n    model = RegNet(arch)\n    assert feat[0].shape == torch.Size([1, out_channels[0], 8, 8])\n    assert feat[1].shape == torch.Size([1, out_channels[1], 4, 4])\n    assert feat[2].shape == torch.Size([1, out_channels[2], 2, 2])\n    assert feat[3].shape == torch.Size([1, out_channels[3], 1, 1])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_renext.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.backbones import ResNeXt\nfrom mmdet.models.backbones.resnext import Bottleneck as BottleneckX\nfrom .utils import is_block\n\n\ndef test_renext_bottleneck():\n    with pytest.raises(AssertionError):\n        # Style must be in ['pytorch', 'caffe']\n        BottleneckX(64, 64, groups=32, base_width=4, style='tensorflow')\n\n    # Test ResNeXt Bottleneck structure\n    block = BottleneckX(\n        64, 64, groups=32, base_width=4, stride=2, style='pytorch')\n    assert block.conv2.stride == (2, 2)\n    assert block.conv2.groups == 32\n    assert block.conv2.out_channels == 128\n\n    # Test ResNeXt Bottleneck with DCN\n    dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)\n    with pytest.raises(AssertionError):\n        # conv_cfg must be None if dcn is not None\n        BottleneckX(\n            64,\n            64,\n            groups=32,\n            base_width=4,\n            dcn=dcn,\n            conv_cfg=dict(type='Conv'))\n    BottleneckX(64, 64, dcn=dcn)\n\n    # Test ResNeXt Bottleneck forward\n    block = BottleneckX(64, 16, groups=32, base_width=4)\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # Test ResNeXt Bottleneck forward with plugins\n    plugins = [\n        dict(\n            cfg=dict(\n                type='GeneralizedAttention',\n                spatial_range=-1,\n                num_heads=8,\n                attention_type='0010',\n                kv_stride=2),\n            stages=(False, False, True, True),\n            position='after_conv2')\n    ]\n    block = BottleneckX(64, 16, groups=32, base_width=4, plugins=plugins)\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n\ndef test_resnext_backbone():\n    with pytest.raises(KeyError):\n        # ResNeXt depth should be in [50, 101, 152]\n        ResNeXt(depth=18)\n\n    # Test ResNeXt with group 32, base_width 4\n    model = ResNeXt(depth=50, groups=32, base_width=4)\n    for m in model.modules():\n        if is_block(m):\n            assert m.conv2.groups == 32\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 256, 8, 8])\n    assert feat[1].shape == torch.Size([1, 512, 4, 4])\n    assert feat[2].shape == torch.Size([1, 1024, 2, 2])\n    assert feat[3].shape == torch.Size([1, 2048, 1, 1])\n\n\nregnet_test_data = [\n    ('regnetx_400mf',\n     dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22,\n          bot_mul=1.0), [32, 64, 160, 384]),\n    ('regnetx_800mf',\n     dict(w0=56, wa=35.73, wm=2.28, group_w=16, depth=16,\n          bot_mul=1.0), [64, 128, 288, 672]),\n    ('regnetx_1.6gf',\n     dict(w0=80, wa=34.01, wm=2.25, group_w=24, depth=18,\n          bot_mul=1.0), [72, 168, 408, 912]),\n    ('regnetx_3.2gf',\n     dict(w0=88, wa=26.31, wm=2.25, group_w=48, depth=25,\n          bot_mul=1.0), [96, 192, 432, 1008]),\n    ('regnetx_4.0gf',\n     dict(w0=96, wa=38.65, wm=2.43, group_w=40, depth=23,\n          bot_mul=1.0), [80, 240, 560, 1360]),\n    ('regnetx_6.4gf',\n     dict(w0=184, wa=60.83, wm=2.07, group_w=56, depth=17,\n          bot_mul=1.0), [168, 392, 784, 1624]),\n    ('regnetx_8.0gf',\n     dict(w0=80, wa=49.56, wm=2.88, group_w=120, depth=23,\n          bot_mul=1.0), [80, 240, 720, 1920]),\n    ('regnetx_12gf',\n     dict(w0=168, 
wa=73.36, wm=2.37, group_w=112, depth=19,\n          bot_mul=1.0), [224, 448, 896, 2240]),\n]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_res2net.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.backbones import Res2Net\nfrom mmdet.models.backbones.res2net import Bottle2neck\nfrom .utils import is_block\n\n\ndef test_res2net_bottle2neck():\n    with pytest.raises(AssertionError):\n        # Style must be in ['pytorch', 'caffe']\n        Bottle2neck(64, 64, base_width=26, scales=4, style='tensorflow')\n\n    with pytest.raises(AssertionError):\n        # Scale must be larger than 1\n        Bottle2neck(64, 64, base_width=26, scales=1, style='pytorch')\n\n    # Test Res2Net Bottle2neck structure\n    block = Bottle2neck(\n        64, 64, base_width=26, stride=2, scales=4, style='pytorch')\n    assert block.scales == 4\n\n    # Test Res2Net Bottle2neck with DCN\n    dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)\n    with pytest.raises(AssertionError):\n        # conv_cfg must be None if dcn is not None\n        Bottle2neck(\n            64,\n            64,\n            base_width=26,\n            scales=4,\n            dcn=dcn,\n            conv_cfg=dict(type='Conv'))\n    Bottle2neck(64, 64, dcn=dcn)\n\n    # Test Res2Net Bottle2neck forward\n    block = Bottle2neck(64, 16, base_width=26, scales=4)\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n\ndef test_res2net_backbone():\n    with pytest.raises(KeyError):\n        # Res2Net depth should be in [50, 101, 152]\n        Res2Net(depth=18)\n\n    # Test Res2Net with scales 4, base_width 26\n    model = Res2Net(depth=50, scales=4, base_width=26)\n    for m in model.modules():\n        if is_block(m):\n            assert m.scales == 4\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 256, 8, 8])\n    assert feat[1].shape == torch.Size([1, 512, 4, 4])\n    assert feat[2].shape == torch.Size([1, 1024, 2, 2])\n    assert feat[3].shape == torch.Size([1, 2048, 1, 1])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_resnest.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.backbones import ResNeSt\nfrom mmdet.models.backbones.resnest import Bottleneck as BottleneckS\n\n\ndef test_resnest_bottleneck():\n    with pytest.raises(AssertionError):\n        # Style must be in ['pytorch', 'caffe']\n        BottleneckS(64, 64, radix=2, reduction_factor=4, style='tensorflow')\n\n    # Test ResNeSt Bottleneck structure\n    block = BottleneckS(\n        2, 4, radix=2, reduction_factor=4, stride=2, style='pytorch')\n    assert block.avd_layer.stride == 2\n    assert block.conv2.channels == 4\n\n    # Test ResNeSt Bottleneck forward\n    block = BottleneckS(16, 4, radix=2, reduction_factor=4)\n    x = torch.randn(2, 16, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([2, 16, 56, 56])\n\n\ndef test_resnest_backbone():\n    with pytest.raises(KeyError):\n        # ResNeSt depth should be in [50, 101, 152, 200]\n        ResNeSt(depth=18)\n\n    # Test ResNeSt with radix 2, reduction_factor 4\n    model = ResNeSt(\n        depth=50,\n        base_channels=4,\n        radix=2,\n        reduction_factor=4,\n        out_indices=(0, 1, 2, 3))\n    model.train()\n\n    imgs = torch.randn(2, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([2, 16, 8, 8])\n    assert feat[1].shape == torch.Size([2, 32, 4, 4])\n    assert feat[2].shape == torch.Size([2, 64, 2, 2])\n    assert feat[3].shape == torch.Size([2, 128, 1, 1])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_resnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nfrom mmcv import assert_params_all_zeros\nfrom mmcv.ops import DeformConv2dPack\nfrom torch.nn.modules import AvgPool2d, GroupNorm\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom mmdet.models.backbones import ResNet, ResNetV1d\nfrom mmdet.models.backbones.resnet import BasicBlock, Bottleneck\nfrom mmdet.models.utils import ResLayer, SimplifiedBasicBlock\nfrom .utils import check_norm_state, is_block, is_norm\n\n\ndef test_resnet_basic_block():\n    with pytest.raises(AssertionError):\n        # Not implemented yet.\n        dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)\n        BasicBlock(64, 64, dcn=dcn)\n\n    with pytest.raises(AssertionError):\n        # Not implemented yet.\n        plugins = [\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                position='after_conv3')\n        ]\n        BasicBlock(64, 64, plugins=plugins)\n\n    with pytest.raises(AssertionError):\n        # Not implemented yet\n        plugins = [\n            dict(\n                cfg=dict(\n                    type='GeneralizedAttention',\n                    spatial_range=-1,\n                    num_heads=8,\n                    attention_type='0010',\n                    kv_stride=2),\n                position='after_conv2')\n        ]\n        BasicBlock(64, 64, plugins=plugins)\n\n    # test BasicBlock structure and forward\n    block = BasicBlock(64, 64)\n    assert block.conv1.in_channels == 64\n    assert block.conv1.out_channels == 64\n    assert block.conv1.kernel_size == (3, 3)\n    assert block.conv2.in_channels == 64\n    assert block.conv2.out_channels == 64\n    assert block.conv2.kernel_size == (3, 3)\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # Test BasicBlock with checkpoint forward\n    block = BasicBlock(64, 64, with_cp=True)\n    assert block.with_cp\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n\ndef test_resnet_bottleneck():\n    with pytest.raises(AssertionError):\n        # Style must be in ['pytorch', 'caffe']\n        Bottleneck(64, 64, style='tensorflow')\n\n    with pytest.raises(AssertionError):\n        # Allowed positions are 'after_conv1', 'after_conv2', 'after_conv3'\n        plugins = [\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                position='after_conv4')\n        ]\n        Bottleneck(64, 16, plugins=plugins)\n\n    with pytest.raises(AssertionError):\n        # Need to specify different postfix to avoid duplicate plugin name\n        plugins = [\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                position='after_conv3'),\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. 
/ 16),\n                position='after_conv3')\n        ]\n        Bottleneck(64, 16, plugins=plugins)\n\n    with pytest.raises(KeyError):\n        # Plugin type is not supported\n        plugins = [dict(cfg=dict(type='WrongPlugin'), position='after_conv3')]\n        Bottleneck(64, 16, plugins=plugins)\n\n    # Test Bottleneck with checkpoint forward\n    block = Bottleneck(64, 16, with_cp=True)\n    assert block.with_cp\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # Test Bottleneck style\n    block = Bottleneck(64, 64, stride=2, style='pytorch')\n    assert block.conv1.stride == (1, 1)\n    assert block.conv2.stride == (2, 2)\n    block = Bottleneck(64, 64, stride=2, style='caffe')\n    assert block.conv1.stride == (2, 2)\n    assert block.conv2.stride == (1, 1)\n\n    # Test Bottleneck DCN\n    dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)\n    with pytest.raises(AssertionError):\n        Bottleneck(64, 64, dcn=dcn, conv_cfg=dict(type='Conv'))\n    block = Bottleneck(64, 64, dcn=dcn)\n    assert isinstance(block.conv2, DeformConv2dPack)\n\n    # Test Bottleneck forward\n    block = Bottleneck(64, 16)\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # Test Bottleneck with 1 ContextBlock after conv3\n    plugins = [\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16),\n            position='after_conv3')\n    ]\n    block = Bottleneck(64, 16, plugins=plugins)\n    assert block.context_block.in_channels == 64\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # Test Bottleneck with 1 GeneralizedAttention after conv2\n    plugins = [\n        dict(\n            cfg=dict(\n                type='GeneralizedAttention',\n                spatial_range=-1,\n                num_heads=8,\n                attention_type='0010',\n                kv_stride=2),\n            position='after_conv2')\n    ]\n    block = Bottleneck(64, 16, plugins=plugins)\n    assert block.gen_attention_block.in_channels == 16\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # Test Bottleneck with 1 GeneralizedAttention after conv2, 1 NonLocal2D\n    # after conv2, 1 ContextBlock after conv3\n    plugins = [\n        dict(\n            cfg=dict(\n                type='GeneralizedAttention',\n                spatial_range=-1,\n                num_heads=8,\n                attention_type='0010',\n                kv_stride=2),\n            position='after_conv2'),\n        dict(cfg=dict(type='NonLocal2d'), position='after_conv2'),\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16),\n            position='after_conv3')\n    ]\n    block = Bottleneck(64, 16, plugins=plugins)\n    assert block.gen_attention_block.in_channels == 16\n    assert block.nonlocal_block.in_channels == 16\n    assert block.context_block.in_channels == 64\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # Test Bottleneck with 1 ContextBlock after conv2, 2 ContextBlock after\n    # conv3\n    plugins = [\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=1),\n            position='after_conv2'),\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. 
/ 16, postfix=2),\n            position='after_conv3'),\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=3),\n            position='after_conv3')\n    ]\n    block = Bottleneck(64, 16, plugins=plugins)\n    assert block.context_block1.in_channels == 16\n    assert block.context_block2.in_channels == 64\n    assert block.context_block3.in_channels == 64\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n\ndef test_simplied_basic_block():\n    with pytest.raises(AssertionError):\n        # Not implemented yet.\n        dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)\n        SimplifiedBasicBlock(64, 64, dcn=dcn)\n\n    with pytest.raises(AssertionError):\n        # Not implemented yet.\n        plugins = [\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                position='after_conv3')\n        ]\n        SimplifiedBasicBlock(64, 64, plugins=plugins)\n\n    with pytest.raises(AssertionError):\n        # Not implemented yet\n        plugins = [\n            dict(\n                cfg=dict(\n                    type='GeneralizedAttention',\n                    spatial_range=-1,\n                    num_heads=8,\n                    attention_type='0010',\n                    kv_stride=2),\n                position='after_conv2')\n        ]\n        SimplifiedBasicBlock(64, 64, plugins=plugins)\n\n    with pytest.raises(AssertionError):\n        # Not implemented yet\n        SimplifiedBasicBlock(64, 64, with_cp=True)\n\n    # test SimplifiedBasicBlock structure and forward\n    block = SimplifiedBasicBlock(64, 64)\n    assert block.conv1.in_channels == 64\n    assert block.conv1.out_channels == 64\n    assert block.conv1.kernel_size == (3, 3)\n    assert block.conv2.in_channels == 64\n    assert block.conv2.out_channels == 64\n    assert block.conv2.kernel_size == (3, 3)\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # test SimplifiedBasicBlock without norm\n    block = SimplifiedBasicBlock(64, 64, norm_cfg=None)\n    assert block.norm1 is None\n    assert block.norm2 is None\n    x_out = block(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n\ndef test_resnet_res_layer():\n    # Test ResLayer of 3 Bottleneck w\\o downsample\n    layer = ResLayer(Bottleneck, 64, 16, 3)\n    assert len(layer) == 3\n    assert layer[0].conv1.in_channels == 64\n    assert layer[0].conv1.out_channels == 16\n    for i in range(1, len(layer)):\n        assert layer[i].conv1.in_channels == 64\n        assert layer[i].conv1.out_channels == 16\n    for i in range(len(layer)):\n        assert layer[i].downsample is None\n    x = torch.randn(1, 64, 56, 56)\n    x_out = layer(x)\n    assert x_out.shape == torch.Size([1, 64, 56, 56])\n\n    # Test ResLayer of 3 Bottleneck with downsample\n    layer = ResLayer(Bottleneck, 64, 64, 3)\n    assert layer[0].downsample[0].out_channels == 256\n    for i in range(1, len(layer)):\n        assert layer[i].downsample is None\n    x = torch.randn(1, 64, 56, 56)\n    x_out = layer(x)\n    assert x_out.shape == torch.Size([1, 256, 56, 56])\n\n    # Test ResLayer of 3 Bottleneck with stride=2\n    layer = ResLayer(Bottleneck, 64, 64, 3, stride=2)\n    assert layer[0].downsample[0].out_channels == 256\n    assert layer[0].downsample[0].stride == (2, 2)\n    for i in range(1, len(layer)):\n        assert layer[i].downsample is None\n    x = 
torch.randn(1, 64, 56, 56)\n    x_out = layer(x)\n    assert x_out.shape == torch.Size([1, 256, 28, 28])\n\n    # Test ResLayer of 3 Bottleneck with stride=2 and average downsample\n    layer = ResLayer(Bottleneck, 64, 64, 3, stride=2, avg_down=True)\n    assert isinstance(layer[0].downsample[0], AvgPool2d)\n    assert layer[0].downsample[1].out_channels == 256\n    assert layer[0].downsample[1].stride == (1, 1)\n    for i in range(1, len(layer)):\n        assert layer[i].downsample is None\n    x = torch.randn(1, 64, 56, 56)\n    x_out = layer(x)\n    assert x_out.shape == torch.Size([1, 256, 28, 28])\n\n    # Test ResLayer of 3 BasicBlock with stride=2 and downsample_first=False\n    layer = ResLayer(BasicBlock, 64, 64, 3, stride=2, downsample_first=False)\n    assert layer[2].downsample[0].out_channels == 64\n    assert layer[2].downsample[0].stride == (2, 2)\n    for i in range(len(layer) - 1):\n        assert layer[i].downsample is None\n    x = torch.randn(1, 64, 56, 56)\n    x_out = layer(x)\n    assert x_out.shape == torch.Size([1, 64, 28, 28])\n\n\ndef test_resnest_stem():\n    # Test default stem_channels\n    model = ResNet(50)\n    assert model.stem_channels == 64\n    assert model.conv1.out_channels == 64\n    assert model.norm1.num_features == 64\n\n    # Test default stem_channels, with base_channels=3\n    model = ResNet(50, base_channels=3)\n    assert model.stem_channels == 3\n    assert model.conv1.out_channels == 3\n    assert model.norm1.num_features == 3\n    assert model.layer1[0].conv1.in_channels == 3\n\n    # Test stem_channels=3\n    model = ResNet(50, stem_channels=3)\n    assert model.stem_channels == 3\n    assert model.conv1.out_channels == 3\n    assert model.norm1.num_features == 3\n    assert model.layer1[0].conv1.in_channels == 3\n\n    # Test stem_channels=3, with base_channels=2\n    model = ResNet(50, stem_channels=3, base_channels=2)\n    assert model.stem_channels == 3\n    assert model.conv1.out_channels == 3\n    assert model.norm1.num_features == 3\n    assert model.layer1[0].conv1.in_channels == 3\n\n    # Test V1d stem_channels\n    model = ResNetV1d(depth=50, stem_channels=6)\n    model.train()\n    assert model.stem[0].out_channels == 3\n    assert model.stem[1].num_features == 3\n    assert model.stem[3].out_channels == 3\n    assert model.stem[4].num_features == 3\n    assert model.stem[6].out_channels == 6\n    assert model.stem[7].num_features == 6\n    assert model.layer1[0].conv1.in_channels == 6\n\n\ndef test_resnet_backbone():\n    \"\"\"Test resnet backbone.\"\"\"\n    with pytest.raises(KeyError):\n        # ResNet depth should be in [18, 34, 50, 101, 152]\n        ResNet(20)\n\n    with pytest.raises(AssertionError):\n        # In ResNet: 1 <= num_stages <= 4\n        ResNet(50, num_stages=0)\n\n    with pytest.raises(AssertionError):\n        # len(stage_with_dcn) == num_stages\n        dcn = dict(type='DCN', deform_groups=1, fallback_on_stride=False)\n        ResNet(50, dcn=dcn, stage_with_dcn=(True, ))\n\n    with pytest.raises(AssertionError):\n        # len(stage_with_plugin) == num_stages\n        plugins = [\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. 
/ 16),\n                stages=(False, True, True),\n                position='after_conv3')\n        ]\n        ResNet(50, plugins=plugins)\n\n    with pytest.raises(AssertionError):\n        # In ResNet: 1 <= num_stages <= 4\n        ResNet(50, num_stages=5)\n\n    with pytest.raises(AssertionError):\n        # len(strides) == len(dilations) == num_stages\n        ResNet(50, strides=(1, ), dilations=(1, 1), num_stages=3)\n\n    with pytest.raises(TypeError):\n        # pretrained must be a string path\n        model = ResNet(50, pretrained=0)\n\n    with pytest.raises(AssertionError):\n        # Style must be in ['pytorch', 'caffe']\n        ResNet(50, style='tensorflow')\n\n    # Test ResNet50 norm_eval=True\n    model = ResNet(50, norm_eval=True, base_channels=1)\n    model.train()\n    assert check_norm_state(model.modules(), False)\n\n    # Test ResNet50 with torchvision pretrained weight\n    model = ResNet(\n        depth=50, norm_eval=True, pretrained='torchvision://resnet50')\n    model.train()\n    assert check_norm_state(model.modules(), False)\n\n    # Test ResNet50 with first stage frozen\n    frozen_stages = 1\n    model = ResNet(50, frozen_stages=frozen_stages, base_channels=1)\n    model.train()\n    assert model.norm1.training is False\n    for layer in [model.conv1, model.norm1]:\n        for param in layer.parameters():\n            assert param.requires_grad is False\n    for i in range(1, frozen_stages + 1):\n        layer = getattr(model, f'layer{i}')\n        for mod in layer.modules():\n            if isinstance(mod, _BatchNorm):\n                assert mod.training is False\n        for param in layer.parameters():\n            assert param.requires_grad is False\n\n    # Test ResNet50V1d with first stage frozen\n    model = ResNetV1d(depth=50, frozen_stages=frozen_stages, base_channels=2)\n    assert len(model.stem) == 9\n    model.train()\n    assert check_norm_state(model.stem, False)\n    for param in model.stem.parameters():\n        assert param.requires_grad is False\n    for i in range(1, frozen_stages + 1):\n        layer = getattr(model, f'layer{i}')\n        for mod in layer.modules():\n            if isinstance(mod, _BatchNorm):\n                assert mod.training is False\n        for param in layer.parameters():\n            assert param.requires_grad is False\n\n    # Test ResNet18 forward\n    model = ResNet(18)\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 64, 8, 8])\n    assert feat[1].shape == torch.Size([1, 128, 4, 4])\n    assert feat[2].shape == torch.Size([1, 256, 2, 2])\n    assert feat[3].shape == torch.Size([1, 512, 1, 1])\n\n    # Test ResNet18 with checkpoint forward\n    model = ResNet(18, with_cp=True)\n    for m in model.modules():\n        if is_block(m):\n            assert m.with_cp\n\n    # Test ResNet50 with BatchNorm forward\n    model = ResNet(50, base_channels=1)\n    for m in model.modules():\n        if is_norm(m):\n            assert isinstance(m, _BatchNorm)\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 4, 8, 8])\n    assert feat[1].shape == torch.Size([1, 8, 4, 4])\n    assert feat[2].shape == torch.Size([1, 16, 2, 2])\n    assert feat[3].shape == torch.Size([1, 32, 1, 1])\n\n    # Test ResNet50 with layers 1, 2, 3 out forward\n    model = ResNet(50, out_indices=(0, 1, 2), base_channels=1)\n    
model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 3\n    assert feat[0].shape == torch.Size([1, 4, 8, 8])\n    assert feat[1].shape == torch.Size([1, 8, 4, 4])\n    assert feat[2].shape == torch.Size([1, 16, 2, 2])\n\n    # Test ResNet50 with checkpoint forward\n    model = ResNet(50, with_cp=True, base_channels=1)\n    for m in model.modules():\n        if is_block(m):\n            assert m.with_cp\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 4, 8, 8])\n    assert feat[1].shape == torch.Size([1, 8, 4, 4])\n    assert feat[2].shape == torch.Size([1, 16, 2, 2])\n    assert feat[3].shape == torch.Size([1, 32, 1, 1])\n\n    # Test ResNet50 with GroupNorm forward\n    model = ResNet(\n        50,\n        base_channels=4,\n        norm_cfg=dict(type='GN', num_groups=2, requires_grad=True))\n    for m in model.modules():\n        if is_norm(m):\n            assert isinstance(m, GroupNorm)\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 16, 8, 8])\n    assert feat[1].shape == torch.Size([1, 32, 4, 4])\n    assert feat[2].shape == torch.Size([1, 64, 2, 2])\n    assert feat[3].shape == torch.Size([1, 128, 1, 1])\n\n    # Test ResNet50 with 1 GeneralizedAttention after conv2, 1 NonLocal2D\n    # after conv2, 1 ContextBlock after conv3 in layers 2, 3, 4\n    plugins = [\n        dict(\n            cfg=dict(\n                type='GeneralizedAttention',\n                spatial_range=-1,\n                num_heads=8,\n                attention_type='0010',\n                kv_stride=2),\n            stages=(False, True, True, True),\n            position='after_conv2'),\n        dict(cfg=dict(type='NonLocal2d'), position='after_conv2'),\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. 
/ 16),\n            stages=(False, True, True, False),\n            position='after_conv3')\n    ]\n    model = ResNet(50, plugins=plugins, base_channels=8)\n    for m in model.layer1.modules():\n        if is_block(m):\n            assert not hasattr(m, 'context_block')\n            assert not hasattr(m, 'gen_attention_block')\n            assert m.nonlocal_block.in_channels == 8\n    for m in model.layer2.modules():\n        if is_block(m):\n            assert m.nonlocal_block.in_channels == 16\n            assert m.gen_attention_block.in_channels == 16\n            assert m.context_block.in_channels == 64\n\n    for m in model.layer3.modules():\n        if is_block(m):\n            assert m.nonlocal_block.in_channels == 32\n            assert m.gen_attention_block.in_channels == 32\n            assert m.context_block.in_channels == 128\n\n    for m in model.layer4.modules():\n        if is_block(m):\n            assert m.nonlocal_block.in_channels == 64\n            assert m.gen_attention_block.in_channels == 64\n            assert not hasattr(m, 'context_block')\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 32, 8, 8])\n    assert feat[1].shape == torch.Size([1, 64, 4, 4])\n    assert feat[2].shape == torch.Size([1, 128, 2, 2])\n    assert feat[3].shape == torch.Size([1, 256, 1, 1])\n\n    # Test ResNet50 with 1 ContextBlock after conv2, 1 ContextBlock after\n    # conv3 in layers 2, 3, 4\n    plugins = [\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=1),\n            stages=(False, True, True, False),\n            position='after_conv3'),\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=2),\n            stages=(False, True, True, False),\n            position='after_conv3')\n    ]\n\n    model = ResNet(50, plugins=plugins, base_channels=8)\n    for m in model.layer1.modules():\n        if is_block(m):\n            assert not hasattr(m, 'context_block')\n            assert not hasattr(m, 'context_block1')\n            assert not hasattr(m, 'context_block2')\n    for m in model.layer2.modules():\n        if is_block(m):\n            assert not hasattr(m, 'context_block')\n            assert m.context_block1.in_channels == 64\n            assert m.context_block2.in_channels == 64\n\n    for m in model.layer3.modules():\n        if is_block(m):\n            assert not hasattr(m, 'context_block')\n            assert m.context_block1.in_channels == 128\n            assert m.context_block2.in_channels == 128\n\n    for m in model.layer4.modules():\n        if is_block(m):\n            assert not hasattr(m, 'context_block')\n            assert not hasattr(m, 'context_block1')\n            assert not hasattr(m, 'context_block2')\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 32, 8, 8])\n    assert feat[1].shape == torch.Size([1, 64, 4, 4])\n    assert feat[2].shape == torch.Size([1, 128, 2, 2])\n    assert feat[3].shape == torch.Size([1, 256, 1, 1])\n\n    # Test ResNet50 zero initialization of residual\n    model = ResNet(50, zero_init_residual=True, base_channels=1)\n    model.init_weights()\n    for m in model.modules():\n        if isinstance(m, Bottleneck):\n            assert assert_params_all_zeros(m.norm3)\n        elif isinstance(m, BasicBlock):\n            assert assert_params_all_zeros(m.norm2)\n    
model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 4, 8, 8])\n    assert feat[1].shape == torch.Size([1, 8, 4, 4])\n    assert feat[2].shape == torch.Size([1, 16, 2, 2])\n    assert feat[3].shape == torch.Size([1, 32, 1, 1])\n\n    # Test ResNetV1d forward\n    model = ResNetV1d(depth=50, base_channels=2)\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 4\n    assert feat[0].shape == torch.Size([1, 8, 8, 8])\n    assert feat[1].shape == torch.Size([1, 16, 4, 4])\n    assert feat[2].shape == torch.Size([1, 32, 2, 2])\n    assert feat[3].shape == torch.Size([1, 64, 1, 1])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_swin.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet.models.backbones.swin import SwinBlock, SwinTransformer\n\n\ndef test_swin_block():\n    # test SwinBlock structure and forward\n    block = SwinBlock(embed_dims=64, num_heads=4, feedforward_channels=256)\n    assert block.ffn.embed_dims == 64\n    assert block.attn.w_msa.num_heads == 4\n    assert block.ffn.feedforward_channels == 256\n    x = torch.randn(1, 56 * 56, 64)\n    x_out = block(x, (56, 56))\n    assert x_out.shape == torch.Size([1, 56 * 56, 64])\n\n    # Test BasicBlock with checkpoint forward\n    block = SwinBlock(\n        embed_dims=64, num_heads=4, feedforward_channels=256, with_cp=True)\n    assert block.with_cp\n    x = torch.randn(1, 56 * 56, 64)\n    x_out = block(x, (56, 56))\n    assert x_out.shape == torch.Size([1, 56 * 56, 64])\n\n\ndef test_swin_transformer():\n    \"\"\"Test Swin Transformer backbone.\"\"\"\n\n    with pytest.raises(TypeError):\n        # Pretrained arg must be str or None.\n        SwinTransformer(pretrained=123)\n\n    with pytest.raises(AssertionError):\n        # Because swin uses non-overlapping patch embed, so the stride of patch\n        # embed must be equal to patch size.\n        SwinTransformer(strides=(2, 2, 2, 2), patch_size=4)\n\n    # test pretrained image size\n    with pytest.raises(AssertionError):\n        SwinTransformer(pretrain_img_size=(224, 224, 224))\n\n    # Test absolute position embedding\n    temp = torch.randn((1, 3, 224, 224))\n    model = SwinTransformer(pretrain_img_size=224, use_abs_pos_embed=True)\n    model.init_weights()\n    model(temp)\n    # Test different inputs when use absolute position embedding\n    temp = torch.randn((1, 3, 112, 112))\n    model(temp)\n    temp = torch.randn((1, 3, 256, 256))\n    model(temp)\n\n    # Test patch norm\n    model = SwinTransformer(patch_norm=False)\n    model(temp)\n\n    # Test normal inference\n    temp = torch.randn((1, 3, 32, 32))\n    model = SwinTransformer()\n    outs = model(temp)\n    assert outs[0].shape == (1, 96, 8, 8)\n    assert outs[1].shape == (1, 192, 4, 4)\n    assert outs[2].shape == (1, 384, 2, 2)\n    assert outs[3].shape == (1, 768, 1, 1)\n\n    # Test abnormal inference size\n    temp = torch.randn((1, 3, 31, 31))\n    model = SwinTransformer()\n    outs = model(temp)\n    assert outs[0].shape == (1, 96, 8, 8)\n    assert outs[1].shape == (1, 192, 4, 4)\n    assert outs[2].shape == (1, 384, 2, 2)\n    assert outs[3].shape == (1, 768, 1, 1)\n\n    # Test abnormal inference size\n    temp = torch.randn((1, 3, 112, 137))\n    model = SwinTransformer()\n    outs = model(temp)\n    assert outs[0].shape == (1, 96, 28, 35)\n    assert outs[1].shape == (1, 192, 14, 18)\n    assert outs[2].shape == (1, 384, 7, 9)\n    assert outs[3].shape == (1, 768, 4, 5)\n\n    model = SwinTransformer(frozen_stages=4)\n    model.train()\n    for p in model.parameters():\n        assert not p.requires_grad\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/test_trident_resnet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.backbones import TridentResNet\nfrom mmdet.models.backbones.trident_resnet import TridentBottleneck\n\n\ndef test_trident_resnet_bottleneck():\n    trident_dilations = (1, 2, 3)\n    test_branch_idx = 1\n    concat_output = True\n    trident_build_config = (trident_dilations, test_branch_idx, concat_output)\n\n    with pytest.raises(AssertionError):\n        # Style must be in ['pytorch', 'caffe']\n        TridentBottleneck(\n            *trident_build_config, inplanes=64, planes=64, style='tensorflow')\n\n    with pytest.raises(AssertionError):\n        # Allowed positions are 'after_conv1', 'after_conv2', 'after_conv3'\n        plugins = [\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                position='after_conv4')\n        ]\n        TridentBottleneck(\n            *trident_build_config, inplanes=64, planes=16, plugins=plugins)\n\n    with pytest.raises(AssertionError):\n        # Need to specify different postfix to avoid duplicate plugin name\n        plugins = [\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                position='after_conv3'),\n            dict(\n                cfg=dict(type='ContextBlock', ratio=1. / 16),\n                position='after_conv3')\n        ]\n        TridentBottleneck(\n            *trident_build_config, inplanes=64, planes=16, plugins=plugins)\n\n    with pytest.raises(KeyError):\n        # Plugin type is not supported\n        plugins = [dict(cfg=dict(type='WrongPlugin'), position='after_conv3')]\n        TridentBottleneck(\n            *trident_build_config, inplanes=64, planes=16, plugins=plugins)\n\n    # Test Bottleneck with checkpoint forward\n    block = TridentBottleneck(\n        *trident_build_config, inplanes=64, planes=16, with_cp=True)\n    assert block.with_cp\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([block.num_branch, 64, 56, 56])\n\n    # Test Bottleneck style\n    block = TridentBottleneck(\n        *trident_build_config,\n        inplanes=64,\n        planes=64,\n        stride=2,\n        style='pytorch')\n    assert block.conv1.stride == (1, 1)\n    assert block.conv2.stride == (2, 2)\n    block = TridentBottleneck(\n        *trident_build_config, inplanes=64, planes=64, stride=2, style='caffe')\n    assert block.conv1.stride == (2, 2)\n    assert block.conv2.stride == (1, 1)\n\n    # Test Bottleneck forward\n    block = TridentBottleneck(*trident_build_config, inplanes=64, planes=16)\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([block.num_branch, 64, 56, 56])\n\n    # Test Bottleneck with 1 ContextBlock after conv3\n    plugins = [\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. 
/ 16),\n            position='after_conv3')\n    ]\n    block = TridentBottleneck(\n        *trident_build_config, inplanes=64, planes=16, plugins=plugins)\n    assert block.context_block.in_channels == 64\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([block.num_branch, 64, 56, 56])\n\n    # Test Bottleneck with 1 GeneralizedAttention after conv2\n    plugins = [\n        dict(\n            cfg=dict(\n                type='GeneralizedAttention',\n                spatial_range=-1,\n                num_heads=8,\n                attention_type='0010',\n                kv_stride=2),\n            position='after_conv2')\n    ]\n    block = TridentBottleneck(\n        *trident_build_config, inplanes=64, planes=16, plugins=plugins)\n    assert block.gen_attention_block.in_channels == 16\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([block.num_branch, 64, 56, 56])\n\n    # Test Bottleneck with 1 GeneralizedAttention after conv2, 1 NonLocal2D\n    # after conv2, 1 ContextBlock after conv3\n    plugins = [\n        dict(\n            cfg=dict(\n                type='GeneralizedAttention',\n                spatial_range=-1,\n                num_heads=8,\n                attention_type='0010',\n                kv_stride=2),\n            position='after_conv2'),\n        dict(cfg=dict(type='NonLocal2d'), position='after_conv2'),\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16),\n            position='after_conv3')\n    ]\n    block = TridentBottleneck(\n        *trident_build_config, inplanes=64, planes=16, plugins=plugins)\n    assert block.gen_attention_block.in_channels == 16\n    assert block.nonlocal_block.in_channels == 16\n    assert block.context_block.in_channels == 64\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([block.num_branch, 64, 56, 56])\n\n    # Test Bottleneck with 1 ContextBlock after conv2, 2 ContextBlock after\n    # conv3\n    plugins = [\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=1),\n            position='after_conv2'),\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. / 16, postfix=2),\n            position='after_conv3'),\n        dict(\n            cfg=dict(type='ContextBlock', ratio=1. 
/ 16, postfix=3),\n            position='after_conv3')\n    ]\n    block = TridentBottleneck(\n        *trident_build_config, inplanes=64, planes=16, plugins=plugins)\n    assert block.context_block1.in_channels == 16\n    assert block.context_block2.in_channels == 64\n    assert block.context_block3.in_channels == 64\n    x = torch.randn(1, 64, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size([block.num_branch, 64, 56, 56])\n\n\ndef test_trident_resnet_backbone():\n    tridentresnet_config = dict(\n        num_branch=3,\n        test_branch_idx=1,\n        strides=(1, 2, 2),\n        dilations=(1, 1, 1),\n        trident_dilations=(1, 2, 3),\n        out_indices=(2, ),\n    )\n    \"\"\"Test tridentresnet backbone.\"\"\"\n    with pytest.raises(AssertionError):\n        # TridentResNet depth should be in [50, 101, 152]\n        TridentResNet(18, **tridentresnet_config)\n\n    with pytest.raises(AssertionError):\n        # In TridentResNet: num_stages == 3\n        TridentResNet(50, num_stages=4, **tridentresnet_config)\n\n    model = TridentResNet(50, num_stages=3, **tridentresnet_config)\n    model.train()\n\n    imgs = torch.randn(1, 3, 32, 32)\n    feat = model(imgs)\n    assert len(feat) == 1\n    assert feat[0].shape == torch.Size([3, 1024, 2, 2])\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_backbones/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom torch.nn.modules import GroupNorm\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom mmdet.models.backbones.res2net import Bottle2neck\nfrom mmdet.models.backbones.resnet import BasicBlock, Bottleneck\nfrom mmdet.models.backbones.resnext import Bottleneck as BottleneckX\nfrom mmdet.models.utils import SimplifiedBasicBlock\n\n\ndef is_block(modules):\n    \"\"\"Check if is ResNet building block.\"\"\"\n    if isinstance(modules, (BasicBlock, Bottleneck, BottleneckX, Bottle2neck,\n                            SimplifiedBasicBlock)):\n        return True\n    return False\n\n\ndef is_norm(modules):\n    \"\"\"Check if is one of the norms.\"\"\"\n    if isinstance(modules, (GroupNorm, _BatchNorm)):\n        return True\n    return False\n\n\ndef check_norm_state(modules, train_state):\n    \"\"\"Check if norm layer is in correct train state.\"\"\"\n    for mod in modules:\n        if isinstance(mod, _BatchNorm):\n            if mod.training != train_state:\n                return False\n    return True\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_anchor_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import AnchorHead\n\n\ndef test_anchor_head_loss():\n    \"\"\"Tests anchor head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n\n    cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False))\n    self = AnchorHead(num_classes=4, in_channels=1, train_cfg=cfg)\n\n    # Anchor head expects a multiple levels of features per image\n    feat = [\n        torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2)))\n        for i in range(len(self.anchor_generator.strides))\n    ]\n    cls_scores, bbox_preds = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n    empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n    onegt_box_loss = sum(one_gt_losses['loss_bbox'])\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_atss_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import ATSSHead\n\n\ndef test_atss_head_loss():\n    \"\"\"Tests atss head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(type='ATSSAssigner', topk=9),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    self = ATSSHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    cls_scores, bbox_preds, centernesses = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, centernesses,\n                                gt_bboxes, gt_labels, img_metas,\n                                gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n    empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n    empty_centerness_loss = sum(empty_gt_losses['loss_centerness'])\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    assert empty_centerness_loss.item() == 0, (\n        'there should be no centerness loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, centernesses, gt_bboxes,\n                              gt_labels, img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n    onegt_box_loss = sum(one_gt_losses['loss_bbox'])\n    onegt_centerness_loss = sum(one_gt_losses['loss_centerness'])\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n    assert onegt_centerness_loss.item() > 0, (\n        'centerness loss should be non-zero')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_autoassign_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads.autoassign_head import AutoAssignHead\nfrom mmdet.models.dense_heads.paa_head import levels_to_images\n\n\ndef test_autoassign_head_loss():\n    \"\"\"Tests autoassign head loss when truth is empty and non-empty.\"\"\"\n\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(assigner=None, allowed_border=-1, pos_weight=-1, debug=False))\n    self = AutoAssignHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    self.init_weights()\n    cls_scores, bbox_preds, objectnesses = self(feat)\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, objectnesses,\n                                gt_bboxes, gt_labels, img_metas,\n                                gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_pos_loss = empty_gt_losses['loss_pos']\n    empty_neg_loss = empty_gt_losses['loss_neg']\n    empty_center_loss = empty_gt_losses['loss_center']\n    assert empty_neg_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_pos_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    assert empty_center_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, objectnesses, gt_bboxes,\n                              gt_labels, img_metas, gt_bboxes_ignore)\n    onegt_pos_loss = one_gt_losses['loss_pos']\n    onegt_neg_loss = one_gt_losses['loss_neg']\n    onegt_center_loss = one_gt_losses['loss_center']\n    assert onegt_pos_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_neg_loss.item() > 0, 'box loss should be non-zero'\n    assert onegt_center_loss.item() > 0, 'box loss should be non-zero'\n    n, c, h, w = 10, 4, 20, 20\n    mlvl_tensor = [torch.ones(n, c, h, w) for i in range(5)]\n    results = levels_to_images(mlvl_tensor)\n    assert len(results) == n\n    assert results[0].size() == (h * w * 5, c)\n\n    self = AutoAssignHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        strides=(4, ))\n    cls_scores = [torch.ones(2, 4, 5, 5)]\n    bbox_preds = [torch.ones(2, 4, 5, 5)]\n    iou_preds = [torch.ones(2, 1, 5, 5)]\n    cfg = mmcv.Config(\n        dict(\n            nms_pre=1000,\n            min_bbox_size=0,\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.6),\n            
max_per_img=100))\n    rescale = False\n    self.get_bboxes(\n        cls_scores, bbox_preds, iou_preds, img_metas, cfg, rescale=rescale)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_centernet_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom mmcv import ConfigDict\n\nfrom mmdet.models.dense_heads import CenterNetHead\n\n\ndef test_center_head_loss():\n    \"\"\"Tests center head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    test_cfg = dict(topK=100, max_per_img=100)\n    self = CenterNetHead(\n        num_classes=4, in_channel=1, feat_channel=4, test_cfg=test_cfg)\n\n    feat = [torch.rand(1, 1, s, s)]\n    center_out, wh_out, offset_out = self.forward(feat)\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(center_out, wh_out, offset_out, gt_bboxes,\n                                gt_labels, img_metas, gt_bboxes_ignore)\n    loss_center = empty_gt_losses['loss_center_heatmap']\n    loss_wh = empty_gt_losses['loss_wh']\n    loss_offset = empty_gt_losses['loss_offset']\n    assert loss_center.item() > 0, 'loss_center should be non-zero'\n    assert loss_wh.item() == 0, (\n        'there should be no loss_wh when there are no true boxes')\n    assert loss_offset.item() == 0, (\n        'there should be no loss_offset when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(center_out, wh_out, offset_out, gt_bboxes,\n                              gt_labels, img_metas, gt_bboxes_ignore)\n    loss_center = one_gt_losses['loss_center_heatmap']\n    loss_wh = one_gt_losses['loss_wh']\n    loss_offset = one_gt_losses['loss_offset']\n    assert loss_center.item() > 0, 'loss_center should be non-zero'\n    assert loss_wh.item() > 0, 'loss_wh should be non-zero'\n    assert loss_offset.item() > 0, 'loss_offset should be non-zero'\n\n\ndef test_centernet_head_get_bboxes():\n    \"\"\"Tests center head generating and decoding the heatmap.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': np.array([1., 1., 1., 1.]),\n        'pad_shape': (s, s, 3),\n        'batch_input_shape': (s, s),\n        'border': (0, 0, 0, 0),\n        'flip': False\n    }]\n    test_cfg = ConfigDict(\n        dict(topk=100, local_maximum_kernel=3, max_per_img=100))\n    gt_bboxes = [\n        torch.Tensor([[10, 20, 200, 240], [40, 50, 100, 200],\n                      [10, 20, 100, 240]])\n    ]\n    gt_labels = [torch.LongTensor([1, 1, 2])]\n\n    self = CenterNetHead(\n        num_classes=4, in_channel=1, feat_channel=4, test_cfg=test_cfg)\n    self.feat_shape = (1, 1, s // 4, s // 4)\n    targets, _ = self.get_targets(gt_bboxes, gt_labels, self.feat_shape,\n                                  img_metas[0]['pad_shape'])\n    center_target = targets['center_heatmap_target']\n    wh_target = targets['wh_target']\n    offset_target = targets['offset_target']\n    # make sure assign target right\n    for i in range(len(gt_bboxes[0])):\n        bbox, label = gt_bboxes[0][i] / 4, gt_labels[0][i]\n        ctx, cty = sum(bbox[0::2]) / 2, sum(bbox[1::2]) / 2\n        int_ctx, int_cty = int(sum(bbox[0::2]) / 2), int(sum(bbox[1::2]) / 2)\n        w, h = bbox[2] - bbox[0], bbox[3] - 
bbox[1]\n        x_off = ctx - int(ctx)\n        y_off = cty - int(cty)\n        assert center_target[0, label, int_cty, int_ctx] == 1\n        assert wh_target[0, 0, int_cty, int_ctx] == w\n        assert wh_target[0, 1, int_cty, int_ctx] == h\n        assert offset_target[0, 0, int_cty, int_ctx] == x_off\n        assert offset_target[0, 1, int_cty, int_ctx] == y_off\n    # make sure get_bboxes is right\n    detections = self.get_bboxes([center_target], [wh_target], [offset_target],\n                                 img_metas,\n                                 rescale=True,\n                                 with_nms=False)\n    out_bboxes = detections[0][0][:3]\n    out_clses = detections[0][1][:3]\n    for bbox, cls in zip(out_bboxes, out_clses):\n        flag = False\n        for gt_bbox, gt_cls in zip(gt_bboxes[0], gt_labels[0]):\n            if (bbox[:4] == gt_bbox[:4]).all():\n                flag = True\n        assert flag, 'get_bboxes is wrong'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_corner_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core.evaluation.bbox_overlaps import bbox_overlaps\nfrom mmdet.models.dense_heads import CornerHead\n\n\ndef test_corner_head_loss():\n    \"\"\"Tests corner head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n\n    self = CornerHead(num_classes=4, in_channels=1)\n\n    # Corner head expects a multiple levels of features per image\n    feat = [\n        torch.rand(1, 1, s // 4, s // 4) for _ in range(self.num_feat_levels)\n    ]\n    tl_heats, br_heats, tl_embs, br_embs, tl_offs, br_offs = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,\n                                br_offs, gt_bboxes, gt_labels, img_metas,\n                                gt_bboxes_ignore)\n    empty_det_loss = sum(empty_gt_losses['det_loss'])\n    empty_push_loss = sum(empty_gt_losses['push_loss'])\n    empty_pull_loss = sum(empty_gt_losses['pull_loss'])\n    empty_off_loss = sum(empty_gt_losses['off_loss'])\n    assert empty_det_loss.item() > 0, 'det loss should be non-zero'\n    assert empty_push_loss.item() == 0, (\n        'there should be no push loss when there are no true boxes')\n    assert empty_pull_loss.item() == 0, (\n        'there should be no pull loss when there are no true boxes')\n    assert empty_off_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,\n                              br_offs, gt_bboxes, gt_labels, img_metas,\n                              gt_bboxes_ignore)\n    onegt_det_loss = sum(one_gt_losses['det_loss'])\n    onegt_push_loss = sum(one_gt_losses['push_loss'])\n    onegt_pull_loss = sum(one_gt_losses['pull_loss'])\n    onegt_off_loss = sum(one_gt_losses['off_loss'])\n    assert onegt_det_loss.item() > 0, 'det loss should be non-zero'\n    assert onegt_push_loss.item() == 0, (\n        'there should be no push loss when there are only one true box')\n    assert onegt_pull_loss.item() > 0, 'pull loss should be non-zero'\n    assert onegt_off_loss.item() > 0, 'off loss should be non-zero'\n\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874],\n                      [123.6667, 123.8757, 138.6326, 251.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2, 3])]\n\n    # equalize the corners' embedding value of different objects to make the\n    # push_loss larger than 0\n    gt_bboxes_ind = (gt_bboxes[0] // 4).int().tolist()\n    for tl_emb_feat, br_emb_feat in zip(tl_embs, br_embs):\n        tl_emb_feat[:, :, gt_bboxes_ind[0][1],\n                    gt_bboxes_ind[0][0]] = tl_emb_feat[:, :,\n                                                       gt_bboxes_ind[1][1],\n                                                       gt_bboxes_ind[1][0]]\n        br_emb_feat[:, :, gt_bboxes_ind[0][3],\n                    gt_bboxes_ind[0][2]] = br_emb_feat[:, :,\n     
                                                  gt_bboxes_ind[1][3],\n                                                       gt_bboxes_ind[1][2]]\n\n    two_gt_losses = self.loss(tl_heats, br_heats, tl_embs, br_embs, tl_offs,\n                              br_offs, gt_bboxes, gt_labels, img_metas,\n                              gt_bboxes_ignore)\n    twogt_det_loss = sum(two_gt_losses['det_loss'])\n    twogt_push_loss = sum(two_gt_losses['push_loss'])\n    twogt_pull_loss = sum(two_gt_losses['pull_loss'])\n    twogt_off_loss = sum(two_gt_losses['off_loss'])\n    assert twogt_det_loss.item() > 0, 'det loss should be non-zero'\n    assert twogt_push_loss.item() > 0, 'push loss should be non-zero'\n    assert twogt_pull_loss.item() > 0, 'pull loss should be non-zero'\n    assert twogt_off_loss.item() > 0, 'off loss should be non-zero'\n\n\ndef test_corner_head_encode_and_decode_heatmap():\n    \"\"\"Tests corner head generating and decoding the heatmap.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3),\n        'border': (0, 0, 0, 0)\n    }]\n\n    gt_bboxes = [\n        torch.Tensor([[10, 20, 200, 240], [40, 50, 100, 200],\n                      [10, 20, 200, 240]])\n    ]\n    gt_labels = [torch.LongTensor([1, 1, 2])]\n\n    self = CornerHead(num_classes=4, in_channels=1, corner_emb_channels=1)\n\n    feat = [\n        torch.rand(1, 1, s // 4, s // 4) for _ in range(self.num_feat_levels)\n    ]\n\n    targets = self.get_targets(\n        gt_bboxes,\n        gt_labels,\n        feat[0].shape,\n        img_metas[0]['pad_shape'],\n        with_corner_emb=self.with_corner_emb)\n\n    gt_tl_heatmap = targets['topleft_heatmap']\n    gt_br_heatmap = targets['bottomright_heatmap']\n    gt_tl_offset = targets['topleft_offset']\n    gt_br_offset = targets['bottomright_offset']\n    embedding = targets['corner_embedding']\n    [top, left], [bottom, right] = embedding[0][0]\n    gt_tl_embedding_heatmap = torch.zeros([1, 1, s // 4, s // 4])\n    gt_br_embedding_heatmap = torch.zeros([1, 1, s // 4, s // 4])\n    gt_tl_embedding_heatmap[0, 0, top, left] = 1\n    gt_br_embedding_heatmap[0, 0, bottom, right] = 1\n\n    batch_bboxes, batch_scores, batch_clses = self.decode_heatmap(\n        tl_heat=gt_tl_heatmap,\n        br_heat=gt_br_heatmap,\n        tl_off=gt_tl_offset,\n        br_off=gt_br_offset,\n        tl_emb=gt_tl_embedding_heatmap,\n        br_emb=gt_br_embedding_heatmap,\n        img_meta=img_metas[0],\n        k=100,\n        kernel=3,\n        distance_threshold=0.5)\n\n    bboxes = batch_bboxes.view(-1, 4)\n    scores = batch_scores.view(-1, 1)\n    clses = batch_clses.view(-1, 1)\n\n    idx = scores.argsort(dim=0, descending=True)\n    bboxes = bboxes[idx].view(-1, 4)\n    scores = scores[idx].view(-1)\n    clses = clses[idx].view(-1)\n\n    valid_bboxes = bboxes[torch.where(scores > 0.05)]\n    valid_labels = clses[torch.where(scores > 0.05)]\n    max_coordinate = valid_bboxes.max()\n    offsets = valid_labels.to(valid_bboxes) * (max_coordinate + 1)\n    gt_offsets = gt_labels[0].to(gt_bboxes[0]) * (max_coordinate + 1)\n\n    offset_bboxes = valid_bboxes + offsets[:, None]\n    offset_gtbboxes = gt_bboxes[0] + gt_offsets[:, None]\n\n    iou_matrix = bbox_overlaps(offset_bboxes.numpy(), offset_gtbboxes.numpy())\n    assert (iou_matrix == 1).sum() == 3\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_ddod_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import DDODHead\n\n\ndef test_ddod_head_loss():\n    \"\"\"Tests ddod head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(  # ATSSAssigner\n            assigner=dict(type='ATSSAssigner', topk=9, alpha=0.8),\n            reg_assigner=dict(type='ATSSAssigner', topk=9, alpha=0.5),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    self = DDODHead(\n        num_classes=4,\n        in_channels=1,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        train_cfg=train_cfg,\n        norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),\n        loss_iou=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    cls_scores, bbox_preds, iou_preds = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, iou_preds, gt_bboxes,\n                                gt_labels, img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n    empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n    empty_iou_loss = sum(empty_gt_losses['loss_iou'])\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    assert empty_iou_loss.item() == 0, (\n        'there should be no iou loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, iou_preds, gt_bboxes,\n                              gt_labels, img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n    onegt_box_loss = sum(one_gt_losses['loss_bbox'])\n    onegt_iou_loss = sum(one_gt_losses['loss_iou'])\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n    assert onegt_iou_loss.item() > 0, 'iou loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_dense_heads_attr.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport warnings\n\nfrom terminaltables import AsciiTable\n\nfrom mmdet.models import dense_heads\nfrom mmdet.models.dense_heads import *  # noqa: F401,F403\n\n\ndef test_dense_heads_test_attr():\n    \"\"\"Tests inference methods such as simple_test and aug_test.\"\"\"\n    # make list of dense heads\n    exceptions = ['FeatureAdaption']  # module used in head\n    all_dense_heads = [m for m in dense_heads.__all__ if m not in exceptions]\n\n    # search attributes\n    check_attributes = [\n        'simple_test', 'aug_test', 'simple_test_bboxes', 'simple_test_rpn',\n        'aug_test_rpn'\n    ]\n    table_header = ['head name'] + check_attributes\n    table_data = [table_header]\n    not_found = {k: [] for k in check_attributes}\n    for target_head_name in all_dense_heads:\n        target_head = globals()[target_head_name]\n        target_head_attributes = dir(target_head)\n        check_results = [target_head_name]\n        for check_attribute in check_attributes:\n            found = check_attribute in target_head_attributes\n            check_results.append(found)\n            if not found:\n                not_found[check_attribute].append(target_head_name)\n        table_data.append(check_results)\n    table = AsciiTable(table_data)\n    print()\n    print(table.table)\n\n    # NOTE: this test just checks attributes.\n    # simple_test of RPN heads will not work now.\n    assert len(not_found['simple_test']) == 0, \\\n        f'simple_test not found in {not_found[\"simple_test\"]}'\n    if len(not_found['aug_test']) != 0:\n        warnings.warn(f'aug_test not found in {not_found[\"aug_test\"]}. '\n                      'Please implement it or raise NotImplementedError.')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_detr_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nfrom mmcv import ConfigDict\n\nfrom mmdet.models.dense_heads import DETRHead\n\n\ndef test_detr_head_loss():\n    \"\"\"Tests transformer head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3),\n        'batch_input_shape': (s, s)\n    }]\n    config = ConfigDict(\n        dict(\n            type='DETRHead',\n            num_classes=80,\n            in_channels=200,\n            transformer=dict(\n                type='Transformer',\n                encoder=dict(\n                    type='DetrTransformerEncoder',\n                    num_layers=6,\n                    transformerlayers=dict(\n                        type='BaseTransformerLayer',\n                        attn_cfgs=[\n                            dict(\n                                type='MultiheadAttention',\n                                embed_dims=256,\n                                num_heads=8,\n                                dropout=0.1)\n                        ],\n                        feedforward_channels=2048,\n                        ffn_dropout=0.1,\n                        operation_order=('self_attn', 'norm', 'ffn', 'norm'))),\n                decoder=dict(\n                    type='DetrTransformerDecoder',\n                    return_intermediate=True,\n                    num_layers=6,\n                    transformerlayers=dict(\n                        type='DetrTransformerDecoderLayer',\n                        attn_cfgs=dict(\n                            type='MultiheadAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            dropout=0.1),\n                        feedforward_channels=2048,\n                        ffn_dropout=0.1,\n                        operation_order=('self_attn', 'norm', 'cross_attn',\n                                         'norm', 'ffn', 'norm')),\n                )),\n            positional_encoding=dict(\n                type='SinePositionalEncoding', num_feats=128, normalize=True),\n            loss_cls=dict(\n                type='CrossEntropyLoss',\n                bg_cls_weight=0.1,\n                use_sigmoid=False,\n                loss_weight=1.0,\n                class_weight=1.0),\n            loss_bbox=dict(type='L1Loss', loss_weight=5.0),\n            loss_iou=dict(type='GIoULoss', loss_weight=2.0)))\n\n    self = DETRHead(**config)\n    self.init_weights()\n    feat = [torch.rand(1, 200, 10, 10)]\n    cls_scores, bbox_preds = self.forward(feat, img_metas)\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    for key, loss in empty_gt_losses.items():\n        if 'cls' in key:\n            assert loss.item() > 0, 'cls loss should be non-zero'\n        elif 'bbox' in key:\n            assert loss.item(\n            ) == 0, 'there should be no box loss when there are no true boxes'\n        elif 'iou' in key:\n            assert loss.item(\n            ) == 0, 'there should be no iou loss when there are no true 
boxes'\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              img_metas, gt_bboxes_ignore)\n    for loss in one_gt_losses.values():\n        assert loss.item(\n        ) > 0, 'cls loss, or box loss, or iou loss should be non-zero'\n\n    # test forward_train\n    self.forward_train(feat, img_metas, gt_bboxes, gt_labels)\n\n    # test inference mode\n    self.get_bboxes(cls_scores, bbox_preds, img_metas, rescale=True)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_fcos_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import FCOSHead\n\n\ndef test_fcos_head_loss():\n    \"\"\"Tests fcos head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.4,\n                min_pos_iou=0,\n                ignore_iof_thr=-1),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    # since Focal Loss is not supported on CPU\n    self = FCOSHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    cls_scores, bbox_preds, centerness = self.forward(feat)\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, centerness, gt_bboxes,\n                                gt_labels, img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    empty_box_loss = empty_gt_losses['loss_bbox']\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, centerness, gt_bboxes,\n                              gt_labels, img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    onegt_box_loss = one_gt_losses['loss_bbox']\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_fsaf_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import FSAFHead\n\n\ndef test_fsaf_head_loss():\n    \"\"\"Tests anchor head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n\n    cfg = dict(\n        reg_decoded_bbox=True,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=1,\n            scales_per_octave=1,\n            ratios=[1.0],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(type='TBLRBBoxCoder', normalizer=4.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0,\n            reduction='none'),\n        loss_bbox=dict(\n            type='IoULoss', eps=1e-6, loss_weight=1.0, reduction='none'))\n\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='CenterRegionAssigner',\n                pos_scale=0.2,\n                neg_scale=0.2,\n                min_pos_iof=0.01),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    head = FSAFHead(num_classes=4, in_channels=1, train_cfg=train_cfg, **cfg)\n    if torch.cuda.is_available():\n        head.cuda()\n        # FSAF head expects a multiple levels of features per image\n        feat = [\n            torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2))).cuda()\n            for i in range(len(head.anchor_generator.strides))\n        ]\n        cls_scores, bbox_preds = head.forward(feat)\n        gt_bboxes_ignore = None\n\n        # When truth is non-empty then both cls and box loss should be nonzero\n        #  for random inputs\n        gt_bboxes = [\n            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),\n        ]\n        gt_labels = [torch.LongTensor([2]).cuda()]\n        one_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                  img_metas, gt_bboxes_ignore)\n        onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n        onegt_box_loss = sum(one_gt_losses['loss_bbox'])\n        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n\n        # Test that empty ground truth encourages the network to predict bkg\n        gt_bboxes = [torch.empty((0, 4)).cuda()]\n        gt_labels = [torch.LongTensor([]).cuda()]\n\n        empty_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes,\n                                    gt_labels, img_metas, gt_bboxes_ignore)\n        # When there is no truth, the cls loss should be nonzero but there\n        # should be no box loss.\n        empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n        empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n        assert empty_box_loss.item() == 0, (\n            'there should be no box loss when there are no true boxes')\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_ga_anchor_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import GuidedAnchorHead\n\n\ndef test_ga_anchor_head_loss():\n    \"\"\"Tests anchor head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n\n    cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            ga_assigner=dict(\n                type='ApproxMaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                ignore_iof_thr=-1),\n            ga_sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            allowed_border=-1,\n            center_ratio=0.2,\n            ignore_ratio=0.5,\n            pos_weight=-1,\n            debug=False))\n    head = GuidedAnchorHead(num_classes=4, in_channels=4, train_cfg=cfg)\n\n    # Anchor head expects a multiple levels of features per image\n    if torch.cuda.is_available():\n        head.cuda()\n        feat = [\n            torch.rand(1, 4, s // (2**(i + 2)), s // (2**(i + 2))).cuda()\n            for i in range(len(head.approx_anchor_generator.base_anchors))\n        ]\n        cls_scores, bbox_preds, shape_preds, loc_preds = head.forward(feat)\n\n        # Test that empty ground truth encourages the network to predict\n        # background\n        gt_bboxes = [torch.empty((0, 4)).cuda()]\n        gt_labels = [torch.LongTensor([]).cuda()]\n\n        gt_bboxes_ignore = None\n\n        empty_gt_losses = head.loss(cls_scores, bbox_preds, shape_preds,\n                                    loc_preds, gt_bboxes, gt_labels, img_metas,\n                                    gt_bboxes_ignore)\n\n        # When there is no truth, the cls loss should be nonzero but there\n        # should be no box loss.\n        empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n        empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n        assert empty_box_loss.item() == 0, (\n            'there should be no box loss when there are no true boxes')\n\n        # When truth is non-empty then both cls and box loss should be nonzero\n        # for random inputs\n        gt_bboxes = [\n            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),\n        ]\n        gt_labels = [torch.LongTensor([2]).cuda()]\n        one_gt_losses = head.loss(cls_scores, bbox_preds, shape_preds,\n                                  loc_preds, gt_bboxes, gt_labels, img_metas,\n                                  gt_bboxes_ignore)\n        onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n        onegt_box_loss = sum(one_gt_losses['loss_bbox'])\n        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_gfl_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import GFLHead\n\n\ndef test_gfl_head_loss():\n    \"\"\"Tests gfl head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(type='ATSSAssigner', topk=9),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    self = GFLHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        loss_cls=dict(\n            type='QualityFocalLoss',\n            use_sigmoid=True,\n            beta=2.0,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    cls_scores, bbox_preds = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n    empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n    empty_dfl_loss = sum(empty_gt_losses['loss_dfl'])\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    assert empty_dfl_loss.item() == 0, (\n        'there should be no dfl loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n    onegt_box_loss = sum(one_gt_losses['loss_bbox'])\n    onegt_dfl_loss = sum(one_gt_losses['loss_dfl'])\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n    assert onegt_dfl_loss.item() > 0, 'dfl loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_lad_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport torch\n\nfrom mmdet.models.dense_heads import LADHead, lad_head\nfrom mmdet.models.dense_heads.lad_head import levels_to_images\n\n\ndef test_lad_head_loss():\n    \"\"\"Tests lad head loss when truth is empty and non-empty.\"\"\"\n\n    class mock_skm:\n\n        def GaussianMixture(self, *args, **kwargs):\n            return self\n\n        def fit(self, loss):\n            pass\n\n        def predict(self, loss):\n            components = np.zeros_like(loss, dtype=np.long)\n            return components.reshape(-1)\n\n        def score_samples(self, loss):\n            scores = np.random.random(len(loss))\n            return scores\n\n    lad_head.skm = mock_skm()\n\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.1,\n                neg_iou_thr=0.1,\n                min_pos_iou=0,\n                ignore_iof_thr=-1),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    # since Focal Loss is not supported on CPU\n    self = LADHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5))\n    teacher_model = LADHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    self.init_weights()\n    teacher_model.init_weights()\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n\n    outs_teacher = teacher_model(feat)\n    label_assignment_results = teacher_model.get_label_assignment(\n        *outs_teacher, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore)\n\n    outs = teacher_model(feat)\n    empty_gt_losses = self.loss(*outs, gt_bboxes, gt_labels, img_metas,\n                                gt_bboxes_ignore, label_assignment_results)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    empty_box_loss = empty_gt_losses['loss_bbox']\n    empty_iou_loss = empty_gt_losses['loss_iou']\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    assert empty_iou_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = 
[torch.LongTensor([2])]\n\n    label_assignment_results = teacher_model.get_label_assignment(\n        *outs_teacher, gt_bboxes, gt_labels, img_metas, gt_bboxes_ignore)\n\n    one_gt_losses = self.loss(*outs, gt_bboxes, gt_labels, img_metas,\n                              gt_bboxes_ignore, label_assignment_results)\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    onegt_box_loss = one_gt_losses['loss_bbox']\n    onegt_iou_loss = one_gt_losses['loss_iou']\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n    assert onegt_iou_loss.item() > 0, 'box loss should be non-zero'\n    n, c, h, w = 10, 4, 20, 20\n    mlvl_tensor = [torch.ones(n, c, h, w) for i in range(5)]\n    results = levels_to_images(mlvl_tensor)\n    assert len(results) == n\n    assert results[0].size() == (h * w * 5, c)\n    assert self.with_score_voting\n\n    self = LADHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5))\n    cls_scores = [torch.ones(2, 4, 5, 5)]\n    bbox_preds = [torch.ones(2, 4, 5, 5)]\n    iou_preds = [torch.ones(2, 1, 5, 5)]\n    cfg = mmcv.Config(\n        dict(\n            nms_pre=1000,\n            min_bbox_size=0,\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.6),\n            max_per_img=100))\n    rescale = False\n    self.get_bboxes(\n        cls_scores, bbox_preds, iou_preds, img_metas, cfg, rescale=rescale)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_ld_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import GFLHead, LDHead\n\n\ndef test_ld_head_loss():\n    \"\"\"Tests vfnet head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(type='ATSSAssigner', topk=9, ignore_iof_thr=0.1),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n\n    self = LDHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        loss_ld=dict(type='KnowledgeDistillationKLDivLoss', loss_weight=1.0),\n        loss_cls=dict(\n            type='QualityFocalLoss',\n            use_sigmoid=True,\n            beta=2.0,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]))\n\n    teacher_model = GFLHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        loss_cls=dict(\n            type='QualityFocalLoss',\n            use_sigmoid=True,\n            beta=2.0,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]))\n\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    cls_scores, bbox_preds = self.forward(feat)\n    rand_soft_target = teacher_model.forward(feat)[1]\n\n    # Test that empty ground truth encourages the network to predict\n    # background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                rand_soft_target, img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero, ld loss should\n    # be non-negative but there should be no box loss.\n    empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n    empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n    empty_ld_loss = sum(empty_gt_losses['loss_ld'])\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    assert empty_ld_loss.item() >= 0, 'ld loss should be non-negative'\n\n    # When truth is non-empty then both cls and box loss should be nonzero\n    # for random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              rand_soft_target, img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n    onegt_box_loss = sum(one_gt_losses['loss_bbox'])\n\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n\n    gt_bboxes_ignore = gt_bboxes\n\n    # When truth is 
non-empty but ignored then the cls loss should be nonzero,\n    # but there should be no box loss.\n    ignore_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                 rand_soft_target, img_metas, gt_bboxes_ignore)\n    ignore_cls_loss = sum(ignore_gt_losses['loss_cls'])\n    ignore_box_loss = sum(ignore_gt_losses['loss_bbox'])\n\n    assert ignore_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert ignore_box_loss.item() == 0, 'gt bbox ignored loss should be zero'\n\n    # When truth is non-empty and not ignored then both cls and box loss should\n    # be nonzero for random inputs\n    gt_bboxes_ignore = [torch.randn(1, 4)]\n\n    not_ignore_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes,\n                                     gt_labels, rand_soft_target, img_metas,\n                                     gt_bboxes_ignore)\n    not_ignore_cls_loss = sum(not_ignore_gt_losses['loss_cls'])\n    not_ignore_box_loss = sum(not_ignore_gt_losses['loss_bbox'])\n\n    assert not_ignore_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert not_ignore_box_loss.item(\n    ) > 0, 'gt bbox not ignored loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_mask2former_head.py",
    "content": "import numpy as np\nimport pytest\nimport torch\nfrom mmcv import ConfigDict\n\nfrom mmdet.core.mask import BitmapMasks\nfrom mmdet.models.dense_heads import Mask2FormerHead\n\n\n@pytest.mark.parametrize('num_stuff_classes, \\\n     label_num', [(53, 100), (0, 80)])\ndef test_mask2former_head_loss(num_stuff_classes, label_num):\n    \"\"\"Tests head loss when truth is empty and non-empty.\n\n    Tests head loss as Panoptic Segmentation and Instance Segmentation. Tests\n    forward_train and simple_test with masks and None as gt_semantic_seg\n    \"\"\"\n    self = _init_model(num_stuff_classes)\n    img_metas = [{\n        'batch_input_shape': (128, 160),\n        'pad_shape': (128, 160, 3),\n        'img_shape': (126, 160, 3),\n        'ori_shape': (63, 80, 3)\n    }, {\n        'batch_input_shape': (128, 160),\n        'pad_shape': (128, 160, 3),\n        'img_shape': (120, 160, 3),\n        'ori_shape': (60, 80, 3)\n    }]\n    feats = [\n        torch.rand((2, 64 * 2**i, 4 * 2**(3 - i), 5 * 2**(3 - i)))\n        for i in range(4)\n    ]\n    all_cls_scores, all_mask_preds = self.forward(feats, img_metas)\n    # Test that empty ground truth encourages the network to predict background\n    gt_labels_list = [torch.LongTensor([]), torch.LongTensor([])]\n    gt_masks_list = [\n        torch.zeros((0, 128, 160)).long(),\n        torch.zeros((0, 128, 160)).long()\n    ]\n\n    empty_gt_losses = self.loss(all_cls_scores, all_mask_preds, gt_labels_list,\n                                gt_masks_list, img_metas)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no mask loss.\n    for key, loss in empty_gt_losses.items():\n        if 'cls' in key:\n            assert loss.item() > 0, 'cls loss should be non-zero'\n        elif 'mask' in key:\n            assert loss.item(\n            ) == 0, 'there should be no mask loss when there are no true mask'\n        elif 'dice' in key:\n            assert loss.item(\n            ) == 0, 'there should be no dice loss when there are no true mask'\n\n    # when truth is non-empty then both cls, mask, dice loss should be nonzero\n    # random inputs\n    gt_labels_list = [\n        torch.tensor([10, label_num]).long(),\n        torch.tensor([label_num, 10]).long()\n    ]\n    mask1 = torch.zeros((2, 128, 160)).long()\n    mask1[0, :50] = 1\n    mask1[1, 50:] = 1\n    mask2 = torch.zeros((2, 128, 160)).long()\n    mask2[0, :, :50] = 1\n    mask2[1, :, 50:] = 1\n    gt_masks_list = [mask1, mask2]\n    two_gt_losses = self.loss(all_cls_scores, all_mask_preds, gt_labels_list,\n                              gt_masks_list, img_metas)\n    for loss in two_gt_losses.values():\n        assert loss.item() > 0, 'all loss should be non-zero'\n\n    # test forward_train\n    gt_bboxes = None\n    gt_labels = [\n        torch.tensor([10]).long(),\n        torch.tensor([10]).long(),\n    ]\n    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)\n    thing_mask1[0, :50] = 1\n    thing_mask2 = np.zeros((1, 128, 160), dtype=np.int32)\n    thing_mask2[0, :, 50:] = 1\n    gt_masks = [\n        BitmapMasks(thing_mask1, 128, 160),\n        BitmapMasks(thing_mask2, 128, 160),\n    ]\n    stuff_mask1 = torch.zeros((1, 128, 160)).long()\n    stuff_mask1[0, :50] = 10\n    stuff_mask1[0, 50:] = 100\n    stuff_mask2 = torch.zeros((1, 128, 160)).long()\n    stuff_mask2[0, :, 50:] = 10\n    stuff_mask2[0, :, :50] = 100\n    gt_semantic_seg = [stuff_mask1, stuff_mask2]\n\n    self.forward_train(feats, img_metas, gt_bboxes, 
gt_labels, gt_masks,\n                       gt_semantic_seg)\n\n    # test when gt_semantic_seg is None\n    gt_semantic_seg = None\n    self.forward_train(feats, img_metas, gt_bboxes, gt_labels, gt_masks,\n                       gt_semantic_seg)\n\n    # test inference mode\n    self.simple_test(feats, img_metas)\n\n\ndef _init_model(num_stuff_classes):\n    base_channels = 64\n    num_things_classes = 80\n    num_classes = num_things_classes + num_stuff_classes\n    config = ConfigDict(\n        dict(\n            type='Mask2FormerHead',\n            in_channels=[base_channels * 2**i for i in range(4)],\n            feat_channels=base_channels,\n            out_channels=base_channels,\n            num_things_classes=num_things_classes,\n            num_stuff_classes=num_stuff_classes,\n            num_queries=100,\n            num_transformer_feat_level=3,\n            pixel_decoder=dict(\n                type='MSDeformAttnPixelDecoder',\n                num_outs=3,\n                norm_cfg=dict(type='GN', num_groups=32),\n                act_cfg=dict(type='ReLU'),\n                encoder=dict(\n                    type='DetrTransformerEncoder',\n                    num_layers=6,\n                    transformerlayers=dict(\n                        type='BaseTransformerLayer',\n                        attn_cfgs=dict(\n                            type='MultiScaleDeformableAttention',\n                            embed_dims=base_channels,\n                            num_heads=8,\n                            num_levels=3,\n                            num_points=4,\n                            im2col_step=64,\n                            dropout=0.0,\n                            batch_first=False,\n                            norm_cfg=None,\n                            init_cfg=None),\n                        ffn_cfgs=dict(\n                            type='FFN',\n                            embed_dims=base_channels,\n                            feedforward_channels=base_channels * 4,\n                            num_fcs=2,\n                            ffn_drop=0.0,\n                            act_cfg=dict(type='ReLU', inplace=True)),\n                        feedforward_channels=base_channels * 4,\n                        ffn_dropout=0.0,\n                        operation_order=('self_attn', 'norm', 'ffn', 'norm')),\n                    init_cfg=None),\n                positional_encoding=dict(\n                    type='SinePositionalEncoding',\n                    num_feats=base_channels // 2,\n                    normalize=True),\n                init_cfg=None),\n            enforce_decoder_input_project=False,\n            positional_encoding=dict(\n                type='SinePositionalEncoding',\n                num_feats=base_channels // 2,\n                normalize=True),\n            transformer_decoder=dict(\n                type='DetrTransformerDecoder',\n                return_intermediate=True,\n                num_layers=9,\n                transformerlayers=dict(\n                    type='DetrTransformerDecoderLayer',\n                    attn_cfgs=dict(\n                        type='MultiheadAttention',\n                        embed_dims=base_channels,\n                        num_heads=8,\n                        attn_drop=0.0,\n                        proj_drop=0.0,\n                        dropout_layer=None,\n                        batch_first=False),\n                    ffn_cfgs=dict(\n                        embed_dims=base_channels,\n                        
feedforward_channels=base_channels * 8,\n                        num_fcs=2,\n                        act_cfg=dict(type='ReLU', inplace=True),\n                        ffn_drop=0.0,\n                        dropout_layer=None,\n                        add_identity=True),\n                    # the following parameter was not used,\n                    # just make current api happy\n                    feedforward_channels=base_channels * 8,\n                    operation_order=('cross_attn', 'norm', 'self_attn', 'norm',\n                                     'ffn', 'norm')),\n                init_cfg=None),\n            loss_cls=dict(\n                type='CrossEntropyLoss',\n                use_sigmoid=False,\n                loss_weight=2.0,\n                reduction='mean',\n                class_weight=[1.0] * num_classes + [0.1]),\n            loss_mask=dict(\n                type='CrossEntropyLoss',\n                use_sigmoid=True,\n                reduction='mean',\n                loss_weight=5.0),\n            loss_dice=dict(\n                type='DiceLoss',\n                use_sigmoid=True,\n                activate=True,\n                reduction='mean',\n                naive_dice=True,\n                eps=1.0,\n                loss_weight=5.0),\n            train_cfg=dict(\n                num_points=256,\n                oversample_ratio=3.0,\n                importance_sample_ratio=0.75,\n                assigner=dict(\n                    type='MaskHungarianAssigner',\n                    cls_cost=dict(type='ClassificationCost', weight=2.0),\n                    mask_cost=dict(\n                        type='CrossEntropyLossCost',\n                        weight=5.0,\n                        use_sigmoid=True),\n                    dice_cost=dict(\n                        type='DiceCost', weight=5.0, pred_act=True, eps=1.0)),\n                sampler=dict(type='MaskPseudoSampler')),\n            test_cfg=dict(\n                panoptic_on=True,\n                semantic_on=False,\n                instance_on=True,\n                max_dets_per_image=100,\n                object_mask_thr=0.8,\n                iou_thr=0.8)))\n    self = Mask2FormerHead(**config)\n    self.init_weights()\n\n    return self\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_maskformer_head.py",
    "content": "import numpy as np\nimport torch\nfrom mmcv import ConfigDict\n\nfrom mmdet.core.mask import BitmapMasks\nfrom mmdet.models.dense_heads import MaskFormerHead\n\n\ndef test_maskformer_head_loss():\n    \"\"\"Tests head loss when truth is empty and non-empty.\"\"\"\n    base_channels = 64\n    # batch_input_shape = (128, 160)\n    img_metas = [{\n        'batch_input_shape': (128, 160),\n        'pad_shape': (128, 160, 3),\n        'img_shape': (126, 160, 3),\n        'ori_shape': (63, 80, 3)\n    }, {\n        'batch_input_shape': (128, 160),\n        'pad_shape': (128, 160, 3),\n        'img_shape': (120, 160, 3),\n        'ori_shape': (60, 80, 3)\n    }]\n    feats = [\n        torch.rand((2, 64 * 2**i, 4 * 2**(3 - i), 5 * 2**(3 - i)))\n        for i in range(4)\n    ]\n    num_things_classes = 80\n    num_stuff_classes = 53\n    num_classes = num_things_classes + num_stuff_classes\n    config = ConfigDict(\n        dict(\n            type='MaskFormerHead',\n            in_channels=[base_channels * 2**i for i in range(4)],\n            feat_channels=base_channels,\n            out_channels=base_channels,\n            num_things_classes=num_things_classes,\n            num_stuff_classes=num_stuff_classes,\n            num_queries=100,\n            pixel_decoder=dict(\n                type='TransformerEncoderPixelDecoder',\n                norm_cfg=dict(type='GN', num_groups=32),\n                act_cfg=dict(type='ReLU'),\n                encoder=dict(\n                    type='DetrTransformerEncoder',\n                    num_layers=6,\n                    transformerlayers=dict(\n                        type='BaseTransformerLayer',\n                        attn_cfgs=dict(\n                            type='MultiheadAttention',\n                            embed_dims=base_channels,\n                            num_heads=8,\n                            attn_drop=0.1,\n                            proj_drop=0.1,\n                            dropout_layer=None,\n                            batch_first=False),\n                        ffn_cfgs=dict(\n                            embed_dims=base_channels,\n                            feedforward_channels=base_channels * 8,\n                            num_fcs=2,\n                            act_cfg=dict(type='ReLU', inplace=True),\n                            ffn_drop=0.1,\n                            dropout_layer=None,\n                            add_identity=True),\n                        operation_order=('self_attn', 'norm', 'ffn', 'norm'),\n                        norm_cfg=dict(type='LN'),\n                        init_cfg=None,\n                        batch_first=False),\n                    init_cfg=None),\n                positional_encoding=dict(\n                    type='SinePositionalEncoding',\n                    num_feats=base_channels // 2,\n                    normalize=True)),\n            enforce_decoder_input_project=False,\n            positional_encoding=dict(\n                type='SinePositionalEncoding',\n                num_feats=base_channels // 2,\n                normalize=True),\n            transformer_decoder=dict(\n                type='DetrTransformerDecoder',\n                return_intermediate=True,\n                num_layers=6,\n                transformerlayers=dict(\n                    type='DetrTransformerDecoderLayer',\n                    attn_cfgs=dict(\n                        type='MultiheadAttention',\n                        embed_dims=base_channels,\n                        
num_heads=8,\n                        attn_drop=0.1,\n                        proj_drop=0.1,\n                        dropout_layer=None,\n                        batch_first=False),\n                    ffn_cfgs=dict(\n                        embed_dims=base_channels,\n                        feedforward_channels=base_channels * 8,\n                        num_fcs=2,\n                        act_cfg=dict(type='ReLU', inplace=True),\n                        ffn_drop=0.1,\n                        dropout_layer=None,\n                        add_identity=True),\n                    # the following parameter was not used,\n                    # just make current api happy\n                    feedforward_channels=base_channels * 8,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                     'ffn', 'norm')),\n                init_cfg=None),\n            loss_cls=dict(\n                type='CrossEntropyLoss',\n                use_sigmoid=False,\n                loss_weight=1.0,\n                reduction='mean',\n                class_weight=[1.0] * num_classes + [0.1]),\n            loss_mask=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                gamma=2.0,\n                alpha=0.25,\n                reduction='mean',\n                loss_weight=20.0),\n            loss_dice=dict(\n                type='DiceLoss',\n                use_sigmoid=True,\n                activate=True,\n                reduction='mean',\n                naive_dice=True,\n                eps=1.0,\n                loss_weight=1.0),\n            train_cfg=dict(\n                assigner=dict(\n                    type='MaskHungarianAssigner',\n                    cls_cost=dict(type='ClassificationCost', weight=1.0),\n                    mask_cost=dict(\n                        type='FocalLossCost', weight=20.0, binary_input=True),\n                    dice_cost=dict(\n                        type='DiceCost', weight=1.0, pred_act=True, eps=1.0)),\n                sampler=dict(type='MaskPseudoSampler')),\n            test_cfg=dict(object_mask_thr=0.8, iou_thr=0.8)))\n    self = MaskFormerHead(**config)\n    self.init_weights()\n    all_cls_scores, all_mask_preds = self.forward(feats, img_metas)\n    # Test that empty ground truth encourages the network to predict background\n    gt_labels_list = [torch.LongTensor([]), torch.LongTensor([])]\n    gt_masks_list = [\n        torch.zeros((0, 128, 160)).long(),\n        torch.zeros((0, 128, 160)).long()\n    ]\n\n    empty_gt_losses = self.loss(all_cls_scores, all_mask_preds, gt_labels_list,\n                                gt_masks_list, img_metas)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no mask loss.\n    for key, loss in empty_gt_losses.items():\n        if 'cls' in key:\n            assert loss.item() > 0, 'cls loss should be non-zero'\n        elif 'mask' in key:\n            assert loss.item(\n            ) == 0, 'there should be no mask loss when there are no true mask'\n        elif 'dice' in key:\n            assert loss.item(\n            ) == 0, 'there should be no dice loss when there are no true mask'\n\n    # when truth is non-empty then both cls, mask, dice loss should be nonzero\n    # random inputs\n    gt_labels_list = [\n        torch.tensor([10, 100]).long(),\n        torch.tensor([100, 10]).long()\n    ]\n    mask1 = torch.zeros((2, 128, 160)).long()\n    mask1[0, :50] = 1\n    mask1[1, 50:] = 1\n    mask2 = 
torch.zeros((2, 128, 160)).long()\n    mask2[0, :, :50] = 1\n    mask2[1, :, 50:] = 1\n    gt_masks_list = [mask1, mask2]\n    two_gt_losses = self.loss(all_cls_scores, all_mask_preds, gt_labels_list,\n                              gt_masks_list, img_metas)\n    for loss in two_gt_losses.values():\n        assert loss.item() > 0, 'all loss should be non-zero'\n\n    # test forward_train\n    gt_bboxes = None\n    gt_labels = [\n        torch.tensor([10]).long(),\n        torch.tensor([10]).long(),\n    ]\n    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)\n    thing_mask1[0, :50] = 1\n    thing_mask2 = np.zeros((1, 128, 160), dtype=np.int32)\n    thing_mask2[0, :, 50:] = 1\n    gt_masks = [\n        BitmapMasks(thing_mask1, 128, 160),\n        BitmapMasks(thing_mask2, 128, 160),\n    ]\n    stuff_mask1 = torch.zeros((1, 128, 160)).long()\n    stuff_mask1[0, :50] = 10\n    stuff_mask1[0, 50:] = 100\n    stuff_mask2 = torch.zeros((1, 128, 160)).long()\n    stuff_mask2[0, :, 50:] = 10\n    stuff_mask2[0, :, :50] = 100\n    gt_semantic_seg = [stuff_mask1, stuff_mask2]\n\n    self.forward_train(feats, img_metas, gt_bboxes, gt_labels, gt_masks,\n                       gt_semantic_seg)\n\n    # test inference mode\n    self.simple_test(feats, img_metas)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_paa_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport torch\n\nfrom mmdet.models.dense_heads import PAAHead, paa_head\nfrom mmdet.models.dense_heads.paa_head import levels_to_images\n\n\ndef test_paa_head_loss():\n    \"\"\"Tests paa head loss when truth is empty and non-empty.\"\"\"\n\n    class mock_skm:\n\n        def GaussianMixture(self, *args, **kwargs):\n            return self\n\n        def fit(self, loss):\n            pass\n\n        def predict(self, loss):\n            components = np.zeros_like(loss, dtype=np.long)\n            return components.reshape(-1)\n\n        def score_samples(self, loss):\n            scores = np.random.random(len(loss))\n            return scores\n\n    paa_head.skm = mock_skm()\n\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.1,\n                neg_iou_thr=0.1,\n                min_pos_iou=0,\n                ignore_iof_thr=-1),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    # since Focal Loss is not supported on CPU\n    self = PAAHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    self.init_weights()\n    cls_scores, bbox_preds, iou_preds = self(feat)\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, iou_preds, gt_bboxes,\n                                gt_labels, img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    empty_box_loss = empty_gt_losses['loss_bbox']\n    empty_iou_loss = empty_gt_losses['loss_iou']\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    assert empty_iou_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, iou_preds, gt_bboxes,\n                              gt_labels, img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    onegt_box_loss = one_gt_losses['loss_bbox']\n    onegt_iou_loss = one_gt_losses['loss_iou']\n    assert onegt_cls_loss.item() > 0, 'cls loss should be 
non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n    assert onegt_iou_loss.item() > 0, 'box loss should be non-zero'\n    n, c, h, w = 10, 4, 20, 20\n    mlvl_tensor = [torch.ones(n, c, h, w) for i in range(5)]\n    results = levels_to_images(mlvl_tensor)\n    assert len(results) == n\n    assert results[0].size() == (h * w * 5, c)\n    assert self.with_score_voting\n\n    self = PAAHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8]),\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.3),\n        loss_centerness=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.5))\n    cls_scores = [torch.ones(2, 4, 5, 5)]\n    bbox_preds = [torch.ones(2, 4, 5, 5)]\n    iou_preds = [torch.ones(2, 1, 5, 5)]\n    cfg = mmcv.Config(\n        dict(\n            nms_pre=1000,\n            min_bbox_size=0,\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.6),\n            max_per_img=100))\n    rescale = False\n    self.get_bboxes(\n        cls_scores, bbox_preds, iou_preds, img_metas, cfg, rescale=rescale)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_pisa_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import PISARetinaHead, PISASSDHead\nfrom mmdet.models.roi_heads import PISARoIHead\n\n\ndef test_pisa_retinanet_head_loss():\n    \"\"\"Tests pisa retinanet head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n\n    cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='RandomSampler',\n                num=256,\n                pos_fraction=0.5,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=False),\n            isr=dict(k=2., bias=0.),\n            carl=dict(k=1., bias=0.2),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False))\n    self = PISARetinaHead(num_classes=4, in_channels=1, train_cfg=cfg)\n\n    # Anchor head expects a multiple levels of features per image\n    feat = [\n        torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2)))\n        for i in range(len(self.anchor_generator.strides))\n    ]\n    cls_scores, bbox_preds = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls'].sum()\n    empty_box_loss = empty_gt_losses['loss_bbox'].sum()\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = one_gt_losses['loss_cls'].sum()\n    onegt_box_loss = one_gt_losses['loss_bbox'].sum()\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n\n\ndef test_pisa_ssd_head_loss():\n    \"\"\"Tests pisa ssd head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n\n    cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.5,\n                min_pos_iou=0.,\n                ignore_iof_thr=-1,\n                gt_max_assign_all=False),\n            isr=dict(k=2., bias=0.),\n            carl=dict(k=1., bias=0.2),\n            smoothl1_beta=1.,\n            allowed_border=-1,\n            pos_weight=-1,\n            neg_pos_ratio=3,\n            
debug=False))\n    ssd_anchor_generator = dict(\n        type='SSDAnchorGenerator',\n        scale_major=False,\n        input_size=300,\n        strides=[1],\n        ratios=([2], ),\n        basesize_ratio_range=(0.15, 0.9))\n    self = PISASSDHead(\n        num_classes=4,\n        in_channels=(1, ),\n        train_cfg=cfg,\n        anchor_generator=ssd_anchor_generator)\n\n    # Anchor head expects a multiple levels of features per image\n    feat = [\n        torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2)))\n        for i in range(len(self.anchor_generator.strides))\n    ]\n    cls_scores, bbox_preds = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n    empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n    # SSD is special: with OHEM and #pos:#neg = 1:3, empty gt also leads to cls loss = 0\n    assert empty_cls_loss.item() == 0, 'cls loss should be zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n    onegt_box_loss = sum(one_gt_losses['loss_bbox'])\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n\n\ndef test_pisa_roi_head_loss():\n    \"\"\"Tests pisa roi head loss when truth is empty and non-empty.\"\"\"\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.7,\n                neg_iou_thr=0.3,\n                min_pos_iou=0.3,\n                match_low_quality=True,\n                ignore_iof_thr=-1),\n            sampler=dict(\n                type='ScoreHLRSampler',\n                num=4,\n                pos_fraction=0.25,\n                neg_pos_ub=-1,\n                add_gt_as_proposals=True,\n                k=0.5,\n                bias=0.),\n            isr=dict(k=2., bias=0.),\n            carl=dict(k=1., bias=0.2),\n            allowed_border=0,\n            pos_weight=-1,\n            debug=False))\n\n    bbox_roi_extractor = dict(\n        type='SingleRoIExtractor',\n        roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),\n        out_channels=1,\n        featmap_strides=[1])\n\n    bbox_head = dict(\n        type='Shared2FCBBoxHead',\n        in_channels=1,\n        fc_out_channels=2,\n        roi_feat_size=7,\n        num_classes=4,\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[0., 0., 0., 0.],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        reg_class_agnostic=False,\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n        loss_bbox=dict(type='L1Loss', loss_weight=1.0))\n\n    self = PISARoIHead(bbox_roi_extractor, bbox_head, train_cfg=train_cfg)\n\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n\n    # Anchor head expects a multiple levels of features per image\n    feat = [\n        torch.rand(1, 1, s // (2**(i + 2)), s // (2**(i + 2)))\n        for i in range(1)\n    ]\n\n    proposal_list = [\n        torch.Tensor([[22.6667, 22.8757, 238.6326, 151.8874], [0, 3, 5, 7]])\n    ]\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n\n    empty_gt_losses = self.forward_train(feat, img_metas, proposal_list,\n                                         gt_bboxes, gt_labels,\n                                         gt_bboxes_ignore)\n\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls'].sum()\n    empty_box_loss = empty_gt_losses['loss_bbox'].sum()\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n\n    one_gt_losses = self.forward_train(feat, img_metas, proposal_list,\n                                       gt_bboxes, gt_labels, gt_bboxes_ignore)\n    onegt_cls_loss = one_gt_losses['loss_cls'].sum()\n    onegt_box_loss = one_gt_losses['loss_bbox'].sum()\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_sabl_retina_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import SABLRetinaHead\n\n\ndef test_sabl_retina_head_loss():\n    \"\"\"Tests anchor head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n\n    cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='ApproxMaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.0,\n                ignore_iof_thr=-1),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    head = SABLRetinaHead(\n        num_classes=4,\n        in_channels=3,\n        feat_channels=10,\n        loss_cls=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),\n        train_cfg=cfg)\n    if torch.cuda.is_available():\n        head.cuda()\n        # Anchor head expects a multiple levels of features per image\n        feat = [\n            torch.rand(1, 3, s // (2**(i + 2)), s // (2**(i + 2))).cuda()\n            for i in range(len(head.approx_anchor_generator.base_anchors))\n        ]\n        cls_scores, bbox_preds = head.forward(feat)\n\n        # Test that empty ground truth encourages the network\n        # to predict background\n        gt_bboxes = [torch.empty((0, 4)).cuda()]\n        gt_labels = [torch.LongTensor([]).cuda()]\n\n        gt_bboxes_ignore = None\n        empty_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes,\n                                    gt_labels, img_metas, gt_bboxes_ignore)\n        # When there is no truth, the cls loss should be nonzero but there\n        # should be no box loss.\n        empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n        empty_box_cls_loss = sum(empty_gt_losses['loss_bbox_cls'])\n        empty_box_reg_loss = sum(empty_gt_losses['loss_bbox_reg'])\n        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n        assert empty_box_cls_loss.item() == 0, (\n            'there should be no box cls loss when there are no true boxes')\n        assert empty_box_reg_loss.item() == 0, (\n            'there should be no box reg loss when there are no true boxes')\n\n        # When truth is non-empty then both cls and box loss should\n        # be nonzero for random inputs\n        gt_bboxes = [\n            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),\n        ]\n        gt_labels = [torch.LongTensor([2]).cuda()]\n        one_gt_losses = head.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                  img_metas, gt_bboxes_ignore)\n        onegt_cls_loss = sum(one_gt_losses['loss_cls'])\n        onegt_box_cls_loss = sum(one_gt_losses['loss_bbox_cls'])\n        onegt_box_reg_loss = sum(one_gt_losses['loss_bbox_reg'])\n        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n        assert onegt_box_cls_loss.item() > 0, 'box loss cls should be non-zero'\n        assert onegt_box_reg_loss.item() > 0, 'box loss reg should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_solo_head.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet.models.dense_heads import (DecoupledSOLOHead,\n                                      DecoupledSOLOLightHead, SOLOHead)\n\n\ndef test_solo_head_loss():\n    \"\"\"Tests solo head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    self = SOLOHead(\n        num_classes=4,\n        in_channels=1,\n        num_grids=[40, 36, 24, 16, 12],\n        loss_mask=dict(type='DiceLoss', use_sigmoid=True, loss_weight=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    mask_preds, cls_preds = self.forward(feat)\n    # Test that empty ground truth encourages the network to\n    # predict background.\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_masks = [torch.empty((0, 550, 550))]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(\n        mask_preds,\n        cls_preds,\n        gt_labels,\n        gt_masks,\n        img_metas,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_mask_loss = empty_gt_losses['loss_mask']\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_mask_loss.item() == 0, (\n        'there should be no mask loss when there are no true masks')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs.\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    gt_masks = [(torch.rand((1, 256, 256)) > 0.5).float()]\n    one_gt_losses = self.loss(\n        mask_preds,\n        cls_preds,\n        gt_labels,\n        gt_masks,\n        img_metas,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    onegt_mask_loss = one_gt_losses['loss_mask']\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_mask_loss.item() > 0, 'mask loss should be non-zero'\n\n    # When the length of num_grids, scale_ranges, and num_levels are not equal.\n    with pytest.raises(AssertionError):\n        SOLOHead(\n            num_classes=4,\n            in_channels=1,\n            num_grids=[36, 24, 16, 12],\n            loss_mask=dict(type='DiceLoss', use_sigmoid=True, loss_weight=3.0),\n            loss_cls=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0))\n\n    # When input feature length is not equal to num_levels.\n    with pytest.raises(AssertionError):\n        feat = [\n            torch.rand(1, 1, s // feat_size, s // feat_size)\n            for feat_size in [4, 8, 16, 32]\n        ]\n        self.forward(feat)\n\n\ndef test_desolo_head_loss():\n    \"\"\"Tests solo head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    self = DecoupledSOLOHead(\n     
   num_classes=4,\n        in_channels=1,\n        num_grids=[40, 36, 24, 16, 12],\n        loss_mask=dict(\n            type='DiceLoss', use_sigmoid=True, activate=False,\n            loss_weight=3.0),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    mask_preds_x, mask_preds_y, cls_preds = self.forward(feat)\n    # Test that empty ground truth encourages the network to\n    # predict background.\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_masks = [torch.empty((0, 550, 550))]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(\n        mask_preds_x,\n        mask_preds_y,\n        cls_preds,\n        gt_labels,\n        gt_masks,\n        img_metas,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_mask_loss = empty_gt_losses['loss_mask']\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_mask_loss.item() == 0, (\n        'there should be no mask loss when there are no true masks')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs.\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    gt_masks = [(torch.rand((1, 256, 256)) > 0.5).float()]\n    one_gt_losses = self.loss(\n        mask_preds_x,\n        mask_preds_y,\n        cls_preds,\n        gt_labels,\n        gt_masks,\n        img_metas,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    onegt_mask_loss = one_gt_losses['loss_mask']\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_mask_loss.item() > 0, 'mask loss should be non-zero'\n\n    # When the length of num_grids, scale_ranges, and num_levels are not equal.\n    with pytest.raises(AssertionError):\n        DecoupledSOLOHead(\n            num_classes=4,\n            in_channels=1,\n            num_grids=[36, 24, 16, 12],\n            loss_mask=dict(\n                type='DiceLoss',\n                use_sigmoid=True,\n                activate=False,\n                loss_weight=3.0),\n            loss_cls=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0))\n\n    # When input feature length is not equal to num_levels.\n    with pytest.raises(AssertionError):\n        feat = [\n            torch.rand(1, 1, s // feat_size, s // feat_size)\n            for feat_size in [4, 8, 16, 32]\n        ]\n        self.forward(feat)\n\n\ndef test_desolo_light_head_loss():\n    \"\"\"Tests solo head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    self = DecoupledSOLOLightHead(\n        num_classes=4,\n        in_channels=1,\n        num_grids=[40, 36, 24, 16, 12],\n        loss_mask=dict(\n            type='DiceLoss', use_sigmoid=True, activate=False,\n            loss_weight=3.0),\n        loss_cls=dict(\n  
          type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0))\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    mask_preds_x, mask_preds_y, cls_preds = self.forward(feat)\n    # Test that empty ground truth encourages the network to\n    # predict background.\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_masks = [torch.empty((0, 550, 550))]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(\n        mask_preds_x,\n        mask_preds_y,\n        cls_preds,\n        gt_labels,\n        gt_masks,\n        img_metas,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_mask_loss = empty_gt_losses['loss_mask']\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_mask_loss.item() == 0, (\n        'there should be no mask loss when there are no true masks')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs.\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    gt_masks = [(torch.rand((1, 256, 256)) > 0.5).float()]\n    one_gt_losses = self.loss(\n        mask_preds_x,\n        mask_preds_y,\n        cls_preds,\n        gt_labels,\n        gt_masks,\n        img_metas,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    onegt_mask_loss = one_gt_losses['loss_mask']\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_mask_loss.item() > 0, 'mask loss should be non-zero'\n\n    # When the length of num_grids, scale_ranges, and num_levels are not equal.\n    with pytest.raises(AssertionError):\n        DecoupledSOLOLightHead(\n            num_classes=4,\n            in_channels=1,\n            num_grids=[36, 24, 16, 12],\n            loss_mask=dict(type='DiceLoss', use_sigmoid=True, loss_weight=3.0),\n            loss_cls=dict(\n                type='FocalLoss',\n                use_sigmoid=True,\n                gamma=2.0,\n                alpha=0.25,\n                loss_weight=1.0))\n\n    # When input feature length is not equal to num_levels.\n    with pytest.raises(AssertionError):\n        feat = [\n            torch.rand(1, 1, s // feat_size, s // feat_size)\n            for feat_size in [4, 8, 16, 32]\n        ]\n        self.forward(feat)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_tood_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import TOODHead\n\n\ndef test_tood_head_loss():\n    \"\"\"Tests paa head loss when truth is empty and non-empty.\"\"\"\n\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            initial_epoch=4,\n            initial_assigner=dict(type='ATSSAssigner', topk=9),\n            assigner=dict(type='TaskAlignedAssigner', topk=13),\n            alpha=1,\n            beta=6,\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    test_cfg = mmcv.Config(\n        dict(\n            nms_pre=1000,\n            min_bbox_size=0,\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.6),\n            max_per_img=100))\n    # since Focal Loss is not supported on CPU\n    self = TOODHead(\n        num_classes=80,\n        in_channels=1,\n        stacked_convs=6,\n        feat_channels=256,\n        anchor_type='anchor_free',\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            octave_base_scale=8,\n            scales_per_octave=1,\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        initial_loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            activated=True,  # use probability instead of logit as input\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_cls=dict(\n            type='QualityFocalLoss',\n            use_sigmoid=True,\n            activated=True,  # use probability instead of logit as input\n            beta=2.0,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=2.0),\n        train_cfg=train_cfg,\n        test_cfg=test_cfg)\n    self.init_weights()\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [8, 16, 32, 64, 128]\n    ]\n    cls_scores, bbox_preds = self(feat)\n\n    # test initial assigner and losses\n    self.epoch = 0\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    empty_box_loss = empty_gt_losses['loss_bbox']\n    assert sum(empty_cls_loss).item() > 0, 'cls loss should be non-zero'\n    assert sum(empty_box_loss).item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    onegt_box_loss = one_gt_losses['loss_bbox']\n    assert 
sum(onegt_cls_loss).item() > 0, 'cls loss should be non-zero'\n    assert sum(onegt_box_loss).item() > 0, 'box loss should be non-zero'\n\n    # test task alignment assigner and losses\n    self.epoch = 10\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    empty_box_loss = empty_gt_losses['loss_bbox']\n    assert sum(empty_cls_loss).item() > 0, 'cls loss should be non-zero'\n    assert sum(empty_box_loss).item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    onegt_box_loss = one_gt_losses['loss_bbox']\n    assert sum(onegt_cls_loss).item() > 0, 'cls loss should be non-zero'\n    assert sum(onegt_box_loss).item() > 0, 'box loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_vfnet_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import VFNetHead\n\n\ndef test_vfnet_head_loss():\n    \"\"\"Tests vfnet head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(type='ATSSAssigner', topk=9),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    # since Focal Loss is not supported on CPU\n    self = VFNetHead(\n        num_classes=4,\n        in_channels=1,\n        train_cfg=train_cfg,\n        loss_cls=dict(type='VarifocalLoss', use_sigmoid=True, loss_weight=1.0))\n    if torch.cuda.is_available():\n        self.cuda()\n        feat = [\n            torch.rand(1, 1, s // feat_size, s // feat_size).cuda()\n            for feat_size in [4, 8, 16, 32, 64]\n        ]\n        cls_scores, bbox_preds, bbox_preds_refine = self.forward(feat)\n        # Test that empty ground truth encourages the network to predict\n        # background\n        gt_bboxes = [torch.empty((0, 4)).cuda()]\n        gt_labels = [torch.LongTensor([]).cuda()]\n        gt_bboxes_ignore = None\n        empty_gt_losses = self.loss(cls_scores, bbox_preds, bbox_preds_refine,\n                                    gt_bboxes, gt_labels, img_metas,\n                                    gt_bboxes_ignore)\n        # When there is no truth, the cls loss should be nonzero but there\n        # should be no box loss.\n        empty_cls_loss = empty_gt_losses['loss_cls']\n        empty_box_loss = empty_gt_losses['loss_bbox']\n        assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n        assert empty_box_loss.item() == 0, (\n            'there should be no box loss when there are no true boxes')\n\n        # When truth is non-empty then both cls and box loss should be nonzero\n        # for random inputs\n        gt_bboxes = [\n            torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]).cuda(),\n        ]\n        gt_labels = [torch.LongTensor([2]).cuda()]\n        one_gt_losses = self.loss(cls_scores, bbox_preds, bbox_preds_refine,\n                                  gt_bboxes, gt_labels, img_metas,\n                                  gt_bboxes_ignore)\n        onegt_cls_loss = one_gt_losses['loss_cls']\n        onegt_box_loss = one_gt_losses['loss_bbox']\n        assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n        assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_yolact_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import YOLACTHead, YOLACTProtonet, YOLACTSegmHead\n\n\ndef test_yolact_head_loss():\n    \"\"\"Tests yolact head losses when truth is empty and non-empty.\"\"\"\n    s = 550\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='MaxIoUAssigner',\n                pos_iou_thr=0.5,\n                neg_iou_thr=0.4,\n                min_pos_iou=0.,\n                ignore_iof_thr=-1,\n                gt_max_assign_all=False),\n            smoothl1_beta=1.,\n            allowed_border=-1,\n            pos_weight=-1,\n            neg_pos_ratio=3,\n            debug=False,\n            min_gt_box_wh=[4.0, 4.0]))\n    bbox_head = YOLACTHead(\n        num_classes=80,\n        in_channels=256,\n        feat_channels=256,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=3,\n            scales_per_octave=1,\n            base_sizes=[8, 16, 32, 64, 128],\n            ratios=[0.5, 1.0, 2.0],\n            strides=[550.0 / x for x in [69, 35, 18, 9, 5]],\n            centers=[(550 * 0.5 / x, 550 * 0.5 / x)\n                     for x in [69, 35, 18, 9, 5]]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]),\n        loss_cls=dict(\n            type='CrossEntropyLoss',\n            use_sigmoid=False,\n            reduction='none',\n            loss_weight=1.0),\n        loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.5),\n        num_head_convs=1,\n        num_protos=32,\n        use_ohem=True,\n        train_cfg=train_cfg)\n    segm_head = YOLACTSegmHead(\n        in_channels=256,\n        num_classes=80,\n        loss_segm=dict(\n            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0))\n    mask_head = YOLACTProtonet(\n        num_classes=80,\n        in_channels=256,\n        num_protos=32,\n        max_masks_to_train=100,\n        loss_mask_weight=6.125)\n    feat = [\n        torch.rand(1, 256, feat_size, feat_size)\n        for feat_size in [69, 35, 18, 9, 5]\n    ]\n    cls_score, bbox_pred, coeff_pred = bbox_head.forward(feat)\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_masks = [torch.empty((0, 550, 550))]\n    gt_bboxes_ignore = None\n    empty_gt_losses, sampling_results = bbox_head.loss(\n        cls_score,\n        bbox_pred,\n        gt_bboxes,\n        gt_labels,\n        img_metas,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = sum(empty_gt_losses['loss_cls'])\n    empty_box_loss = sum(empty_gt_losses['loss_bbox'])\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # Test segm head and mask head\n    segm_head_outs = segm_head(feat[0])\n    empty_segm_loss = segm_head.loss(segm_head_outs, gt_masks, gt_labels)\n    mask_pred = mask_head(feat[0], coeff_pred, gt_bboxes, img_metas,\n                          sampling_results)\n    empty_mask_loss = 
mask_head.loss(mask_pred, gt_masks, gt_bboxes, img_metas,\n                                     sampling_results)\n    # When there is no truth, the segm and mask loss should be zero.\n    empty_segm_loss = sum(empty_segm_loss['loss_segm'])\n    empty_mask_loss = sum(empty_mask_loss['loss_mask'])\n    assert empty_segm_loss.item() == 0, (\n        'there should be no segm loss when there are no true boxes')\n    assert empty_mask_loss == 0, (\n        'there should be no mask loss when there are no true boxes')\n\n    # When truth is non-empty then cls, box, mask, segm loss should be\n    # nonzero for random inputs.\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    gt_masks = [(torch.rand((1, 550, 550)) > 0.5).float()]\n\n    one_gt_losses, sampling_results = bbox_head.loss(\n        cls_score,\n        bbox_pred,\n        gt_bboxes,\n        gt_labels,\n        img_metas,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    one_gt_cls_loss = sum(one_gt_losses['loss_cls'])\n    one_gt_box_loss = sum(one_gt_losses['loss_bbox'])\n    assert one_gt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert one_gt_box_loss.item() > 0, 'box loss should be non-zero'\n\n    one_gt_segm_loss = segm_head.loss(segm_head_outs, gt_masks, gt_labels)\n    mask_pred = mask_head(feat[0], coeff_pred, gt_bboxes, img_metas,\n                          sampling_results)\n    one_gt_mask_loss = mask_head.loss(mask_pred, gt_masks, gt_bboxes,\n                                      img_metas, sampling_results)\n    one_gt_segm_loss = sum(one_gt_segm_loss['loss_segm'])\n    one_gt_mask_loss = sum(one_gt_mask_loss['loss_mask'])\n    assert one_gt_segm_loss.item() > 0, 'segm loss should be non-zero'\n    assert one_gt_mask_loss.item() > 0, 'mask loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_yolof_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.dense_heads import YOLOFHead\n\n\ndef test_yolof_head_loss():\n    \"\"\"Tests yolof head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='UniformAssigner',\n                pos_ignore_thr=0.15,\n                neg_ignore_thr=0.7),\n            allowed_border=-1,\n            pos_weight=-1,\n            debug=False))\n    self = YOLOFHead(\n        num_classes=4,\n        in_channels=1,\n        reg_decoded_bbox=True,\n        train_cfg=train_cfg,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[1, 2, 4, 8, 16],\n            strides=[32]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1., 1., 1., 1.],\n            add_ctr_clamp=True,\n            ctr_clamp=32),\n        loss_cls=dict(\n            type='FocalLoss',\n            use_sigmoid=True,\n            gamma=2.0,\n            alpha=0.25,\n            loss_weight=1.0),\n        loss_bbox=dict(type='GIoULoss', loss_weight=1.0))\n    feat = [torch.rand(1, 1, s // 32, s // 32)]\n    cls_scores, bbox_preds = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    gt_bboxes_ignore = None\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                                img_metas, gt_bboxes_ignore)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls']\n    empty_box_loss = empty_gt_losses['loss_bbox']\n    assert empty_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, gt_bboxes, gt_labels,\n                              img_metas, gt_bboxes_ignore)\n    onegt_cls_loss = one_gt_losses['loss_cls']\n    onegt_box_loss = one_gt_losses['loss_bbox']\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_dense_heads/test_yolox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\nfrom mmcv.cnn import ConvModule, DepthwiseSeparableConvModule\n\nfrom mmdet.models.dense_heads import YOLOXHead\n\n\ndef test_yolox_head_loss():\n    \"\"\"Tests yolox head loss when truth is empty and non-empty.\"\"\"\n    s = 256\n    img_metas = [{\n        'img_shape': (s, s, 3),\n        'scale_factor': 1,\n        'pad_shape': (s, s, 3)\n    }]\n    train_cfg = mmcv.Config(\n        dict(\n            assigner=dict(\n                type='SimOTAAssigner',\n                center_radius=2.5,\n                candidate_topk=10,\n                iou_weight=3.0,\n                cls_weight=1.0)))\n    self = YOLOXHead(\n        num_classes=4, in_channels=1, use_depthwise=False, train_cfg=train_cfg)\n    assert not self.use_l1\n    assert isinstance(self.multi_level_cls_convs[0][0], ConvModule)\n\n    feat = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16]\n    ]\n    cls_scores, bbox_preds, objectnesses = self.forward(feat)\n\n    # Test that empty ground truth encourages the network to predict background\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, objectnesses,\n                                gt_bboxes, gt_labels, img_metas)\n    # When there is no truth, the cls loss should be nonzero but there should\n    # be no box loss.\n    empty_cls_loss = empty_gt_losses['loss_cls'].sum()\n    empty_box_loss = empty_gt_losses['loss_bbox'].sum()\n    empty_obj_loss = empty_gt_losses['loss_obj'].sum()\n    assert empty_cls_loss.item() == 0, (\n        'there should be no cls loss when there are no true boxes')\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when there are no true boxes')\n    assert empty_obj_loss.item() > 0, 'objectness loss should be non-zero'\n\n    # When truth is non-empty then both cls and box loss should be nonzero for\n    # random inputs\n    self = YOLOXHead(\n        num_classes=4, in_channels=1, use_depthwise=True, train_cfg=train_cfg)\n    assert isinstance(self.multi_level_cls_convs[0][0],\n                      DepthwiseSeparableConvModule)\n    self.use_l1 = True\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    one_gt_losses = self.loss(cls_scores, bbox_preds, objectnesses, gt_bboxes,\n                              gt_labels, img_metas)\n    onegt_cls_loss = one_gt_losses['loss_cls'].sum()\n    onegt_box_loss = one_gt_losses['loss_bbox'].sum()\n    onegt_obj_loss = one_gt_losses['loss_obj'].sum()\n    onegt_l1_loss = one_gt_losses['loss_l1'].sum()\n    assert onegt_cls_loss.item() > 0, 'cls loss should be non-zero'\n    assert onegt_box_loss.item() > 0, 'box loss should be non-zero'\n    assert onegt_obj_loss.item() > 0, 'obj loss should be non-zero'\n    assert onegt_l1_loss.item() > 0, 'l1 loss should be non-zero'\n\n    # Test groud truth out of bound\n    gt_bboxes = [torch.Tensor([[s * 4, s * 4, s * 4 + 10, s * 4 + 10]])]\n    gt_labels = [torch.LongTensor([2])]\n    empty_gt_losses = self.loss(cls_scores, bbox_preds, objectnesses,\n                                gt_bboxes, gt_labels, img_metas)\n    # When gt_bboxes out of bound, the assign results should be empty,\n    # so the cls and bbox loss should be zero.\n    empty_cls_loss = empty_gt_losses['loss_cls'].sum()\n    empty_box_loss = 
empty_gt_losses['loss_bbox'].sum()\n    empty_obj_loss = empty_gt_losses['loss_obj'].sum()\n    assert empty_cls_loss.item() == 0, (\n        'there should be no cls loss when gt_bboxes out of bound')\n    assert empty_box_loss.item() == 0, (\n        'there should be no box loss when gt_bboxes out of bound')\n    assert empty_obj_loss.item() > 0, 'objectness loss should be non-zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_forward.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"pytest tests/test_forward.py.\"\"\"\nimport copy\nfrom os.path import dirname, exists, join\n\nimport numpy as np\nimport pytest\nimport torch\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection repo\n        repo_dpath = dirname(dirname(dirname(__file__)))\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet\n        repo_dpath = dirname(dirname(mmdet.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef _get_config_module(fname):\n    \"\"\"Load a configuration as a python module.\"\"\"\n    from mmcv import Config\n    config_dpath = _get_config_directory()\n    config_fpath = join(config_dpath, fname)\n    config_mod = Config.fromfile(config_fpath)\n    return config_mod\n\n\ndef _get_detector_cfg(fname):\n    \"\"\"Grab configs necessary to create a detector.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    return model\n\n\ndef _replace_r50_with_r18(model):\n    \"\"\"Replace ResNet50 with ResNet18 in config.\"\"\"\n    model = copy.deepcopy(model)\n    if model.backbone.type == 'ResNet':\n        model.backbone.depth = 18\n        model.backbone.base_channels = 2\n        model.neck.in_channels = [2, 4, 8, 16]\n    return model\n\n\ndef test_sparse_rcnn_forward():\n    config_path = 'sparse_rcnn/sparse_rcnn_r50_fpn_1x_coco.py'\n    model = _get_detector_cfg(config_path)\n    model = _replace_r50_with_r18(model)\n    model.backbone.init_cfg = None\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n    detector.init_weights()\n    input_shape = (1, 3, 100, 100)\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[5])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    # Test forward train with non-empty truth batch\n    detector.train()\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_bboxes = [item for item in gt_bboxes]\n    gt_labels = mm_inputs['gt_labels']\n    gt_labels = [item for item in gt_labels]\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n    detector.forward_dummy(imgs)\n\n    # Test forward train with an empty truth batch\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_bboxes = [item for item in gt_bboxes]\n    gt_labels = mm_inputs['gt_labels']\n    gt_labels = [item for item in gt_labels]\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n\n    # Test forward test\n    detector.eval()\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for 
one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      rescale=True,\n                                      return_loss=False)\n            batch_results.append(result)\n\n    # test empty proposal in roi_head\n    with torch.no_grad():\n        # test no proposal in the whole batch\n        detector.roi_head.simple_test([imgs[0][None, :]], torch.empty(\n            (1, 0, 4)), torch.empty((1, 100, 4)), [img_metas[0]],\n                                      torch.ones((1, 4)))\n\n\ndef test_rpn_forward():\n    model = _get_detector_cfg('rpn/rpn_r50_fpn_1x_coco.py')\n    model = _replace_r50_with_r18(model)\n    model.backbone.init_cfg = None\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (1, 3, 100, 100)\n    mm_inputs = _demo_mm_inputs(input_shape)\n\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    # Test forward train\n    gt_bboxes = mm_inputs['gt_bboxes']\n    losses = detector.forward(\n        imgs, img_metas, gt_bboxes=gt_bboxes, return_loss=True)\n    assert isinstance(losses, dict)\n\n    # Test forward test\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      return_loss=False)\n            batch_results.append(result)\n\n\n@pytest.mark.parametrize(\n    'cfg_file',\n    [\n        'reppoints/reppoints_moment_r50_fpn_1x_coco.py',\n        'retinanet/retinanet_r50_fpn_1x_coco.py',\n        'guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py',\n        'ghm/retinanet_ghm_r50_fpn_1x_coco.py',\n        'fcos/fcos_center_r50_caffe_fpn_gn-head_1x_coco.py',\n        'foveabox/fovea_align_r50_fpn_gn-head_4x4_2x_coco.py',\n        # 'free_anchor/retinanet_free_anchor_r50_fpn_1x_coco.py',\n        # 'atss/atss_r50_fpn_1x_coco.py',  # not ready for topk\n        'yolo/yolov3_mobilenetv2_320_300e_coco.py',\n        'yolox/yolox_tiny_8x8_300e_coco.py'\n    ])\ndef test_single_stage_forward_gpu(cfg_file):\n    if not torch.cuda.is_available():\n        import pytest\n        pytest.skip('test requires GPU and torch+cuda')\n\n    model = _get_detector_cfg(cfg_file)\n    model = _replace_r50_with_r18(model)\n    model.backbone.init_cfg = None\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (2, 3, 128, 128)\n    mm_inputs = _demo_mm_inputs(input_shape)\n\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    detector = detector.cuda()\n    imgs = imgs.cuda()\n    # Test forward train\n    gt_bboxes = [b.cuda() for b in mm_inputs['gt_bboxes']]\n    gt_labels = [g.cuda() for g in mm_inputs['gt_labels']]\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n\n    # Test forward test\n    detector.eval()\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      return_loss=False)\n            batch_results.append(result)\n\n\ndef test_faster_rcnn_ohem_forward():\n    model = 
_get_detector_cfg(\n        'faster_rcnn/faster_rcnn_r50_fpn_ohem_1x_coco.py')\n    model = _replace_r50_with_r18(model)\n    model.backbone.init_cfg = None\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (1, 3, 100, 100)\n\n    # Test forward train with a non-empty truth batch\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n\n    # Test forward train with an empty truth batch\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n\n    # Test RoI forward train with an empty proposals\n    feature = detector.extract_feat(imgs[0][None, :])\n    losses = detector.roi_head.forward_train(\n        feature,\n        img_metas, [torch.empty((0, 5))],\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels)\n    assert isinstance(losses, dict)\n\n\n@pytest.mark.parametrize(\n    'cfg_file',\n    [\n        # 'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',\n        'mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py',\n        # 'grid_rcnn/grid_rcnn_r50_fpn_gn-head_2x_coco.py',\n        # 'ms_rcnn/ms_rcnn_r50_fpn_1x_coco.py',\n        # 'htc/htc_r50_fpn_1x_coco.py',\n        # 'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',\n        # 'scnet/scnet_r50_fpn_20e_coco.py',\n        # 'seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501\n    ])\ndef test_two_stage_forward(cfg_file):\n    models_with_semantic = [\n        'htc/htc_r50_fpn_1x_coco.py',\n        'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py',\n        'scnet/scnet_r50_fpn_20e_coco.py',\n    ]\n    if cfg_file in models_with_semantic:\n        with_semantic = True\n    else:\n        with_semantic = False\n\n    model = _get_detector_cfg(cfg_file)\n    model = _replace_r50_with_r18(model)\n    model.backbone.init_cfg = None\n\n    # Save cost\n    if cfg_file in [\n            'seesaw_loss/mask_rcnn_r50_fpn_random_seesaw_loss_normed_mask_mstrain_2x_lvis_v1.py'  # noqa: E501\n    ]:\n        model.roi_head.bbox_head.num_classes = 80\n        model.roi_head.bbox_head.loss_cls.num_classes = 80\n        model.roi_head.mask_head.num_classes = 80\n        model.test_cfg.rcnn.score_thr = 0.05\n        model.test_cfg.rcnn.max_per_img = 100\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (1, 3, 128, 128)\n\n    # Test forward train with a non-empty truth batch\n    mm_inputs = _demo_mm_inputs(\n        input_shape, num_items=[10], with_semantic=with_semantic)\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)\n    assert 
isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    loss.requires_grad_(True)\n    assert float(loss.item()) > 0\n    loss.backward()\n\n    # Test forward train with an empty truth batch\n    mm_inputs = _demo_mm_inputs(\n        input_shape, num_items=[0], with_semantic=with_semantic)\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    losses = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    loss.requires_grad_(True)\n    assert float(loss.item()) > 0\n    loss.backward()\n\n    # Test RoI forward train with an empty proposals\n    if cfg_file in [\n            'panoptic_fpn/panoptic_fpn_r50_fpn_1x_coco.py'  # noqa: E501\n    ]:\n        mm_inputs.pop('gt_semantic_seg')\n\n    feature = detector.extract_feat(imgs[0][None, :])\n    losses = detector.roi_head.forward_train(feature, img_metas,\n                                             [torch.empty(\n                                                 (0, 5))], **mm_inputs)\n    assert isinstance(losses, dict)\n\n    # Test forward test\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      return_loss=False)\n            batch_results.append(result)\n    cascade_models = [\n        'cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py',\n        'htc/htc_r50_fpn_1x_coco.py',\n        'scnet/scnet_r50_fpn_20e_coco.py',\n    ]\n    # test empty proposal in roi_head\n    with torch.no_grad():\n        # test no proposal in the whole batch\n        detector.simple_test(\n            imgs[0][None, :], [img_metas[0]], proposals=[torch.empty((0, 4))])\n\n        # test no proposal of aug\n        features = detector.extract_feats([imgs[0][None, :]] * 2)\n        detector.roi_head.aug_test(features, [torch.empty((0, 4))] * 2,\n                                   [[img_metas[0]]] * 2)\n\n        # test rcnn_test_cfg is None\n        if cfg_file not in cascade_models:\n            feature = detector.extract_feat(imgs[0][None, :])\n            bboxes, scores = detector.roi_head.simple_test_bboxes(\n                feature, [img_metas[0]], [torch.empty((0, 4))], None)\n            assert all([bbox.shape == torch.Size((0, 4)) for bbox in bboxes])\n            assert all([\n                score.shape == torch.Size(\n                    (0, detector.roi_head.bbox_head.fc_cls.out_features))\n                for score in scores\n            ])\n\n        # test no proposal in the some image\n        x1y1 = torch.randint(1, 100, (10, 2)).float()\n        # x2y2 must be greater than x1y1\n        x2y2 = x1y1 + torch.randint(1, 100, (10, 2))\n        detector.simple_test(\n            imgs[0][None, :].repeat(2, 1, 1, 1), [img_metas[0]] * 2,\n            proposals=[torch.empty((0, 4)),\n                       torch.cat([x1y1, x2y2], dim=-1)])\n\n        # test no proposal of aug\n        detector.roi_head.aug_test(\n            features, [torch.cat([x1y1, x2y2], dim=-1),\n                       torch.empty((0, 4))], [[img_metas[0]]] * 2)\n\n        # test rcnn_test_cfg is None\n        if cfg_file not in cascade_models:\n            feature = detector.extract_feat(imgs[0][None, :].repeat(\n                2, 1, 1, 1))\n            bboxes, scores = detector.roi_head.simple_test_bboxes(\n               
 feature, [img_metas[0]] * 2,\n                [torch.empty((0, 4)),\n                 torch.cat([x1y1, x2y2], dim=-1)], None)\n            assert bboxes[0].shape == torch.Size((0, 4))\n            assert scores[0].shape == torch.Size(\n                (0, detector.roi_head.bbox_head.fc_cls.out_features))\n\n\n@pytest.mark.parametrize(\n    'cfg_file', ['ghm/retinanet_ghm_r50_fpn_1x_coco.py', 'ssd/ssd300_coco.py'])\ndef test_single_stage_forward_cpu(cfg_file):\n    model = _get_detector_cfg(cfg_file)\n    model = _replace_r50_with_r18(model)\n    model.backbone.init_cfg = None\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (1, 3, 300, 300)\n    mm_inputs = _demo_mm_inputs(input_shape)\n\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    # Test forward train\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n\n    # Test forward test\n    detector.eval()\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      return_loss=False)\n            batch_results.append(result)\n\n\ndef _demo_mm_inputs(input_shape=(1, 3, 300, 300),\n                    num_items=None, num_classes=10,\n                    with_semantic=False):  # yapf: disable\n    \"\"\"Create a superset of inputs needed to run test or train batches.\n\n    Args:\n        input_shape (tuple):\n            input batch dimensions\n\n        num_items (None | List[int]):\n            specifies the number of boxes in each batch item\n\n        num_classes (int):\n            number of different labels a box might have\n    \"\"\"\n    from mmdet.core import BitmapMasks\n\n    (N, C, H, W) = input_shape\n\n    rng = np.random.RandomState(0)\n\n    imgs = rng.rand(*input_shape)\n\n    img_metas = [{\n        'img_shape': (H, W, C),\n        'ori_shape': (H, W, C),\n        'pad_shape': (H, W, C),\n        'filename': '<demo>.png',\n        'scale_factor': np.array([1.1, 1.2, 1.1, 1.2]),\n        'flip': False,\n        'flip_direction': None,\n    } for _ in range(N)]\n\n    gt_bboxes = []\n    gt_labels = []\n    gt_masks = []\n\n    for batch_idx in range(N):\n        if num_items is None:\n            num_boxes = rng.randint(1, 10)\n        else:\n            num_boxes = num_items[batch_idx]\n\n        cx, cy, bw, bh = rng.rand(num_boxes, 4).T\n\n        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)\n        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)\n        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)\n        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)\n\n        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T\n        class_idxs = rng.randint(1, num_classes, size=num_boxes)\n\n        gt_bboxes.append(torch.FloatTensor(boxes))\n        gt_labels.append(torch.LongTensor(class_idxs))\n\n    mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)\n    gt_masks.append(BitmapMasks(mask, H, W))\n\n    mm_inputs = {\n        'imgs': torch.FloatTensor(imgs).requires_grad_(True),\n        'img_metas': img_metas,\n        'gt_bboxes': gt_bboxes,\n        'gt_labels': gt_labels,\n        'gt_bboxes_ignore': None,\n        
'gt_masks': gt_masks,\n    }\n\n    if with_semantic:\n        # assume gt_semantic_seg using scale 1/8 of the img\n        gt_semantic_seg = np.random.randint(\n            0, num_classes, (1, 1, H // 8, W // 8), dtype=np.uint8)\n        mm_inputs.update(\n            {'gt_semantic_seg': torch.ByteTensor(gt_semantic_seg)})\n\n    return mm_inputs\n\n\ndef test_yolact_forward():\n    model = _get_detector_cfg('yolact/yolact_r50_1x8_coco.py')\n    model = _replace_r50_with_r18(model)\n    model.backbone.init_cfg = None\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (1, 3, 100, 100)\n    mm_inputs = _demo_mm_inputs(input_shape)\n\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    # Test forward train\n    detector.train()\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    gt_masks = mm_inputs['gt_masks']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        gt_masks=gt_masks,\n        return_loss=True)\n    assert isinstance(losses, dict)\n\n    # Test forward dummy for get_flops\n    detector.forward_dummy(imgs)\n\n    # Test forward test\n    detector.eval()\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      rescale=True,\n                                      return_loss=False)\n            batch_results.append(result)\n\n\ndef test_detr_forward():\n    model = _get_detector_cfg('detr/detr_r50_8x2_150e_coco.py')\n    model.backbone.depth = 18\n    model.bbox_head.in_channels = 512\n    model.backbone.init_cfg = None\n\n    from mmdet.models import build_detector\n    detector = build_detector(model)\n\n    input_shape = (1, 3, 100, 100)\n    mm_inputs = _demo_mm_inputs(input_shape)\n\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    # Test forward train with non-empty truth batch\n    detector.train()\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n\n    # Test forward train with an empty truth batch\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[0])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    losses = detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n\n    # Test forward test\n    detector.eval()\n    with torch.no_grad():\n        img_list = [g[None, :] for g in imgs]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      rescale=True,\n                                      return_loss=False)\n            batch_results.append(result)\n\n\ndef test_inference_detector():\n    
from mmcv import ConfigDict\n\n    from mmdet.apis import inference_detector\n    from mmdet.models import build_detector\n\n    # small RetinaNet\n    num_class = 3\n    model_dict = dict(\n        type='RetinaNet',\n        backbone=dict(\n            type='ResNet',\n            depth=18,\n            num_stages=4,\n            out_indices=(3, ),\n            norm_cfg=dict(type='BN', requires_grad=False),\n            norm_eval=True,\n            style='pytorch'),\n        neck=None,\n        bbox_head=dict(\n            type='RetinaHead',\n            num_classes=num_class,\n            in_channels=512,\n            stacked_convs=1,\n            feat_channels=256,\n            anchor_generator=dict(\n                type='AnchorGenerator',\n                octave_base_scale=4,\n                scales_per_octave=3,\n                ratios=[0.5],\n                strides=[32]),\n            bbox_coder=dict(\n                type='DeltaXYWHBBoxCoder',\n                target_means=[.0, .0, .0, .0],\n                target_stds=[1.0, 1.0, 1.0, 1.0]),\n        ),\n        test_cfg=dict(\n            nms_pre=1000,\n            min_bbox_size=0,\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100))\n\n    rng = np.random.RandomState(0)\n    img1 = rng.rand(100, 100, 3)\n    img2 = rng.rand(100, 100, 3)\n\n    model = build_detector(ConfigDict(model_dict))\n    config = _get_config_module('retinanet/retinanet_r50_fpn_1x_coco.py')\n    model.cfg = config\n    # test single image\n    result = inference_detector(model, img1)\n    assert len(result) == num_class\n    # test multiple image\n    result = inference_detector(model, [img1, img2])\n    assert len(result) == 2 and len(result[0]) == num_class\n\n\ndef test_yolox_random_size():\n    from mmdet.models import build_detector\n    model = _get_detector_cfg('yolox/yolox_tiny_8x8_300e_coco.py')\n    model.random_size_range = (2, 2)\n    model.input_size = (64, 96)\n    model.random_size_interval = 1\n\n    detector = build_detector(model)\n    input_shape = (1, 3, 64, 64)\n    mm_inputs = _demo_mm_inputs(input_shape)\n\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    # Test forward train with non-empty truth batch\n    detector.train()\n    gt_bboxes = mm_inputs['gt_bboxes']\n    gt_labels = mm_inputs['gt_labels']\n    detector.forward(\n        imgs,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        return_loss=True)\n    assert detector._input_size == (64, 96)\n\n\ndef test_maskformer_forward():\n    model_cfg = _get_detector_cfg(\n        'maskformer/maskformer_r50_mstrain_16x1_75e_coco.py')\n    base_channels = 32\n    model_cfg.backbone.depth = 18\n    model_cfg.backbone.init_cfg = None\n    model_cfg.backbone.base_channels = base_channels\n    model_cfg.panoptic_head.in_channels = [\n        base_channels * 2**i for i in range(4)\n    ]\n    model_cfg.panoptic_head.feat_channels = base_channels\n    model_cfg.panoptic_head.out_channels = base_channels\n    model_cfg.panoptic_head.pixel_decoder.encoder.\\\n        transformerlayers.attn_cfgs.embed_dims = base_channels\n    model_cfg.panoptic_head.pixel_decoder.encoder.\\\n        transformerlayers.ffn_cfgs.embed_dims = base_channels\n    model_cfg.panoptic_head.pixel_decoder.encoder.\\\n        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8\n    model_cfg.panoptic_head.pixel_decoder.\\\n        positional_encoding.num_feats = 
base_channels // 2\n    model_cfg.panoptic_head.positional_encoding.\\\n        num_feats = base_channels // 2\n    model_cfg.panoptic_head.transformer_decoder.\\\n        transformerlayers.attn_cfgs.embed_dims = base_channels\n    model_cfg.panoptic_head.transformer_decoder.\\\n        transformerlayers.ffn_cfgs.embed_dims = base_channels\n    model_cfg.panoptic_head.transformer_decoder.\\\n        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8\n    model_cfg.panoptic_head.transformer_decoder.\\\n        transformerlayers.feedforward_channels = base_channels * 8\n\n    from mmdet.core import BitmapMasks\n    from mmdet.models import build_detector\n    detector = build_detector(model_cfg)\n\n    # Test forward train with non-empty truth batch\n    detector.train()\n    img_metas = [\n        {\n            'batch_input_shape': (128, 160),\n            'img_shape': (126, 160, 3),\n            'ori_shape': (63, 80, 3),\n            'pad_shape': (128, 160, 3)\n        },\n    ]\n    img = torch.rand((1, 3, 128, 160))\n    gt_bboxes = None\n    gt_labels = [\n        torch.tensor([10]).long(),\n    ]\n    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)\n    thing_mask1[0, :50] = 1\n    gt_masks = [\n        BitmapMasks(thing_mask1, 128, 160),\n    ]\n    stuff_mask1 = torch.zeros((1, 128, 160)).long()\n    stuff_mask1[0, :50] = 10\n    stuff_mask1[0, 50:] = 100\n    gt_semantic_seg = [\n        stuff_mask1,\n    ]\n    losses = detector.forward(\n        img=img,\n        img_metas=img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        gt_masks=gt_masks,\n        gt_semantic_seg=gt_semantic_seg,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n\n    # Test forward train with an empty truth batch\n    gt_bboxes = [\n        torch.empty((0, 4)).float(),\n    ]\n    gt_labels = [\n        torch.empty((0, )).long(),\n    ]\n    mask = np.zeros((0, 128, 160), dtype=np.uint8)\n    gt_masks = [\n        BitmapMasks(mask, 128, 160),\n    ]\n    gt_semantic_seg = [\n        torch.randint(0, 133, (0, 128, 160)),\n    ]\n    losses = detector.forward(\n        img,\n        img_metas,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels,\n        gt_masks=gt_masks,\n        gt_semantic_seg=gt_semantic_seg,\n        return_loss=True)\n    assert isinstance(losses, dict)\n    loss, _ = detector._parse_losses(losses)\n    assert float(loss.item()) > 0\n\n    # Test forward test\n    detector.eval()\n    with torch.no_grad():\n        img_list = [g[None, :] for g in img]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      rescale=True,\n                                      return_loss=False)\n        batch_results.append(result)\n\n\n@pytest.mark.parametrize('cfg_file', [\n    'mask2former/mask2former_r50_lsj_8x2_50e_coco.py',\n    'mask2former/mask2former_r50_lsj_8x2_50e_coco-panoptic.py'\n])\ndef test_mask2former_forward(cfg_file):\n    # Test Panoptic Segmentation and Instance Segmentation\n    model_cfg = _get_detector_cfg(cfg_file)\n    base_channels = 32\n    model_cfg.backbone.depth = 18\n    model_cfg.backbone.init_cfg = None\n    model_cfg.backbone.base_channels = base_channels\n    model_cfg.panoptic_head.in_channels = [\n        base_channels * 2**i for i in range(4)\n    ]\n    
model_cfg.panoptic_head.feat_channels = base_channels\n    model_cfg.panoptic_head.out_channels = base_channels\n    model_cfg.panoptic_head.pixel_decoder.encoder.\\\n        transformerlayers.attn_cfgs.embed_dims = base_channels\n    model_cfg.panoptic_head.pixel_decoder.encoder.\\\n        transformerlayers.ffn_cfgs.embed_dims = base_channels\n    model_cfg.panoptic_head.pixel_decoder.encoder.\\\n        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 4\n    model_cfg.panoptic_head.pixel_decoder.\\\n        positional_encoding.num_feats = base_channels // 2\n    model_cfg.panoptic_head.positional_encoding.\\\n        num_feats = base_channels // 2\n    model_cfg.panoptic_head.transformer_decoder.\\\n        transformerlayers.attn_cfgs.embed_dims = base_channels\n    model_cfg.panoptic_head.transformer_decoder.\\\n        transformerlayers.ffn_cfgs.embed_dims = base_channels\n    model_cfg.panoptic_head.transformer_decoder.\\\n        transformerlayers.ffn_cfgs.feedforward_channels = base_channels * 8\n    model_cfg.panoptic_head.transformer_decoder.\\\n        transformerlayers.feedforward_channels = base_channels * 8\n\n    num_stuff_classes = model_cfg.panoptic_head.num_stuff_classes\n\n    from mmdet.core import BitmapMasks\n    from mmdet.models import build_detector\n    detector = build_detector(model_cfg)\n\n    def _forward_train():\n        losses = detector.forward(\n            img,\n            img_metas,\n            gt_bboxes=gt_bboxes,\n            gt_labels=gt_labels,\n            gt_masks=gt_masks,\n            gt_semantic_seg=gt_semantic_seg,\n            return_loss=True)\n        assert isinstance(losses, dict)\n        loss, _ = detector._parse_losses(losses)\n        assert float(loss.item()) > 0\n\n    # Test forward train with non-empty truth batch\n    detector.train()\n    img_metas = [\n        {\n            'batch_input_shape': (128, 160),\n            'img_shape': (126, 160, 3),\n            'ori_shape': (63, 80, 3),\n            'pad_shape': (128, 160, 3)\n        },\n    ]\n    img = torch.rand((1, 3, 128, 160))\n    gt_bboxes = None\n    gt_labels = [\n        torch.tensor([10]).long(),\n    ]\n    thing_mask1 = np.zeros((1, 128, 160), dtype=np.int32)\n    thing_mask1[0, :50] = 1\n    gt_masks = [\n        BitmapMasks(thing_mask1, 128, 160),\n    ]\n    stuff_mask1 = torch.zeros((1, 128, 160)).long()\n    stuff_mask1[0, :50] = 10\n    stuff_mask1[0, 50:] = 100\n    gt_semantic_seg = [\n        stuff_mask1,\n    ]\n    _forward_train()\n\n    # Test forward train with non-empty truth batch and gt_semantic_seg=None\n    gt_semantic_seg = None\n    _forward_train()\n\n    # Test forward train with an empty truth batch\n    gt_bboxes = [\n        torch.empty((0, 4)).float(),\n    ]\n    gt_labels = [\n        torch.empty((0, )).long(),\n    ]\n    mask = np.zeros((0, 128, 160), dtype=np.uint8)\n    gt_masks = [\n        BitmapMasks(mask, 128, 160),\n    ]\n    gt_semantic_seg = [\n        torch.randint(0, 133, (0, 128, 160)),\n    ]\n    _forward_train()\n\n    # Test forward train with an empty truth batch and gt_semantic_seg=None\n    gt_semantic_seg = None\n    _forward_train()\n\n    # Test forward test\n    detector.eval()\n    with torch.no_grad():\n        img_list = [g[None, :] for g in img]\n        batch_results = []\n        for one_img, one_meta in zip(img_list, img_metas):\n            result = detector.forward([one_img], [[one_meta]],\n                                      rescale=True,\n                                      
return_loss=False)\n\n            if num_stuff_classes > 0:\n                assert isinstance(result[0], dict)\n            else:\n                assert isinstance(result[0], tuple)\n\n        batch_results.append(result)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_loss.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nfrom mmcv.utils import digit_version\n\nfrom mmdet.models.losses import (BalancedL1Loss, CrossEntropyLoss, DiceLoss,\n                                 DistributionFocalLoss, FocalLoss,\n                                 GaussianFocalLoss,\n                                 KnowledgeDistillationKLDivLoss, L1Loss,\n                                 MSELoss, QualityFocalLoss, SeesawLoss,\n                                 SmoothL1Loss, VarifocalLoss)\nfrom mmdet.models.losses.ghm_loss import GHMC, GHMR\nfrom mmdet.models.losses.iou_loss import (BoundedIoULoss, CIoULoss, DIoULoss,\n                                          GIoULoss, IoULoss)\n\n\n@pytest.mark.parametrize(\n    'loss_class', [IoULoss, BoundedIoULoss, GIoULoss, DIoULoss, CIoULoss])\ndef test_iou_type_loss_zeros_weight(loss_class):\n    pred = torch.rand((10, 4))\n    target = torch.rand((10, 4))\n    weight = torch.zeros(10)\n\n    loss = loss_class()(pred, target, weight)\n    assert loss == 0.\n\n\n@pytest.mark.parametrize('loss_class', [\n    BalancedL1Loss, BoundedIoULoss, CIoULoss, CrossEntropyLoss, DIoULoss,\n    FocalLoss, DistributionFocalLoss, MSELoss, SeesawLoss, GaussianFocalLoss,\n    GIoULoss, IoULoss, L1Loss, QualityFocalLoss, VarifocalLoss, GHMR, GHMC,\n    SmoothL1Loss, KnowledgeDistillationKLDivLoss, DiceLoss\n])\ndef test_loss_with_reduction_override(loss_class):\n    pred = torch.rand((10, 4))\n    target = torch.rand((10, 4)),\n    weight = None\n\n    with pytest.raises(AssertionError):\n        # only reduction_override from [None, 'none', 'mean', 'sum']\n        # is not allowed\n        reduction_override = True\n        loss_class()(\n            pred, target, weight, reduction_override=reduction_override)\n\n\n@pytest.mark.parametrize('loss_class', [\n    IoULoss, BoundedIoULoss, GIoULoss, DIoULoss, CIoULoss, MSELoss, L1Loss,\n    SmoothL1Loss, BalancedL1Loss\n])\n@pytest.mark.parametrize('input_shape', [(10, 4), (0, 4)])\ndef test_regression_losses(loss_class, input_shape):\n    pred = torch.rand(input_shape)\n    target = torch.rand(input_shape)\n    weight = torch.rand(input_shape)\n\n    # Test loss forward\n    loss = loss_class()(pred, target)\n    assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with weight\n    loss = loss_class()(pred, target, weight)\n    assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with reduction_override\n    loss = loss_class()(pred, target, reduction_override='mean')\n    assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with avg_factor\n    loss = loss_class()(pred, target, avg_factor=10)\n    assert isinstance(loss, torch.Tensor)\n\n    with pytest.raises(ValueError):\n        # loss can evaluate with avg_factor only if\n        # reduction is None, 'none' or 'mean'.\n        reduction_override = 'sum'\n        loss_class()(\n            pred, target, avg_factor=10, reduction_override=reduction_override)\n\n    # Test loss forward with avg_factor and reduction\n    for reduction_override in [None, 'none', 'mean']:\n        loss_class()(\n            pred, target, avg_factor=10, reduction_override=reduction_override)\n        assert isinstance(loss, torch.Tensor)\n\n\n@pytest.mark.parametrize('loss_class', [FocalLoss, CrossEntropyLoss])\n@pytest.mark.parametrize('input_shape', [(10, 5), (0, 5)])\ndef test_classification_losses(loss_class, input_shape):\n    if input_shape[0] == 0 and digit_version(\n            torch.__version__) < 
digit_version('1.5.0'):\n        pytest.skip(\n            f'CELoss in PyTorch {torch.__version__} does not support empty'\n            f'tensor.')\n\n    pred = torch.rand(input_shape)\n    target = torch.randint(0, 5, (input_shape[0], ))\n\n    # Test loss forward\n    loss = loss_class()(pred, target)\n    assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with reduction_override\n    loss = loss_class()(pred, target, reduction_override='mean')\n    assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with avg_factor\n    loss = loss_class()(pred, target, avg_factor=10)\n    assert isinstance(loss, torch.Tensor)\n\n    with pytest.raises(ValueError):\n        # loss can evaluate with avg_factor only if\n        # reduction is None, 'none' or 'mean'.\n        reduction_override = 'sum'\n        loss_class()(\n            pred, target, avg_factor=10, reduction_override=reduction_override)\n\n    # Test loss forward with avg_factor and reduction\n    for reduction_override in [None, 'none', 'mean']:\n        loss_class()(\n            pred, target, avg_factor=10, reduction_override=reduction_override)\n        assert isinstance(loss, torch.Tensor)\n\n\n@pytest.mark.parametrize('loss_class', [GHMR])\n@pytest.mark.parametrize('input_shape', [(10, 4), (0, 4)])\ndef test_GHMR_loss(loss_class, input_shape):\n    pred = torch.rand(input_shape)\n    target = torch.rand(input_shape)\n    weight = torch.rand(input_shape)\n\n    # Test loss forward\n    loss = loss_class()(pred, target, weight)\n    assert isinstance(loss, torch.Tensor)\n\n\n@pytest.mark.parametrize('use_sigmoid', [True, False])\n@pytest.mark.parametrize('reduction', ['sum', 'mean', None])\n@pytest.mark.parametrize('avg_non_ignore', [True, False])\ndef test_loss_with_ignore_index(use_sigmoid, reduction, avg_non_ignore):\n    # Test cross_entropy loss\n    loss_class = CrossEntropyLoss(\n        use_sigmoid=use_sigmoid,\n        use_mask=False,\n        ignore_index=255,\n        avg_non_ignore=avg_non_ignore)\n    pred = torch.rand((10, 5))\n    target = torch.randint(0, 5, (10, ))\n\n    ignored_indices = torch.randint(0, 10, (2, ), dtype=torch.long)\n    target[ignored_indices] = 255\n\n    # Test loss forward with default ignore\n    loss_with_ignore = loss_class(pred, target, reduction_override=reduction)\n    assert isinstance(loss_with_ignore, torch.Tensor)\n\n    # Test loss forward with forward ignore\n    target[ignored_indices] = 255\n    loss_with_forward_ignore = loss_class(\n        pred, target, ignore_index=255, reduction_override=reduction)\n    assert isinstance(loss_with_forward_ignore, torch.Tensor)\n\n    # Verify correctness\n    if avg_non_ignore:\n        # manually remove the ignored elements\n        not_ignored_indices = (target != 255)\n        pred = pred[not_ignored_indices]\n        target = target[not_ignored_indices]\n    loss = loss_class(pred, target, reduction_override=reduction)\n\n    assert torch.allclose(loss, loss_with_ignore)\n    assert torch.allclose(loss, loss_with_forward_ignore)\n\n    # test ignore all target\n    pred = torch.rand((10, 5))\n    target = torch.ones((10, ), dtype=torch.long) * 255\n    loss = loss_class(pred, target, reduction_override=reduction)\n    assert loss == 0\n\n\n@pytest.mark.parametrize('naive_dice', [True, False])\ndef test_dice_loss(naive_dice):\n    loss_class = DiceLoss\n    pred = torch.rand((10, 4, 4))\n    target = torch.rand((10, 4, 4))\n    weight = torch.rand((10))\n\n    # Test loss forward\n    loss = 
loss_class(naive_dice=naive_dice)(pred, target)\n    assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with weight\n    loss = loss_class(naive_dice=naive_dice)(pred, target, weight)\n    assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with reduction_override\n    loss = loss_class(naive_dice=naive_dice)(\n        pred, target, reduction_override='mean')\n    assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with avg_factor\n    loss = loss_class(naive_dice=naive_dice)(pred, target, avg_factor=10)\n    assert isinstance(loss, torch.Tensor)\n\n    with pytest.raises(ValueError):\n        # loss can evaluate with avg_factor only if\n        # reduction is None, 'none' or 'mean'.\n        reduction_override = 'sum'\n        loss_class(naive_dice=naive_dice)(\n            pred, target, avg_factor=10, reduction_override=reduction_override)\n\n    # Test loss forward with avg_factor and reduction\n    for reduction_override in [None, 'none', 'mean']:\n        loss_class(naive_dice=naive_dice)(\n            pred, target, avg_factor=10, reduction_override=reduction_override)\n        assert isinstance(loss, torch.Tensor)\n\n    # Test loss forward with has_acted=False and use_sigmoid=False\n    with pytest.raises(NotImplementedError):\n        loss_class(\n            use_sigmoid=False, activate=True, naive_dice=naive_dice)(pred,\n                                                                     target)\n\n    # Test loss forward with weight.ndim != loss.ndim\n    with pytest.raises(AssertionError):\n        weight = torch.rand((2, 8))\n        loss_class(naive_dice=naive_dice)(pred, target, weight)\n\n    # Test loss forward with len(weight) != len(pred)\n    with pytest.raises(AssertionError):\n        weight = torch.rand((8))\n        loss_class(naive_dice=naive_dice)(pred, target, weight)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_loss_compatibility.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"pytest tests/test_loss_compatibility.py.\"\"\"\nimport copy\nfrom os.path import dirname, exists, join\n\nimport numpy as np\nimport pytest\nimport torch\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection repo\n        repo_dpath = dirname(dirname(dirname(__file__)))\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet\n        repo_dpath = dirname(dirname(mmdet.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef _get_config_module(fname):\n    \"\"\"Load a configuration as a python module.\"\"\"\n    from mmcv import Config\n    config_dpath = _get_config_directory()\n    config_fpath = join(config_dpath, fname)\n    config_mod = Config.fromfile(config_fpath)\n    return config_mod\n\n\ndef _get_detector_cfg(fname):\n    \"\"\"Grab configs necessary to create a detector.\n\n    These are deep copied to allow for safe modification of parameters without\n    influencing other tests.\n    \"\"\"\n    config = _get_config_module(fname)\n    model = copy.deepcopy(config.model)\n    return model\n\n\n@pytest.mark.parametrize('loss_bbox', [\n    dict(type='L1Loss', loss_weight=1.0),\n    dict(type='GHMR', mu=0.02, bins=10, momentum=0.7, loss_weight=10.0),\n    dict(type='IoULoss', loss_weight=1.0),\n    dict(type='BoundedIoULoss', loss_weight=1.0),\n    dict(type='GIoULoss', loss_weight=1.0),\n    dict(type='DIoULoss', loss_weight=1.0),\n    dict(type='CIoULoss', loss_weight=1.0),\n    dict(type='MSELoss', loss_weight=1.0),\n    dict(type='SmoothL1Loss', loss_weight=1.0),\n    dict(type='BalancedL1Loss', loss_weight=1.0)\n])\ndef test_bbox_loss_compatibility(loss_bbox):\n    \"\"\"Test loss_bbox compatibility.\n\n    Using Faster R-CNN as a sample, modifying the loss function in the config\n    file to verify the compatibility of Loss APIS\n    \"\"\"\n    # Faster R-CNN config dict\n    config_path = '_base_/models/faster_rcnn_r50_fpn.py'\n    cfg_model = _get_detector_cfg(config_path)\n\n    input_shape = (1, 3, 256, 256)\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    if 'IoULoss' in loss_bbox['type']:\n        cfg_model.roi_head.bbox_head.reg_decoded_bbox = True\n\n    cfg_model.roi_head.bbox_head.loss_bbox = loss_bbox\n\n    from mmdet.models import build_detector\n    detector = build_detector(cfg_model)\n\n    loss = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)\n    assert isinstance(loss, dict)\n    loss, _ = detector._parse_losses(loss)\n    assert float(loss.item()) > 0\n\n\n@pytest.mark.parametrize('loss_cls', [\n    dict(type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),\n    dict(\n        type='FocalLoss',\n        use_sigmoid=True,\n        gamma=2.0,\n        alpha=0.25,\n        loss_weight=1.0),\n    dict(\n        type='GHMC', bins=30, momentum=0.75, use_sigmoid=True, loss_weight=1.0)\n])\ndef test_cls_loss_compatibility(loss_cls):\n    \"\"\"Test loss_cls compatibility.\n\n    Using Faster R-CNN as a sample, modifying the loss function in the config\n    file to verify the compatibility of Loss APIS\n    \"\"\"\n    # Faster R-CNN config dict\n    config_path = 
'_base_/models/faster_rcnn_r50_fpn.py'\n    cfg_model = _get_detector_cfg(config_path)\n\n    input_shape = (1, 3, 256, 256)\n    mm_inputs = _demo_mm_inputs(input_shape, num_items=[10])\n    imgs = mm_inputs.pop('imgs')\n    img_metas = mm_inputs.pop('img_metas')\n\n    # verify class loss function compatibility\n    # for loss_cls in loss_clses:\n    cfg_model.roi_head.bbox_head.loss_cls = loss_cls\n\n    from mmdet.models import build_detector\n    detector = build_detector(cfg_model)\n\n    loss = detector.forward(imgs, img_metas, return_loss=True, **mm_inputs)\n    assert isinstance(loss, dict)\n    loss, _ = detector._parse_losses(loss)\n    assert float(loss.item()) > 0\n\n\ndef _demo_mm_inputs(input_shape=(1, 3, 300, 300),\n                    num_items=None, num_classes=10,\n                    with_semantic=False):  # yapf: disable\n    \"\"\"Create a superset of inputs needed to run test or train batches.\n\n    Args:\n        input_shape (tuple):\n            input batch dimensions\n\n        num_items (None | List[int]):\n            specifies the number of boxes in each batch item\n\n        num_classes (int):\n            number of different labels a box might have\n    \"\"\"\n    from mmdet.core import BitmapMasks\n\n    (N, C, H, W) = input_shape\n\n    rng = np.random.RandomState(0)\n\n    imgs = rng.rand(*input_shape)\n\n    img_metas = [{\n        'img_shape': (H, W, C),\n        'ori_shape': (H, W, C),\n        'pad_shape': (H, W, C),\n        'filename': '<demo>.png',\n        'scale_factor': np.array([1.1, 1.2, 1.1, 1.2]),\n        'flip': False,\n        'flip_direction': None,\n    } for _ in range(N)]\n\n    gt_bboxes = []\n    gt_labels = []\n    gt_masks = []\n\n    for batch_idx in range(N):\n        if num_items is None:\n            num_boxes = rng.randint(1, 10)\n        else:\n            num_boxes = num_items[batch_idx]\n\n        cx, cy, bw, bh = rng.rand(num_boxes, 4).T\n\n        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)\n        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)\n        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)\n        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)\n\n        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T\n        class_idxs = rng.randint(1, num_classes, size=num_boxes)\n\n        gt_bboxes.append(torch.FloatTensor(boxes))\n        gt_labels.append(torch.LongTensor(class_idxs))\n\n    mask = np.random.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)\n    gt_masks.append(BitmapMasks(mask, H, W))\n\n    mm_inputs = {\n        'imgs': torch.FloatTensor(imgs).requires_grad_(True),\n        'img_metas': img_metas,\n        'gt_bboxes': gt_bboxes,\n        'gt_labels': gt_labels,\n        'gt_bboxes_ignore': None,\n        'gt_masks': gt_masks,\n    }\n\n    if with_semantic:\n        # assume gt_semantic_seg using scale 1/8 of the img\n        gt_semantic_seg = np.random.randint(\n            0, num_classes, (1, 1, H // 8, W // 8), dtype=np.uint8)\n        mm_inputs.update(\n            {'gt_semantic_seg': torch.ByteTensor(gt_semantic_seg)})\n\n    return mm_inputs\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_necks.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nfrom torch.nn.modules.batchnorm import _BatchNorm\n\nfrom mmdet.models.necks import (FPG, FPN, FPN_CARAFE, NASFCOS_FPN, NASFPN,\n                                YOLOXPAFPN, ChannelMapper, CTResNetNeck,\n                                DilatedEncoder, DyHead, SSDNeck, YOLOV3Neck)\n\n\ndef test_fpn():\n    \"\"\"Tests fpn.\"\"\"\n    s = 64\n    in_channels = [8, 16, 32, 64]\n    feat_sizes = [s // 2**i for i in range(4)]  # [64, 32, 16, 8]\n    out_channels = 8\n\n    # end_level=-1 is equal to end_level=3\n    FPN(in_channels=in_channels,\n        out_channels=out_channels,\n        start_level=0,\n        end_level=-1,\n        num_outs=5)\n    FPN(in_channels=in_channels,\n        out_channels=out_channels,\n        start_level=0,\n        end_level=3,\n        num_outs=5)\n\n    # `num_outs` is not equal to end_level - start_level + 1\n    with pytest.raises(AssertionError):\n        FPN(in_channels=in_channels,\n            out_channels=out_channels,\n            start_level=1,\n            end_level=2,\n            num_outs=3)\n\n    # `num_outs` is not equal to len(in_channels) - start_level\n    with pytest.raises(AssertionError):\n        FPN(in_channels=in_channels,\n            out_channels=out_channels,\n            start_level=1,\n            num_outs=2)\n\n    # `end_level` is larger than len(in_channels) - 1\n    with pytest.raises(AssertionError):\n        FPN(in_channels=in_channels,\n            out_channels=out_channels,\n            start_level=1,\n            end_level=4,\n            num_outs=2)\n\n    # `num_outs` is not equal to end_level - start_level\n    with pytest.raises(AssertionError):\n        FPN(in_channels=in_channels,\n            out_channels=out_channels,\n            start_level=1,\n            end_level=3,\n            num_outs=1)\n\n    # Invalid `add_extra_convs` option\n    with pytest.raises(AssertionError):\n        FPN(in_channels=in_channels,\n            out_channels=out_channels,\n            start_level=1,\n            add_extra_convs='on_xxx',\n            num_outs=5)\n\n    fpn_model = FPN(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        start_level=1,\n        add_extra_convs=True,\n        num_outs=5)\n\n    # FPN expects a multiple levels of features per image\n    feats = [\n        torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i])\n        for i in range(len(in_channels))\n    ]\n    outs = fpn_model(feats)\n    assert fpn_model.add_extra_convs == 'on_input'\n    assert len(outs) == fpn_model.num_outs\n    for i in range(fpn_model.num_outs):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n    # Tests for fpn with no extra convs (pooling is used instead)\n    fpn_model = FPN(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        start_level=1,\n        add_extra_convs=False,\n        num_outs=5)\n    outs = fpn_model(feats)\n    assert len(outs) == fpn_model.num_outs\n    assert not fpn_model.add_extra_convs\n    for i in range(fpn_model.num_outs):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n    # Tests for fpn with lateral bns\n    fpn_model = FPN(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        start_level=1,\n        add_extra_convs=True,\n        no_norm_on_lateral=False,\n        norm_cfg=dict(type='BN', 
requires_grad=True),\n        num_outs=5)\n    outs = fpn_model(feats)\n    assert len(outs) == fpn_model.num_outs\n    assert fpn_model.add_extra_convs == 'on_input'\n    for i in range(fpn_model.num_outs):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n    bn_exist = False\n    for m in fpn_model.modules():\n        if isinstance(m, _BatchNorm):\n            bn_exist = True\n    assert bn_exist\n\n    # Bilinear upsample\n    fpn_model = FPN(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        start_level=1,\n        add_extra_convs=True,\n        upsample_cfg=dict(mode='bilinear', align_corners=True),\n        num_outs=5)\n    fpn_model(feats)\n    outs = fpn_model(feats)\n    assert len(outs) == fpn_model.num_outs\n    assert fpn_model.add_extra_convs == 'on_input'\n    for i in range(fpn_model.num_outs):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n    # Scale factor instead of fixed upsample size upsample\n    fpn_model = FPN(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        start_level=1,\n        add_extra_convs=True,\n        upsample_cfg=dict(scale_factor=2),\n        num_outs=5)\n    outs = fpn_model(feats)\n    assert len(outs) == fpn_model.num_outs\n    for i in range(fpn_model.num_outs):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n    # Extra convs source is 'inputs'\n    fpn_model = FPN(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        add_extra_convs='on_input',\n        start_level=1,\n        num_outs=5)\n    assert fpn_model.add_extra_convs == 'on_input'\n    outs = fpn_model(feats)\n    assert len(outs) == fpn_model.num_outs\n    for i in range(fpn_model.num_outs):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n    # Extra convs source is 'laterals'\n    fpn_model = FPN(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        add_extra_convs='on_lateral',\n        start_level=1,\n        num_outs=5)\n    assert fpn_model.add_extra_convs == 'on_lateral'\n    outs = fpn_model(feats)\n    assert len(outs) == fpn_model.num_outs\n    for i in range(fpn_model.num_outs):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n    # Extra convs source is 'outputs'\n    fpn_model = FPN(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        add_extra_convs='on_output',\n        start_level=1,\n        num_outs=5)\n    assert fpn_model.add_extra_convs == 'on_output'\n    outs = fpn_model(feats)\n    assert len(outs) == fpn_model.num_outs\n    for i in range(fpn_model.num_outs):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n\ndef test_channel_mapper():\n    \"\"\"Tests ChannelMapper.\"\"\"\n    s = 64\n    in_channels = [8, 16, 32, 64]\n    feat_sizes = [s // 2**i for i in range(4)]  # [64, 32, 16, 8]\n    out_channels = 8\n    kernel_size = 3\n    feats = [\n        torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i])\n        for i in range(len(in_channels))\n    ]\n\n    # in_channels must be a list\n    with pytest.raises(AssertionError):\n        channel_mapper = ChannelMapper(\n            in_channels=10, out_channels=out_channels, kernel_size=kernel_size)\n    # the length of channel_mapper's 
inputs must be equal to the length of\n    # in_channels\n    with pytest.raises(AssertionError):\n        channel_mapper = ChannelMapper(\n            in_channels=in_channels[:-1],\n            out_channels=out_channels,\n            kernel_size=kernel_size)\n        channel_mapper(feats)\n\n    channel_mapper = ChannelMapper(\n        in_channels=in_channels,\n        out_channels=out_channels,\n        kernel_size=kernel_size)\n\n    outs = channel_mapper(feats)\n    assert len(outs) == len(feats)\n    for i in range(len(feats)):\n        outs[i].shape[1] == out_channels\n        outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n\ndef test_dilated_encoder():\n    in_channels = 16\n    out_channels = 32\n    out_shape = 34\n    dilated_encoder = DilatedEncoder(in_channels, out_channels, 16, 2,\n                                     [2, 4, 6, 8])\n    feat = [torch.rand(1, in_channels, 34, 34)]\n    out_feat = dilated_encoder(feat)[0]\n    assert out_feat.shape == (1, out_channels, out_shape, out_shape)\n\n\ndef test_ct_resnet_neck():\n    # num_filters/num_kernels must be a list\n    with pytest.raises(TypeError):\n        CTResNetNeck(\n            in_channel=10, num_deconv_filters=10, num_deconv_kernels=4)\n\n    # num_filters/num_kernels must be same length\n    with pytest.raises(AssertionError):\n        CTResNetNeck(\n            in_channel=10,\n            num_deconv_filters=(10, 10),\n            num_deconv_kernels=(4, ))\n\n    in_channels = 16\n    num_filters = (8, 8)\n    num_kernels = (4, 4)\n    feat = torch.rand(1, 16, 4, 4)\n    ct_resnet_neck = CTResNetNeck(\n        in_channel=in_channels,\n        num_deconv_filters=num_filters,\n        num_deconv_kernels=num_kernels,\n        use_dcn=False)\n\n    # feat must be list or tuple\n    with pytest.raises(AssertionError):\n        ct_resnet_neck(feat)\n\n    out_feat = ct_resnet_neck([feat])[0]\n    assert out_feat.shape == (1, num_filters[-1], 16, 16)\n\n    if torch.cuda.is_available():\n        # test dcn\n        ct_resnet_neck = CTResNetNeck(\n            in_channel=in_channels,\n            num_deconv_filters=num_filters,\n            num_deconv_kernels=num_kernels)\n        ct_resnet_neck = ct_resnet_neck.cuda()\n        feat = feat.cuda()\n        out_feat = ct_resnet_neck([feat])[0]\n        assert out_feat.shape == (1, num_filters[-1], 16, 16)\n\n\ndef test_yolov3_neck():\n    # num_scales, in_channels, out_channels must be same length\n    with pytest.raises(AssertionError):\n        YOLOV3Neck(num_scales=3, in_channels=[16, 8, 4], out_channels=[8, 4])\n\n    # len(feats) must equal to num_scales\n    with pytest.raises(AssertionError):\n        neck = YOLOV3Neck(\n            num_scales=3, in_channels=[16, 8, 4], out_channels=[8, 4, 2])\n        feats = (torch.rand(1, 4, 16, 16), torch.rand(1, 8, 16, 16))\n        neck(feats)\n\n    # test normal channels\n    s = 32\n    in_channels = [16, 8, 4]\n    out_channels = [8, 4, 2]\n    feat_sizes = [s // 2**i for i in range(len(in_channels) - 1, -1, -1)]\n    feats = [\n        torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i])\n        for i in range(len(in_channels) - 1, -1, -1)\n    ]\n    neck = YOLOV3Neck(\n        num_scales=3, in_channels=in_channels, out_channels=out_channels)\n    outs = neck(feats)\n\n    assert len(outs) == len(feats)\n    for i in range(len(outs)):\n        assert outs[i].shape == \\\n               (1, out_channels[i], feat_sizes[i], feat_sizes[i])\n\n    # test more flexible setting\n    s = 32\n    in_channels = [32, 8, 
16]\n    out_channels = [19, 21, 5]\n    feat_sizes = [s // 2**i for i in range(len(in_channels) - 1, -1, -1)]\n    feats = [\n        torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i])\n        for i in range(len(in_channels) - 1, -1, -1)\n    ]\n    neck = YOLOV3Neck(\n        num_scales=3, in_channels=in_channels, out_channels=out_channels)\n    outs = neck(feats)\n\n    assert len(outs) == len(feats)\n    for i in range(len(outs)):\n        assert outs[i].shape == \\\n               (1, out_channels[i], feat_sizes[i], feat_sizes[i])\n\n\ndef test_ssd_neck():\n    # level_strides/level_paddings must be same length\n    with pytest.raises(AssertionError):\n        SSDNeck(\n            in_channels=[8, 16],\n            out_channels=[8, 16, 32],\n            level_strides=[2],\n            level_paddings=[2, 1])\n\n    # length of out_channels must larger than in_channels\n    with pytest.raises(AssertionError):\n        SSDNeck(\n            in_channels=[8, 16],\n            out_channels=[8],\n            level_strides=[2],\n            level_paddings=[2])\n\n    # len(out_channels) - len(in_channels) must equal to len(level_strides)\n    with pytest.raises(AssertionError):\n        SSDNeck(\n            in_channels=[8, 16],\n            out_channels=[4, 16, 64],\n            level_strides=[2, 2],\n            level_paddings=[2, 2])\n\n    # in_channels must be same with out_channels[:len(in_channels)]\n    with pytest.raises(AssertionError):\n        SSDNeck(\n            in_channels=[8, 16],\n            out_channels=[4, 16, 64],\n            level_strides=[2],\n            level_paddings=[2])\n\n    ssd_neck = SSDNeck(\n        in_channels=[4],\n        out_channels=[4, 8, 16],\n        level_strides=[2, 1],\n        level_paddings=[1, 0])\n    feats = (torch.rand(1, 4, 16, 16), )\n    outs = ssd_neck(feats)\n    assert outs[0].shape == (1, 4, 16, 16)\n    assert outs[1].shape == (1, 8, 8, 8)\n    assert outs[2].shape == (1, 16, 6, 6)\n\n    # test SSD-Lite Neck\n    ssd_neck = SSDNeck(\n        in_channels=[4, 8],\n        out_channels=[4, 8, 16],\n        level_strides=[1],\n        level_paddings=[1],\n        l2_norm_scale=None,\n        use_depthwise=True,\n        norm_cfg=dict(type='BN'),\n        act_cfg=dict(type='ReLU6'))\n    assert not hasattr(ssd_neck, 'l2_norm')\n\n    from mmcv.cnn.bricks import DepthwiseSeparableConvModule\n    assert isinstance(ssd_neck.extra_layers[0][-1],\n                      DepthwiseSeparableConvModule)\n\n    feats = (torch.rand(1, 4, 8, 8), torch.rand(1, 8, 8, 8))\n    outs = ssd_neck(feats)\n    assert outs[0].shape == (1, 4, 8, 8)\n    assert outs[1].shape == (1, 8, 8, 8)\n    assert outs[2].shape == (1, 16, 8, 8)\n\n\ndef test_yolox_pafpn():\n    s = 64\n    in_channels = [8, 16, 32, 64]\n    feat_sizes = [s // 2**i for i in range(4)]  # [64, 32, 16, 8]\n    out_channels = 24\n    feats = [\n        torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i])\n        for i in range(len(in_channels))\n    ]\n    neck = YOLOXPAFPN(in_channels=in_channels, out_channels=out_channels)\n    outs = neck(feats)\n    assert len(outs) == len(feats)\n    for i in range(len(feats)):\n        assert outs[i].shape[1] == out_channels\n        assert outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n    # test depth-wise\n    neck = YOLOXPAFPN(\n        in_channels=in_channels, out_channels=out_channels, use_depthwise=True)\n\n    from mmcv.cnn.bricks import DepthwiseSeparableConvModule\n    assert isinstance(neck.downsamples[0], 
DepthwiseSeparableConvModule)\n\n    outs = neck(feats)\n    assert len(outs) == len(feats)\n    for i in range(len(feats)):\n        assert outs[i].shape[1] == out_channels\n        assert outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n\ndef test_dyhead():\n    s = 64\n    in_channels = 8\n    out_channels = 16\n    feat_sizes = [s // 2**i for i in range(4)]  # [64, 32, 16, 8]\n    feats = [\n        torch.rand(1, in_channels, feat_sizes[i], feat_sizes[i])\n        for i in range(len(feat_sizes))\n    ]\n    neck = DyHead(\n        in_channels=in_channels, out_channels=out_channels, num_blocks=3)\n    outs = neck(feats)\n    assert len(outs) == len(feats)\n    for i in range(len(outs)):\n        assert outs[i].shape[1] == out_channels\n        assert outs[i].shape[2] == outs[i].shape[3] == s // (2**i)\n\n    feat = torch.rand(1, 8, 4, 4)\n    # input feat must be tuple or list\n    with pytest.raises(AssertionError):\n        neck(feat)\n\n\ndef test_fpg():\n    # end_level=-1 is equal to end_level=3\n    norm_cfg = dict(type='BN', requires_grad=True)\n    FPG(in_channels=[8, 16, 32, 64],\n        out_channels=8,\n        inter_channels=8,\n        num_outs=5,\n        add_extra_convs=True,\n        start_level=1,\n        end_level=-1,\n        stack_times=9,\n        paths=['bu'] * 9,\n        same_down_trans=None,\n        same_up_trans=dict(\n            type='conv',\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_lateral_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_down_trans=dict(\n            type='interpolation_conv',\n            mode='nearest',\n            kernel_size=3,\n            norm_cfg=norm_cfg,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        across_up_trans=None,\n        across_skip_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        output_trans=dict(\n            type='last_conv',\n            kernel_size=3,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        norm_cfg=norm_cfg,\n        skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])\n    FPG(in_channels=[8, 16, 32, 64],\n        out_channels=8,\n        inter_channels=8,\n        num_outs=5,\n        add_extra_convs=True,\n        start_level=1,\n        end_level=3,\n        stack_times=9,\n        paths=['bu'] * 9,\n        same_down_trans=None,\n        same_up_trans=dict(\n            type='conv',\n            kernel_size=3,\n            stride=2,\n            padding=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_lateral_trans=dict(\n            type='conv',\n            kernel_size=1,\n            norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        across_down_trans=dict(\n            type='interpolation_conv',\n            mode='nearest',\n            kernel_size=3,\n            norm_cfg=norm_cfg,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        across_up_trans=None,\n        across_skip_trans=dict(\n            type='conv',\n            kernel_size=1,\n            
norm_cfg=norm_cfg,\n            inplace=False,\n            order=('act', 'conv', 'norm')),\n        output_trans=dict(\n            type='last_conv',\n            kernel_size=3,\n            order=('act', 'conv', 'norm'),\n            inplace=False),\n        norm_cfg=norm_cfg,\n        skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])\n\n    # `end_level` is larger than len(in_channels) - 1\n    with pytest.raises(AssertionError):\n        FPG(in_channels=[8, 16, 32, 64],\n            out_channels=8,\n            stack_times=9,\n            paths=['bu'] * 9,\n            start_level=1,\n            end_level=4,\n            num_outs=2,\n            skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])\n\n    # `num_outs` is not equal to end_level - start_level + 1\n    with pytest.raises(AssertionError):\n        FPG(in_channels=[8, 16, 32, 64],\n            out_channels=8,\n            stack_times=9,\n            paths=['bu'] * 9,\n            start_level=1,\n            end_level=2,\n            num_outs=3,\n            skip_inds=[(0, 1, 2, 3), (0, 1, 2), (0, 1), (0, ), ()])\n\n\ndef test_fpn_carafe():\n    # end_level=-1 is equal to end_level=3\n    FPN_CARAFE(\n        in_channels=[8, 16, 32, 64],\n        out_channels=8,\n        start_level=0,\n        end_level=3,\n        num_outs=4)\n    FPN_CARAFE(\n        in_channels=[8, 16, 32, 64],\n        out_channels=8,\n        start_level=0,\n        end_level=-1,\n        num_outs=4)\n    # `end_level` is larger than len(in_channels) - 1\n    with pytest.raises(AssertionError):\n        FPN_CARAFE(\n            in_channels=[8, 16, 32, 64],\n            out_channels=8,\n            start_level=1,\n            end_level=4,\n            num_outs=2)\n\n    # `num_outs` is not equal to end_level - start_level + 1\n    with pytest.raises(AssertionError):\n        FPN_CARAFE(\n            in_channels=[8, 16, 32, 64],\n            out_channels=8,\n            start_level=1,\n            end_level=2,\n            num_outs=3)\n\n\ndef test_nas_fpn():\n    # end_level=-1 is equal to end_level=3\n    NASFPN(\n        in_channels=[8, 16, 32, 64],\n        out_channels=8,\n        stack_times=9,\n        start_level=0,\n        end_level=3,\n        num_outs=4)\n    NASFPN(\n        in_channels=[8, 16, 32, 64],\n        out_channels=8,\n        stack_times=9,\n        start_level=0,\n        end_level=-1,\n        num_outs=4)\n    # `end_level` is larger than len(in_channels) - 1\n    with pytest.raises(AssertionError):\n        NASFPN(\n            in_channels=[8, 16, 32, 64],\n            out_channels=8,\n            stack_times=9,\n            start_level=1,\n            end_level=4,\n            num_outs=2)\n\n    # `num_outs` is not equal to end_level - start_level + 1\n    with pytest.raises(AssertionError):\n        NASFPN(\n            in_channels=[8, 16, 32, 64],\n            out_channels=8,\n            stack_times=9,\n            start_level=1,\n            end_level=2,\n            num_outs=3)\n\n\ndef test_nasfcos_fpn():\n    # end_level=-1 is equal to end_level=3\n    NASFCOS_FPN(\n        in_channels=[8, 16, 32, 64],\n        out_channels=8,\n        start_level=0,\n        end_level=3,\n        num_outs=4)\n    NASFCOS_FPN(\n        in_channels=[8, 16, 32, 64],\n        out_channels=8,\n        start_level=0,\n        end_level=-1,\n        num_outs=4)\n\n    # `end_level` is larger than len(in_channels) - 1\n    with pytest.raises(AssertionError):\n        NASFCOS_FPN(\n            in_channels=[8, 16, 32, 64],\n            
out_channels=8,\n            start_level=1,\n            end_level=4,\n            num_outs=2)\n\n    # `num_outs` is not equal to end_level - start_level + 1\n    with pytest.raises(AssertionError):\n        NASFCOS_FPN(\n            in_channels=[8, 16, 32, 64],\n            out_channels=8,\n            start_level=1,\n            end_level=2,\n            num_outs=3)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_plugins.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nfrom mmcv import ConfigDict\nfrom mmcv.cnn import build_plugin_layer\n\nfrom mmdet.models.plugins import DropBlock\n\n\ndef test_dropblock():\n    feat = torch.rand(1, 1, 11, 11)\n    drop_prob = 1.0\n    dropblock = DropBlock(drop_prob, block_size=11, warmup_iters=0)\n    out_feat = dropblock(feat)\n    assert (out_feat == 0).all() and out_feat.shape == feat.shape\n    drop_prob = 0.5\n    dropblock = DropBlock(drop_prob, block_size=5, warmup_iters=0)\n    out_feat = dropblock(feat)\n    assert out_feat.shape == feat.shape\n\n    # drop_prob must be (0,1]\n    with pytest.raises(AssertionError):\n        DropBlock(1.5, 3)\n\n    # block_size cannot be an even number\n    with pytest.raises(AssertionError):\n        DropBlock(0.5, 2)\n\n    # warmup_iters cannot be less than 0\n    with pytest.raises(AssertionError):\n        DropBlock(0.5, 3, -1)\n\n\ndef test_pixel_decoder():\n    base_channels = 64\n    pixel_decoder_cfg = ConfigDict(\n        dict(\n            type='PixelDecoder',\n            in_channels=[base_channels * 2**i for i in range(4)],\n            feat_channels=base_channels,\n            out_channels=base_channels,\n            norm_cfg=dict(type='GN', num_groups=32),\n            act_cfg=dict(type='ReLU')))\n    self = build_plugin_layer(pixel_decoder_cfg)[1]\n    img_metas = [{}, {}]\n    feats = [\n        torch.rand((2, base_channels * 2**i, 4 * 2**(3 - i), 5 * 2**(3 - i)))\n        for i in range(4)\n    ]\n    mask_feature, memory = self(feats, img_metas)\n\n    assert (memory == feats[-1]).all()\n    assert mask_feature.shape == feats[0].shape\n\n\ndef test_transformer_encoder_pixel_decoder():\n    base_channels = 64\n    pixel_decoder_cfg = ConfigDict(\n        dict(\n            type='TransformerEncoderPixelDecoder',\n            in_channels=[base_channels * 2**i for i in range(4)],\n            feat_channels=base_channels,\n            out_channels=base_channels,\n            norm_cfg=dict(type='GN', num_groups=32),\n            act_cfg=dict(type='ReLU'),\n            encoder=dict(\n                type='DetrTransformerEncoder',\n                num_layers=6,\n                transformerlayers=dict(\n                    type='BaseTransformerLayer',\n                    attn_cfgs=dict(\n                        type='MultiheadAttention',\n                        embed_dims=base_channels,\n                        num_heads=8,\n                        attn_drop=0.1,\n                        proj_drop=0.1,\n                        dropout_layer=None,\n                        batch_first=False),\n                    ffn_cfgs=dict(\n                        embed_dims=base_channels,\n                        feedforward_channels=base_channels * 8,\n                        num_fcs=2,\n                        act_cfg=dict(type='ReLU', inplace=True),\n                        ffn_drop=0.1,\n                        dropout_layer=None,\n                        add_identity=True),\n                    operation_order=('self_attn', 'norm', 'ffn', 'norm'),\n                    norm_cfg=dict(type='LN'),\n                    init_cfg=None,\n                    batch_first=False),\n                init_cfg=None),\n            positional_encoding=dict(\n                type='SinePositionalEncoding',\n                num_feats=base_channels // 2,\n                normalize=True)))\n    self = build_plugin_layer(pixel_decoder_cfg)[1]\n    img_metas = [{\n        'batch_input_shape': (128, 
160),\n        'img_shape': (120, 160, 3),\n    }, {\n        'batch_input_shape': (128, 160),\n        'img_shape': (125, 160, 3),\n    }]\n    feats = [\n        torch.rand((2, base_channels * 2**i, 4 * 2**(3 - i), 5 * 2**(3 - i)))\n        for i in range(4)\n    ]\n    mask_feature, memory = self(feats, img_metas)\n\n    assert memory.shape[-2:] == feats[-1].shape[-2:]\n    assert mask_feature.shape == feats[0].shape\n\n\ndef test_msdeformattn_pixel_decoder():\n    base_channels = 64\n    pixel_decoder_cfg = ConfigDict(\n        dict(\n            type='MSDeformAttnPixelDecoder',\n            in_channels=[base_channels * 2**i for i in range(4)],\n            strides=[4, 8, 16, 32],\n            feat_channels=base_channels,\n            out_channels=base_channels,\n            num_outs=3,\n            norm_cfg=dict(type='GN', num_groups=32),\n            act_cfg=dict(type='ReLU'),\n            encoder=dict(\n                type='DetrTransformerEncoder',\n                num_layers=6,\n                transformerlayers=dict(\n                    type='BaseTransformerLayer',\n                    attn_cfgs=dict(\n                        type='MultiScaleDeformableAttention',\n                        embed_dims=base_channels,\n                        num_heads=8,\n                        num_levels=3,\n                        num_points=4,\n                        im2col_step=64,\n                        dropout=0.0,\n                        batch_first=False,\n                        norm_cfg=None,\n                        init_cfg=None),\n                    ffn_cfgs=dict(\n                        type='FFN',\n                        embed_dims=base_channels,\n                        feedforward_channels=base_channels * 4,\n                        num_fcs=2,\n                        ffn_drop=0.0,\n                        act_cfg=dict(type='ReLU', inplace=True)),\n                    operation_order=('self_attn', 'norm', 'ffn', 'norm')),\n                init_cfg=None),\n            positional_encoding=dict(\n                type='SinePositionalEncoding',\n                num_feats=base_channels // 2,\n                normalize=True),\n            init_cfg=None), )\n    self = build_plugin_layer(pixel_decoder_cfg)[1]\n    feats = [\n        torch.rand((2, base_channels * 2**i, 4 * 2**(3 - i), 5 * 2**(3 - i)))\n        for i in range(4)\n    ]\n    mask_feature, multi_scale_features = self(feats)\n\n    assert mask_feature.shape == feats[0].shape\n    assert len(multi_scale_features) == 3\n    multi_scale_features = multi_scale_features[::-1]\n    for i in range(3):\n        assert multi_scale_features[i].shape[-2:] == feats[i + 1].shape[-2:]\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_roi_heads/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .utils import _dummy_bbox_sampling\n\n__all__ = ['_dummy_bbox_sampling']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_roi_heads/test_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport numpy as np\nimport pytest\nimport torch\n\nfrom mmdet.core import bbox2roi\nfrom mmdet.models.roi_heads.bbox_heads import BBoxHead\nfrom .utils import _dummy_bbox_sampling\n\n\ndef test_bbox_head_loss():\n    \"\"\"Tests bbox head loss when truth is empty and non-empty.\"\"\"\n    self = BBoxHead(in_channels=8, roi_feat_size=3)\n\n    # Dummy proposals\n    proposal_list = [\n        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),\n    ]\n\n    target_cfg = mmcv.Config(dict(pos_weight=1))\n\n    # Test bbox loss when truth is empty\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n\n    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,\n                                            gt_labels)\n\n    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,\n                                    target_cfg)\n    labels, label_weights, bbox_targets, bbox_weights = bbox_targets\n\n    # Create dummy features \"extracted\" for each sampled bbox\n    num_sampled = sum(len(res.bboxes) for res in sampling_results)\n    rois = bbox2roi([res.bboxes for res in sampling_results])\n    dummy_feats = torch.rand(num_sampled, 8 * 3 * 3)\n    cls_scores, bbox_preds = self.forward(dummy_feats)\n\n    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,\n                       bbox_targets, bbox_weights)\n    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'\n    assert losses.get('loss_bbox', 0) == 0, 'empty gt loss should be zero'\n\n    # Test bbox loss when truth is non-empty\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n\n    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,\n                                            gt_labels)\n    rois = bbox2roi([res.bboxes for res in sampling_results])\n\n    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,\n                                    target_cfg)\n    labels, label_weights, bbox_targets, bbox_weights = bbox_targets\n\n    # Create dummy features \"extracted\" for each sampled bbox\n    num_sampled = sum(len(res.bboxes) for res in sampling_results)\n    dummy_feats = torch.rand(num_sampled, 8 * 3 * 3)\n    cls_scores, bbox_preds = self.forward(dummy_feats)\n\n    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,\n                       bbox_targets, bbox_weights)\n    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'\n    assert losses.get('loss_bbox', 0) > 0, 'box-loss should be non-zero'\n\n\n@pytest.mark.parametrize('num_sample', [0, 1, 2])\ndef test_bbox_head_get_bboxes(num_sample):\n    self = BBoxHead(reg_class_agnostic=True)\n\n    num_class = 6\n    rois = torch.rand((num_sample, 5))\n    cls_score = torch.rand((num_sample, num_class))\n    bbox_pred = torch.rand((num_sample, 4))\n\n    scale_factor = np.array([2.0, 2.0, 2.0, 2.0])\n    det_bboxes, det_labels = self.get_bboxes(\n        rois, cls_score, bbox_pred, None, scale_factor, rescale=True)\n    if num_sample == 0:\n        assert len(det_bboxes) == 0 and len(det_labels) == 0\n    else:\n        assert det_bboxes.shape == bbox_pred.shape\n        assert det_labels.shape == cls_score.shape\n\n\ndef test_refine_boxes():\n    \"\"\"Mirrors the doctest in\n    ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` but checks for\n    multiple 
values of n_roi / n_img.\"\"\"\n    self = BBoxHead(reg_class_agnostic=True)\n\n    test_settings = [\n\n        # Corner case: less rois than images\n        {\n            'n_roi': 2,\n            'n_img': 4,\n            'rng': 34285940\n        },\n\n        # Corner case: no images\n        {\n            'n_roi': 0,\n            'n_img': 0,\n            'rng': 52925222\n        },\n\n        # Corner cases: few images / rois\n        {\n            'n_roi': 1,\n            'n_img': 1,\n            'rng': 1200281\n        },\n        {\n            'n_roi': 2,\n            'n_img': 1,\n            'rng': 1200282\n        },\n        {\n            'n_roi': 2,\n            'n_img': 2,\n            'rng': 1200283\n        },\n        {\n            'n_roi': 1,\n            'n_img': 2,\n            'rng': 1200284\n        },\n\n        # Corner case: no rois few images\n        {\n            'n_roi': 0,\n            'n_img': 1,\n            'rng': 23955860\n        },\n        {\n            'n_roi': 0,\n            'n_img': 2,\n            'rng': 25830516\n        },\n\n        # Corner case: no rois many images\n        {\n            'n_roi': 0,\n            'n_img': 10,\n            'rng': 671346\n        },\n        {\n            'n_roi': 0,\n            'n_img': 20,\n            'rng': 699807\n        },\n\n        # Corner case: cal_similarity num rois and images\n        {\n            'n_roi': 20,\n            'n_img': 20,\n            'rng': 1200238\n        },\n        {\n            'n_roi': 10,\n            'n_img': 20,\n            'rng': 1200238\n        },\n        {\n            'n_roi': 5,\n            'n_img': 5,\n            'rng': 1200238\n        },\n\n        # ----------------------------------\n        # Common case: more rois than images\n        {\n            'n_roi': 100,\n            'n_img': 1,\n            'rng': 337156\n        },\n        {\n            'n_roi': 150,\n            'n_img': 2,\n            'rng': 275898\n        },\n        {\n            'n_roi': 500,\n            'n_img': 5,\n            'rng': 4903221\n        },\n    ]\n\n    for demokw in test_settings:\n        try:\n            n_roi = demokw['n_roi']\n            n_img = demokw['n_img']\n            rng = demokw['rng']\n\n            print(f'Test refine_boxes case: {demokw!r}')\n            tup = _demodata_refine_boxes(n_roi, n_img, rng=rng)\n            rois, labels, bbox_preds, pos_is_gts, img_metas = tup\n            bboxes_list = self.refine_bboxes(rois, labels, bbox_preds,\n                                             pos_is_gts, img_metas)\n            assert len(bboxes_list) == n_img\n            assert sum(map(len, bboxes_list)) <= n_roi\n            assert all(b.shape[1] == 4 for b in bboxes_list)\n        except Exception:\n            print(f'Test failed with demokw={demokw!r}')\n            raise\n\n\ndef _demodata_refine_boxes(n_roi, n_img, rng=0):\n    \"\"\"Create random test data for the\n    ``mmdet.models.bbox_heads.bbox_head.BBoxHead.refine_boxes`` method.\"\"\"\n    import numpy as np\n\n    from mmdet.core.bbox.demodata import ensure_rng, random_boxes\n    try:\n        import kwarray\n    except ImportError:\n        import pytest\n        pytest.skip('kwarray is required for this test')\n    scale = 512\n    rng = ensure_rng(rng)\n    img_metas = [{'img_shape': (scale, scale)} for _ in range(n_img)]\n    # Create rois in the expected format\n    roi_boxes = random_boxes(n_roi, scale=scale, rng=rng)\n    if n_img == 0:\n        assert n_roi == 0, 'cannot 
have any rois if there are no images'\n        img_ids = torch.empty((0, ), dtype=torch.long)\n        roi_boxes = torch.empty((0, 4), dtype=torch.float32)\n    else:\n        img_ids = rng.randint(0, n_img, (n_roi, ))\n        img_ids = torch.from_numpy(img_ids)\n    rois = torch.cat([img_ids[:, None].float(), roi_boxes], dim=1)\n    # Create other args\n    labels = rng.randint(0, 2, (n_roi, ))\n    labels = torch.from_numpy(labels).long()\n    bbox_preds = random_boxes(n_roi, scale=scale, rng=rng)\n    # For each image, pretend random positive boxes are gts\n    # (use the builtin int dtype; the np.int alias was removed in NumPy >= 1.24)\n    is_label_pos = (labels.numpy() > 0).astype(int)\n    lbl_per_img = kwarray.group_items(is_label_pos, img_ids.numpy())\n    pos_per_img = [sum(lbl_per_img.get(gid, [])) for gid in range(n_img)]\n    # randomly generate with numpy then sort with torch\n    _pos_is_gts = [\n        rng.randint(0, 2, (npos, )).astype(np.uint8) for npos in pos_per_img\n    ]\n    pos_is_gts = [\n        torch.from_numpy(p).sort(descending=True)[0] for p in _pos_is_gts\n    ]\n    return rois, labels, bbox_preds, pos_is_gts, img_metas\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_roi_heads/test_mask_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.models.roi_heads.mask_heads import (DynamicMaskHead, FCNMaskHead,\n                                               MaskIoUHead)\nfrom .utils import _dummy_bbox_sampling\n\n\ndef test_mask_head_loss():\n    \"\"\"Test mask head loss when mask target is empty.\"\"\"\n    self = FCNMaskHead(\n        num_convs=1,\n        roi_feat_size=6,\n        in_channels=8,\n        conv_out_channels=8,\n        num_classes=8)\n\n    # Dummy proposals\n    proposal_list = [\n        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),\n    ]\n\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,\n                                            gt_labels)\n\n    # create dummy mask\n    import numpy as np\n\n    from mmdet.core import BitmapMasks\n    dummy_mask = np.random.randint(0, 2, (1, 160, 240), dtype=np.uint8)\n    gt_masks = [BitmapMasks(dummy_mask, 160, 240)]\n\n    # create dummy train_cfg\n    train_cfg = mmcv.Config(dict(mask_size=12, mask_thr_binary=0.5))\n\n    # Create dummy features \"extracted\" for each sampled bbox\n    num_sampled = sum(len(res.bboxes) for res in sampling_results)\n    dummy_feats = torch.rand(num_sampled, 8, 6, 6)\n\n    mask_pred = self.forward(dummy_feats)\n    mask_targets = self.get_targets(sampling_results, gt_masks, train_cfg)\n    pos_labels = torch.cat([res.pos_gt_labels for res in sampling_results])\n    loss_mask = self.loss(mask_pred, mask_targets, pos_labels)\n\n    onegt_mask_loss = sum(loss_mask['loss_mask'])\n    assert onegt_mask_loss.item() > 0, 'mask loss should be non-zero'\n\n    # test mask_iou_head\n    mask_iou_head = MaskIoUHead(\n        num_convs=1,\n        num_fcs=1,\n        roi_feat_size=6,\n        in_channels=8,\n        conv_out_channels=8,\n        fc_out_channels=8,\n        num_classes=8)\n\n    pos_mask_pred = mask_pred[range(mask_pred.size(0)), pos_labels]\n    mask_iou_pred = mask_iou_head(dummy_feats, pos_mask_pred)\n    pos_mask_iou_pred = mask_iou_pred[range(mask_iou_pred.size(0)), pos_labels]\n\n    mask_iou_targets = mask_iou_head.get_targets(sampling_results, gt_masks,\n                                                 pos_mask_pred, mask_targets,\n                                                 train_cfg)\n    loss_mask_iou = mask_iou_head.loss(pos_mask_iou_pred, mask_iou_targets)\n    onegt_mask_iou_loss = loss_mask_iou['loss_mask_iou'].sum()\n    assert onegt_mask_iou_loss.item() >= 0\n\n    # test dynamic_mask_head\n    dummy_proposal_feats = torch.rand(num_sampled, 8)\n    dynamic_mask_head = DynamicMaskHead(\n        dynamic_conv_cfg=dict(\n            type='DynamicConv',\n            in_channels=8,\n            feat_channels=8,\n            out_channels=8,\n            input_feat_shape=6,\n            with_proj=False,\n            act_cfg=dict(type='ReLU', inplace=True),\n            norm_cfg=dict(type='LN')),\n        num_convs=1,\n        num_classes=8,\n        in_channels=8,\n        roi_feat_size=6)\n\n    mask_pred = dynamic_mask_head(dummy_feats, dummy_proposal_feats)\n\n    mask_target = dynamic_mask_head.get_targets(sampling_results, gt_masks,\n                                                train_cfg)\n    loss_mask = dynamic_mask_head.loss(mask_pred, mask_target, pos_labels)\n    loss_mask = loss_mask['loss_mask'].sum()\n    assert loss_mask.item() >= 0\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_roi_heads/test_roi_extractor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.roi_heads.roi_extractors import GenericRoIExtractor\n\n\ndef test_groie():\n    # test with pre/post\n    cfg = dict(\n        roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32],\n        pre_cfg=dict(\n            type='ConvModule',\n            in_channels=256,\n            out_channels=256,\n            kernel_size=5,\n            padding=2,\n            inplace=False,\n        ),\n        post_cfg=dict(\n            type='ConvModule',\n            in_channels=256,\n            out_channels=256,\n            kernel_size=5,\n            padding=2,\n            inplace=False))\n\n    groie = GenericRoIExtractor(**cfg)\n\n    feats = (\n        torch.rand((1, 256, 200, 336)),\n        torch.rand((1, 256, 100, 168)),\n        torch.rand((1, 256, 50, 84)),\n        torch.rand((1, 256, 25, 42)),\n    )\n\n    rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]])\n\n    res = groie(feats, rois)\n    assert res.shape == torch.Size([1, 256, 7, 7])\n\n    # test w.o. pre/post\n    cfg = dict(\n        roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n        out_channels=256,\n        featmap_strides=[4, 8, 16, 32])\n\n    groie = GenericRoIExtractor(**cfg)\n\n    feats = (\n        torch.rand((1, 256, 200, 336)),\n        torch.rand((1, 256, 100, 168)),\n        torch.rand((1, 256, 50, 84)),\n        torch.rand((1, 256, 25, 42)),\n    )\n\n    rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]])\n\n    res = groie(feats, rois)\n    assert res.shape == torch.Size([1, 256, 7, 7])\n\n    # test w.o. pre/post concat\n    cfg = dict(\n        aggregation='concat',\n        roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n        out_channels=256 * 4,\n        featmap_strides=[4, 8, 16, 32])\n\n    groie = GenericRoIExtractor(**cfg)\n\n    feats = (\n        torch.rand((1, 256, 200, 336)),\n        torch.rand((1, 256, 100, 168)),\n        torch.rand((1, 256, 50, 84)),\n        torch.rand((1, 256, 25, 42)),\n    )\n\n    rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]])\n\n    res = groie(feats, rois)\n    assert res.shape == torch.Size([1, 1024, 7, 7])\n\n    # test not supported aggregate method\n    with pytest.raises(AssertionError):\n        cfg = dict(\n            aggregation='not support',\n            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n            out_channels=1024,\n            featmap_strides=[4, 8, 16, 32])\n        _ = GenericRoIExtractor(**cfg)\n\n    # test concat channels number\n    cfg = dict(\n        aggregation='concat',\n        roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=2),\n        out_channels=256 * 5,  # 256*5 != 256*4\n        featmap_strides=[4, 8, 16, 32])\n\n    groie = GenericRoIExtractor(**cfg)\n\n    feats = (\n        torch.rand((1, 256, 200, 336)),\n        torch.rand((1, 256, 100, 168)),\n        torch.rand((1, 256, 50, 84)),\n        torch.rand((1, 256, 25, 42)),\n    )\n\n    rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]])\n\n    # out_channels does not sum of feat channels\n    with pytest.raises(AssertionError):\n        _ = groie(feats, rois)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_roi_heads/test_sabl_bbox_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport mmcv\nimport torch\n\nfrom mmdet.core import bbox2roi\nfrom mmdet.models.roi_heads.bbox_heads import SABLHead\nfrom .utils import _dummy_bbox_sampling\n\n\ndef test_sabl_bbox_head_loss():\n    \"\"\"Tests bbox head loss when truth is empty and non-empty.\"\"\"\n    self = SABLHead(\n        num_classes=4,\n        cls_in_channels=3,\n        reg_in_channels=3,\n        cls_out_channels=3,\n        reg_offset_out_channels=3,\n        reg_cls_out_channels=3,\n        roi_feat_size=7)\n\n    # Dummy proposals\n    proposal_list = [\n        torch.Tensor([[23.6667, 23.8757, 228.6326, 153.8874]]),\n    ]\n\n    target_cfg = mmcv.Config(dict(pos_weight=1))\n\n    # Test bbox loss when truth is empty\n    gt_bboxes = [torch.empty((0, 4))]\n    gt_labels = [torch.LongTensor([])]\n\n    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,\n                                            gt_labels)\n\n    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,\n                                    target_cfg)\n    labels, label_weights, bbox_targets, bbox_weights = bbox_targets\n\n    # Create dummy features \"extracted\" for each sampled bbox\n    num_sampled = sum(len(res.bboxes) for res in sampling_results)\n    rois = bbox2roi([res.bboxes for res in sampling_results])\n    dummy_feats = torch.rand(num_sampled, 3, 7, 7)\n    cls_scores, bbox_preds = self.forward(dummy_feats)\n\n    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,\n                       bbox_targets, bbox_weights)\n    assert losses.get('loss_cls', 0) > 0, 'cls-loss should be non-zero'\n    assert losses.get('loss_bbox_cls',\n                      0) == 0, 'empty gt bbox-cls-loss should be zero'\n    assert losses.get('loss_bbox_reg',\n                      0) == 0, 'empty gt bbox-reg-loss should be zero'\n\n    # Test bbox loss when truth is non-empty\n    gt_bboxes = [\n        torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]),\n    ]\n    gt_labels = [torch.LongTensor([2])]\n\n    sampling_results = _dummy_bbox_sampling(proposal_list, gt_bboxes,\n                                            gt_labels)\n    rois = bbox2roi([res.bboxes for res in sampling_results])\n\n    bbox_targets = self.get_targets(sampling_results, gt_bboxes, gt_labels,\n                                    target_cfg)\n    labels, label_weights, bbox_targets, bbox_weights = bbox_targets\n\n    # Create dummy features \"extracted\" for each sampled bbox\n    num_sampled = sum(len(res.bboxes) for res in sampling_results)\n    dummy_feats = torch.rand(num_sampled, 3, 7, 7)\n    cls_scores, bbox_preds = self.forward(dummy_feats)\n\n    losses = self.loss(cls_scores, bbox_preds, rois, labels, label_weights,\n                       bbox_targets, bbox_weights)\n    assert losses.get('loss_bbox_cls',\n                      0) > 0, 'empty gt bbox-cls-loss should be zero'\n    assert losses.get('loss_bbox_reg',\n                      0) > 0, 'empty gt bbox-reg-loss should be zero'\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_roi_heads/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\n\nfrom mmdet.core import build_assigner, build_sampler\n\n\ndef _dummy_bbox_sampling(proposal_list, gt_bboxes, gt_labels):\n    \"\"\"Create sample results that can be passed to BBoxHead.get_targets.\"\"\"\n    num_imgs = 1\n    feat = torch.rand(1, 1, 3, 3)\n    assign_config = dict(\n        type='MaxIoUAssigner',\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        min_pos_iou=0.5,\n        ignore_iof_thr=-1)\n    sampler_config = dict(\n        type='RandomSampler',\n        num=512,\n        pos_fraction=0.25,\n        neg_pos_ub=-1,\n        add_gt_as_proposals=True)\n    bbox_assigner = build_assigner(assign_config)\n    bbox_sampler = build_sampler(sampler_config)\n    gt_bboxes_ignore = [None for _ in range(num_imgs)]\n    sampling_results = []\n    for i in range(num_imgs):\n        assign_result = bbox_assigner.assign(proposal_list[i], gt_bboxes[i],\n                                             gt_bboxes_ignore[i], gt_labels[i])\n        sampling_result = bbox_sampler.sample(\n            assign_result,\n            proposal_list[i],\n            gt_bboxes[i],\n            gt_labels[i],\n            feats=feat)\n        sampling_results.append(sampling_result)\n\n    return sampling_results\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_seg_heads/test_maskformer_fusion_head.py",
    "content": "import pytest\nimport torch\nfrom mmcv import ConfigDict\n\nfrom mmdet.models.seg_heads.panoptic_fusion_heads import MaskFormerFusionHead\n\n\ndef test_maskformer_fusion_head():\n    img_metas = [\n        {\n            'batch_input_shape': (128, 160),\n            'img_shape': (126, 160, 3),\n            'ori_shape': (63, 80, 3),\n            'pad_shape': (128, 160, 3)\n        },\n    ]\n    num_things_classes = 80\n    num_stuff_classes = 53\n    num_classes = num_things_classes + num_stuff_classes\n    config = ConfigDict(\n        type='MaskFormerFusionHead',\n        num_things_classes=num_things_classes,\n        num_stuff_classes=num_stuff_classes,\n        loss_panoptic=None,\n        test_cfg=dict(\n            panoptic_on=True,\n            semantic_on=False,\n            instance_on=True,\n            max_per_image=100,\n            object_mask_thr=0.8,\n            iou_thr=0.8,\n            filter_low_score=False),\n        init_cfg=None)\n\n    self = MaskFormerFusionHead(**config)\n\n    # test forward_train\n    assert self.forward_train() == dict()\n\n    mask_cls_results = torch.rand((1, 100, num_classes + 1))\n    mask_pred_results = torch.rand((1, 100, 128, 160))\n\n    # test panoptic_postprocess and instance_postprocess\n    results = self.simple_test(mask_cls_results, mask_pred_results, img_metas)\n    assert 'ins_results' in results[0] and 'pan_results' in results[0]\n\n    # test semantic_postprocess\n    config.test_cfg.semantic_on = True\n    with pytest.raises(AssertionError):\n        self.simple_test(mask_cls_results, mask_pred_results, img_metas)\n\n    with pytest.raises(NotImplementedError):\n        self.semantic_postprocess(mask_cls_results, mask_pred_results)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_utils/test_brick_wrappers.py",
    "content": "from unittest.mock import patch\n\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\n\nfrom mmdet.models.utils import AdaptiveAvgPool2d, adaptive_avg_pool2d\n\nif torch.__version__ != 'parrots':\n    torch_version = '1.7'\nelse:\n    torch_version = 'parrots'\n\n\n@patch('torch.__version__', torch_version)\ndef test_adaptive_avg_pool2d():\n    # Test the empty batch dimension\n    # Test the two input conditions\n    x_empty = torch.randn(0, 3, 4, 5)\n    # 1. tuple[int, int]\n    wrapper_out = adaptive_avg_pool2d(x_empty, (2, 2))\n    assert wrapper_out.shape == (0, 3, 2, 2)\n    # 2. int\n    wrapper_out = adaptive_avg_pool2d(x_empty, 2)\n    assert wrapper_out.shape == (0, 3, 2, 2)\n\n    # wrapper op with 3-dim input\n    x_normal = torch.randn(3, 3, 4, 5)\n    wrapper_out = adaptive_avg_pool2d(x_normal, (2, 2))\n    ref_out = F.adaptive_avg_pool2d(x_normal, (2, 2))\n    assert wrapper_out.shape == (3, 3, 2, 2)\n    assert torch.equal(wrapper_out, ref_out)\n\n    wrapper_out = adaptive_avg_pool2d(x_normal, 2)\n    ref_out = F.adaptive_avg_pool2d(x_normal, 2)\n    assert wrapper_out.shape == (3, 3, 2, 2)\n    assert torch.equal(wrapper_out, ref_out)\n\n\n@patch('torch.__version__', torch_version)\ndef test_AdaptiveAvgPool2d():\n    # Test the empty batch dimension\n    x_empty = torch.randn(0, 3, 4, 5)\n    # Test the four input conditions\n    # 1. tuple[int, int]\n    wrapper = AdaptiveAvgPool2d((2, 2))\n    wrapper_out = wrapper(x_empty)\n    assert wrapper_out.shape == (0, 3, 2, 2)\n\n    # 2. int\n    wrapper = AdaptiveAvgPool2d(2)\n    wrapper_out = wrapper(x_empty)\n    assert wrapper_out.shape == (0, 3, 2, 2)\n\n    # 3. tuple[None, int]\n    wrapper = AdaptiveAvgPool2d((None, 2))\n    wrapper_out = wrapper(x_empty)\n    assert wrapper_out.shape == (0, 3, 4, 2)\n\n    # 3. tuple[int, None]\n    wrapper = AdaptiveAvgPool2d((2, None))\n    wrapper_out = wrapper(x_empty)\n    assert wrapper_out.shape == (0, 3, 2, 5)\n\n    # Test the normal batch dimension\n    x_normal = torch.randn(3, 3, 4, 5)\n    wrapper = AdaptiveAvgPool2d((2, 2))\n    ref = nn.AdaptiveAvgPool2d((2, 2))\n    wrapper_out = wrapper(x_normal)\n    ref_out = ref(x_normal)\n    assert wrapper_out.shape == (3, 3, 2, 2)\n    assert torch.equal(wrapper_out, ref_out)\n\n    wrapper = AdaptiveAvgPool2d(2)\n    ref = nn.AdaptiveAvgPool2d(2)\n    wrapper_out = wrapper(x_normal)\n    ref_out = ref(x_normal)\n    assert wrapper_out.shape == (3, 3, 2, 2)\n    assert torch.equal(wrapper_out, ref_out)\n\n    wrapper = AdaptiveAvgPool2d((None, 2))\n    ref = nn.AdaptiveAvgPool2d((None, 2))\n    wrapper_out = wrapper(x_normal)\n    ref_out = ref(x_normal)\n    assert wrapper_out.shape == (3, 3, 4, 2)\n    assert torch.equal(wrapper_out, ref_out)\n\n    wrapper = AdaptiveAvgPool2d((2, None))\n    ref = nn.AdaptiveAvgPool2d((2, None))\n    wrapper_out = wrapper(x_normal)\n    ref_out = ref(x_normal)\n    assert wrapper_out.shape == (3, 3, 2, 5)\n    assert torch.equal(wrapper_out, ref_out)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_utils/test_conv_upsample.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.utils import ConvUpsample\n\n\n@pytest.mark.parametrize('num_layers', [0, 1, 2])\ndef test_conv_upsample(num_layers):\n    num_upsample = num_layers if num_layers > 0 else 0\n    num_layers = num_layers if num_layers > 0 else 1\n    layer = ConvUpsample(\n        10,\n        5,\n        num_layers=num_layers,\n        num_upsample=num_upsample,\n        conv_cfg=None,\n        norm_cfg=None)\n\n    size = 5\n    x = torch.randn((1, 10, size, size))\n    size = size * pow(2, num_upsample)\n    x = layer(x)\n    assert x.shape[-2:] == (size, size)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_utils/test_inverted_residual.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nfrom mmcv.cnn import is_norm\nfrom torch.nn.modules import GroupNorm\n\nfrom mmdet.models.utils import InvertedResidual, SELayer\n\n\ndef test_inverted_residual():\n\n    with pytest.raises(AssertionError):\n        # stride must be in [1, 2]\n        InvertedResidual(16, 16, 32, stride=3)\n\n    with pytest.raises(AssertionError):\n        # se_cfg must be None or dict\n        InvertedResidual(16, 16, 32, se_cfg=list())\n\n    with pytest.raises(AssertionError):\n        # in_channeld and mid_channels must be the same if\n        # with_expand_conv is False\n        InvertedResidual(16, 16, 32, with_expand_conv=False)\n\n    # Test InvertedResidual forward, stride=1\n    block = InvertedResidual(16, 16, 32, stride=1)\n    x = torch.randn(1, 16, 56, 56)\n    x_out = block(x)\n    assert getattr(block, 'se', None) is None\n    assert block.with_res_shortcut\n    assert x_out.shape == torch.Size((1, 16, 56, 56))\n\n    # Test InvertedResidual forward, stride=2\n    block = InvertedResidual(16, 16, 32, stride=2)\n    x = torch.randn(1, 16, 56, 56)\n    x_out = block(x)\n    assert not block.with_res_shortcut\n    assert x_out.shape == torch.Size((1, 16, 28, 28))\n\n    # Test InvertedResidual forward with se layer\n    se_cfg = dict(channels=32)\n    block = InvertedResidual(16, 16, 32, stride=1, se_cfg=se_cfg)\n    x = torch.randn(1, 16, 56, 56)\n    x_out = block(x)\n    assert isinstance(block.se, SELayer)\n    assert x_out.shape == torch.Size((1, 16, 56, 56))\n\n    # Test InvertedResidual forward, with_expand_conv=False\n    block = InvertedResidual(32, 16, 32, with_expand_conv=False)\n    x = torch.randn(1, 32, 56, 56)\n    x_out = block(x)\n    assert getattr(block, 'expand_conv', None) is None\n    assert x_out.shape == torch.Size((1, 16, 56, 56))\n\n    # Test InvertedResidual forward with GroupNorm\n    block = InvertedResidual(\n        16, 16, 32, norm_cfg=dict(type='GN', num_groups=2))\n    x = torch.randn(1, 16, 56, 56)\n    x_out = block(x)\n    for m in block.modules():\n        if is_norm(m):\n            assert isinstance(m, GroupNorm)\n    assert x_out.shape == torch.Size((1, 16, 56, 56))\n\n    # Test InvertedResidual forward with HSigmoid\n    block = InvertedResidual(16, 16, 32, act_cfg=dict(type='HSigmoid'))\n    x = torch.randn(1, 16, 56, 56)\n    x_out = block(x)\n    assert x_out.shape == torch.Size((1, 16, 56, 56))\n\n    # Test InvertedResidual forward with checkpoint\n    block = InvertedResidual(16, 16, 32, with_cp=True)\n    x = torch.randn(1, 16, 56, 56)\n    x_out = block(x)\n    assert block.with_cp\n    assert x_out.shape == torch.Size((1, 16, 56, 56))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_utils/test_model_misc.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport torch\nfrom torch.autograd import gradcheck\n\nfrom mmdet.models.utils import interpolate_as, sigmoid_geometric_mean\n\n\ndef test_interpolate_as():\n    source = torch.rand((1, 5, 4, 4))\n    target = torch.rand((1, 1, 16, 16))\n\n    # Test 4D source and target\n    result = interpolate_as(source, target)\n    assert result.shape == torch.Size((1, 5, 16, 16))\n\n    # Test 3D target\n    result = interpolate_as(source, target.squeeze(0))\n    assert result.shape == torch.Size((1, 5, 16, 16))\n\n    # Test 3D source\n    result = interpolate_as(source.squeeze(0), target)\n    assert result.shape == torch.Size((5, 16, 16))\n\n    # Test type(target) == np.ndarray\n    target = np.random.rand(16, 16)\n    result = interpolate_as(source.squeeze(0), target)\n    assert result.shape == torch.Size((5, 16, 16))\n\n\ndef test_sigmoid_geometric_mean():\n    x = torch.randn(20, 20, dtype=torch.double, requires_grad=True)\n    y = torch.randn(20, 20, dtype=torch.double, requires_grad=True)\n    inputs = (x, y)\n    test = gradcheck(sigmoid_geometric_mean, inputs, eps=1e-6, atol=1e-4)\n    assert test\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_utils/test_position_encoding.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.models.utils import (LearnedPositionalEncoding,\n                                SinePositionalEncoding)\n\n\ndef test_sine_positional_encoding(num_feats=16, batch_size=2):\n    # test invalid type of scale\n    with pytest.raises(AssertionError):\n        module = SinePositionalEncoding(\n            num_feats, scale=(3., ), normalize=True)\n\n    module = SinePositionalEncoding(num_feats)\n    h, w = 10, 6\n    mask = (torch.rand(batch_size, h, w) > 0.5).to(torch.int)\n    assert not module.normalize\n    out = module(mask)\n    assert out.shape == (batch_size, num_feats * 2, h, w)\n\n    # set normalize\n    module = SinePositionalEncoding(num_feats, normalize=True)\n    assert module.normalize\n    out = module(mask)\n    assert out.shape == (batch_size, num_feats * 2, h, w)\n\n\ndef test_learned_positional_encoding(num_feats=16,\n                                     row_num_embed=10,\n                                     col_num_embed=10,\n                                     batch_size=2):\n    module = LearnedPositionalEncoding(num_feats, row_num_embed, col_num_embed)\n    assert module.row_embed.weight.shape == (row_num_embed, num_feats)\n    assert module.col_embed.weight.shape == (col_num_embed, num_feats)\n    h, w = 10, 6\n    mask = torch.rand(batch_size, h, w) > 0.5\n    out = module(mask)\n    assert out.shape == (batch_size, num_feats * 2, h, w)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_utils/test_se_layer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nimport torch.nn.functional as F\nfrom mmcv.cnn import constant_init\n\nfrom mmdet.models.utils import DyReLU, SELayer\n\n\ndef test_se_layer():\n    with pytest.raises(AssertionError):\n        # act_cfg sequence length must equal to 2\n        SELayer(channels=32, act_cfg=(dict(type='ReLU'), ))\n\n    with pytest.raises(AssertionError):\n        # act_cfg sequence must be a tuple of dict\n        SELayer(channels=32, act_cfg=[dict(type='ReLU'), dict(type='ReLU')])\n\n    # Test SELayer forward\n    layer = SELayer(channels=32)\n    layer.init_weights()\n    layer.train()\n\n    x = torch.randn((1, 32, 10, 10))\n    x_out = layer(x)\n    assert x_out.shape == torch.Size((1, 32, 10, 10))\n\n\ndef test_dyrelu():\n    with pytest.raises(AssertionError):\n        # act_cfg sequence length must equal to 2\n        DyReLU(channels=32, act_cfg=(dict(type='ReLU'), ))\n\n    with pytest.raises(AssertionError):\n        # act_cfg sequence must be a tuple of dict\n        DyReLU(channels=32, act_cfg=[dict(type='ReLU'), dict(type='ReLU')])\n\n    # Test DyReLU forward\n    layer = DyReLU(channels=32)\n    layer.init_weights()\n    layer.train()\n    x = torch.randn((1, 32, 10, 10))\n    x_out = layer(x)\n    assert x_out.shape == torch.Size((1, 32, 10, 10))\n\n    # DyReLU should act as standard (static) ReLU\n    # when eliminating the effect of SE-like module\n    layer = DyReLU(channels=32)\n    constant_init(layer.conv2.conv, 0)\n    layer.train()\n    x = torch.randn((1, 32, 10, 10))\n    x_out = layer(x)\n    relu_out = F.relu(x)\n    assert torch.equal(x_out, relu_out)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_models/test_utils/test_transformer.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\nfrom mmcv.utils import ConfigDict\n\nfrom mmdet.models.utils.transformer import (AdaptivePadding,\n                                            DetrTransformerDecoder,\n                                            DetrTransformerEncoder, PatchEmbed,\n                                            PatchMerging, Transformer)\n\n\ndef test_adaptive_padding():\n\n    for padding in ('same', 'corner'):\n        kernel_size = 16\n        stride = 16\n        dilation = 1\n        input = torch.rand(1, 1, 15, 17)\n        pool = AdaptivePadding(\n            kernel_size=kernel_size,\n            stride=stride,\n            dilation=dilation,\n            padding=padding)\n        out = pool(input)\n        # padding to divisible by 16\n        assert (out.shape[2], out.shape[3]) == (16, 32)\n        input = torch.rand(1, 1, 16, 17)\n        out = pool(input)\n        # padding to divisible by 16\n        assert (out.shape[2], out.shape[3]) == (16, 32)\n\n        kernel_size = (2, 2)\n        stride = (2, 2)\n        dilation = (1, 1)\n\n        adap_pad = AdaptivePadding(\n            kernel_size=kernel_size,\n            stride=stride,\n            dilation=dilation,\n            padding=padding)\n        input = torch.rand(1, 1, 11, 13)\n        out = adap_pad(input)\n        # padding to divisible by 2\n        assert (out.shape[2], out.shape[3]) == (12, 14)\n\n        kernel_size = (2, 2)\n        stride = (10, 10)\n        dilation = (1, 1)\n\n        adap_pad = AdaptivePadding(\n            kernel_size=kernel_size,\n            stride=stride,\n            dilation=dilation,\n            padding=padding)\n        input = torch.rand(1, 1, 10, 13)\n        out = adap_pad(input)\n        #  no padding\n        assert (out.shape[2], out.shape[3]) == (10, 13)\n\n        kernel_size = (11, 11)\n        adap_pad = AdaptivePadding(\n            kernel_size=kernel_size,\n            stride=stride,\n            dilation=dilation,\n            padding=padding)\n        input = torch.rand(1, 1, 11, 13)\n        out = adap_pad(input)\n        #  all padding\n        assert (out.shape[2], out.shape[3]) == (21, 21)\n\n        # test padding as kernel is (7,9)\n        input = torch.rand(1, 1, 11, 13)\n        stride = (3, 4)\n        kernel_size = (4, 5)\n        dilation = (2, 2)\n        # actually (7, 9)\n        adap_pad = AdaptivePadding(\n            kernel_size=kernel_size,\n            stride=stride,\n            dilation=dilation,\n            padding=padding)\n        dilation_out = adap_pad(input)\n        assert (dilation_out.shape[2], dilation_out.shape[3]) == (16, 21)\n        kernel_size = (7, 9)\n        dilation = (1, 1)\n        adap_pad = AdaptivePadding(\n            kernel_size=kernel_size,\n            stride=stride,\n            dilation=dilation,\n            padding=padding)\n        kernel79_out = adap_pad(input)\n        assert (kernel79_out.shape[2], kernel79_out.shape[3]) == (16, 21)\n        assert kernel79_out.shape == dilation_out.shape\n\n    # assert only support \"same\" \"corner\"\n    with pytest.raises(AssertionError):\n        AdaptivePadding(\n            kernel_size=kernel_size,\n            stride=stride,\n            dilation=dilation,\n            padding=1)\n\n\ndef test_patch_embed():\n    B = 2\n    H = 3\n    W = 4\n    C = 3\n    embed_dims = 10\n    kernel_size = 3\n    stride = 1\n    dummy_input = torch.rand(B, C, H, W)\n    patch_merge_1 = PatchEmbed(\n        
in_channels=C,\n        embed_dims=embed_dims,\n        kernel_size=kernel_size,\n        stride=stride,\n        padding=0,\n        dilation=1,\n        norm_cfg=None)\n\n    x1, shape = patch_merge_1(dummy_input)\n    # test out shape\n    assert x1.shape == (2, 2, 10)\n    # test outsize is correct\n    assert shape == (1, 2)\n    # test L = out_h * out_w\n    assert shape[0] * shape[1] == x1.shape[1]\n\n    B = 2\n    H = 10\n    W = 10\n    C = 3\n    embed_dims = 10\n    kernel_size = 5\n    stride = 2\n    dummy_input = torch.rand(B, C, H, W)\n    # test dilation\n    patch_merge_2 = PatchEmbed(\n        in_channels=C,\n        embed_dims=embed_dims,\n        kernel_size=kernel_size,\n        stride=stride,\n        padding=0,\n        dilation=2,\n        norm_cfg=None,\n    )\n\n    x2, shape = patch_merge_2(dummy_input)\n    # test out shape\n    assert x2.shape == (2, 1, 10)\n    # test outsize is correct\n    assert shape == (1, 1)\n    # test L = out_h * out_w\n    assert shape[0] * shape[1] == x2.shape[1]\n\n    stride = 2\n    input_size = (10, 10)\n\n    dummy_input = torch.rand(B, C, H, W)\n    # test stride and norm\n    patch_merge_3 = PatchEmbed(\n        in_channels=C,\n        embed_dims=embed_dims,\n        kernel_size=kernel_size,\n        stride=stride,\n        padding=0,\n        dilation=2,\n        norm_cfg=dict(type='LN'),\n        input_size=input_size)\n\n    x3, shape = patch_merge_3(dummy_input)\n    # test out shape\n    assert x3.shape == (2, 1, 10)\n    # test outsize is correct\n    assert shape == (1, 1)\n    # test L = out_h * out_w\n    assert shape[0] * shape[1] == x3.shape[1]\n\n    # test the init_out_size with nn.Unfold\n    assert patch_merge_3.init_out_size[1] == (input_size[0] - 2 * 4 -\n                                              1) // 2 + 1\n    assert patch_merge_3.init_out_size[0] == (input_size[0] - 2 * 4 -\n                                              1) // 2 + 1\n    H = 11\n    W = 12\n    input_size = (H, W)\n    dummy_input = torch.rand(B, C, H, W)\n    # test stride and norm\n    patch_merge_3 = PatchEmbed(\n        in_channels=C,\n        embed_dims=embed_dims,\n        kernel_size=kernel_size,\n        stride=stride,\n        padding=0,\n        dilation=2,\n        norm_cfg=dict(type='LN'),\n        input_size=input_size)\n\n    _, shape = patch_merge_3(dummy_input)\n    # when input_size equal to real input\n    # the out_size should be equal to `init_out_size`\n    assert shape == patch_merge_3.init_out_size\n\n    input_size = (H, W)\n    dummy_input = torch.rand(B, C, H, W)\n    # test stride and norm\n    patch_merge_3 = PatchEmbed(\n        in_channels=C,\n        embed_dims=embed_dims,\n        kernel_size=kernel_size,\n        stride=stride,\n        padding=0,\n        dilation=2,\n        norm_cfg=dict(type='LN'),\n        input_size=input_size)\n\n    _, shape = patch_merge_3(dummy_input)\n    # when input_size equal to real input\n    # the out_size should be equal to `init_out_size`\n    assert shape == patch_merge_3.init_out_size\n\n    # test adap padding\n    for padding in ('same', 'corner'):\n        in_c = 2\n        embed_dims = 3\n        B = 2\n\n        # test stride is 1\n        input_size = (5, 5)\n        kernel_size = (5, 5)\n        stride = (1, 1)\n        dilation = 1\n        bias = False\n\n        x = torch.rand(B, in_c, *input_size)\n        patch_embed = PatchEmbed(\n            in_channels=in_c,\n            embed_dims=embed_dims,\n            kernel_size=kernel_size,\n            
stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        x_out, out_size = patch_embed(x)\n        assert x_out.size() == (B, 25, 3)\n        assert out_size == (5, 5)\n        assert x_out.size(1) == out_size[0] * out_size[1]\n\n        # test kernel_size == stride\n        input_size = (5, 5)\n        kernel_size = (5, 5)\n        stride = (5, 5)\n        dilation = 1\n        bias = False\n\n        x = torch.rand(B, in_c, *input_size)\n        patch_embed = PatchEmbed(\n            in_channels=in_c,\n            embed_dims=embed_dims,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        x_out, out_size = patch_embed(x)\n        assert x_out.size() == (B, 1, 3)\n        assert out_size == (1, 1)\n        assert x_out.size(1) == out_size[0] * out_size[1]\n\n        # test kernel_size == stride\n        input_size = (6, 5)\n        kernel_size = (5, 5)\n        stride = (5, 5)\n        dilation = 1\n        bias = False\n\n        x = torch.rand(B, in_c, *input_size)\n        patch_embed = PatchEmbed(\n            in_channels=in_c,\n            embed_dims=embed_dims,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        x_out, out_size = patch_embed(x)\n        assert x_out.size() == (B, 2, 3)\n        assert out_size == (2, 1)\n        assert x_out.size(1) == out_size[0] * out_size[1]\n\n        # test different kernel_size with different stride\n        input_size = (6, 5)\n        kernel_size = (6, 2)\n        stride = (6, 2)\n        dilation = 1\n        bias = False\n\n        x = torch.rand(B, in_c, *input_size)\n        patch_embed = PatchEmbed(\n            in_channels=in_c,\n            embed_dims=embed_dims,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        x_out, out_size = patch_embed(x)\n        assert x_out.size() == (B, 3, 3)\n        assert out_size == (1, 3)\n        assert x_out.size(1) == out_size[0] * out_size[1]\n\n\ndef test_patch_merging():\n\n    # Test the model with int padding\n    in_c = 3\n    out_c = 4\n    kernel_size = 3\n    stride = 3\n    padding = 1\n    dilation = 1\n    bias = False\n    # test the case `pad_to_stride` is False\n    patch_merge = PatchMerging(\n        in_channels=in_c,\n        out_channels=out_c,\n        kernel_size=kernel_size,\n        stride=stride,\n        padding=padding,\n        dilation=dilation,\n        bias=bias)\n    B, L, C = 1, 100, 3\n    input_size = (10, 10)\n    x = torch.rand(B, L, C)\n    x_out, out_size = patch_merge(x, input_size)\n    assert x_out.size() == (1, 16, 4)\n    assert out_size == (4, 4)\n    # assert out size is consistent with real output\n    assert x_out.size(1) == out_size[0] * out_size[1]\n    in_c = 4\n    out_c = 5\n    kernel_size = 6\n    stride = 3\n    padding = 2\n    dilation = 2\n    bias = False\n    patch_merge = PatchMerging(\n        in_channels=in_c,\n        out_channels=out_c,\n        kernel_size=kernel_size,\n        stride=stride,\n        padding=padding,\n        dilation=dilation,\n        bias=bias)\n    B, L, C = 1, 100, 4\n    input_size = (10, 10)\n    x = torch.rand(B, L, C)\n    x_out, out_size = patch_merge(x, input_size)\n    assert x_out.size() == (1, 4, 5)\n    assert out_size == 
(2, 2)\n    # assert out size is consistent with real output\n    assert x_out.size(1) == out_size[0] * out_size[1]\n\n    # Test with adaptive padding\n    for padding in ('same', 'corner'):\n        in_c = 2\n        out_c = 3\n        B = 2\n\n        # test stride is 1\n        input_size = (5, 5)\n        kernel_size = (5, 5)\n        stride = (1, 1)\n        dilation = 1\n        bias = False\n        L = input_size[0] * input_size[1]\n\n        x = torch.rand(B, L, in_c)\n        patch_merge = PatchMerging(\n            in_channels=in_c,\n            out_channels=out_c,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        x_out, out_size = patch_merge(x, input_size)\n        assert x_out.size() == (B, 25, 3)\n        assert out_size == (5, 5)\n        assert x_out.size(1) == out_size[0] * out_size[1]\n\n        # test kernel_size == stride\n        input_size = (5, 5)\n        kernel_size = (5, 5)\n        stride = (5, 5)\n        dilation = 1\n        bias = False\n        L = input_size[0] * input_size[1]\n\n        x = torch.rand(B, L, in_c)\n        patch_merge = PatchMerging(\n            in_channels=in_c,\n            out_channels=out_c,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        x_out, out_size = patch_merge(x, input_size)\n        assert x_out.size() == (B, 1, 3)\n        assert out_size == (1, 1)\n        assert x_out.size(1) == out_size[0] * out_size[1]\n\n        # test kernel_size == stride\n        input_size = (6, 5)\n        kernel_size = (5, 5)\n        stride = (5, 5)\n        dilation = 1\n        bias = False\n        L = input_size[0] * input_size[1]\n\n        x = torch.rand(B, L, in_c)\n        patch_merge = PatchMerging(\n            in_channels=in_c,\n            out_channels=out_c,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        x_out, out_size = patch_merge(x, input_size)\n        assert x_out.size() == (B, 2, 3)\n        assert out_size == (2, 1)\n        assert x_out.size(1) == out_size[0] * out_size[1]\n\n        # test different kernel_size with different stride\n        input_size = (6, 5)\n        kernel_size = (6, 2)\n        stride = (6, 2)\n        dilation = 1\n        bias = False\n        L = input_size[0] * input_size[1]\n\n        x = torch.rand(B, L, in_c)\n        patch_merge = PatchMerging(\n            in_channels=in_c,\n            out_channels=out_c,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            dilation=dilation,\n            bias=bias)\n\n        x_out, out_size = patch_merge(x, input_size)\n        assert x_out.size() == (B, 3, 3)\n        assert out_size == (1, 3)\n        assert x_out.size(1) == out_size[0] * out_size[1]\n\n\ndef test_detr_transformer_dencoder_encoder_layer():\n    config = ConfigDict(\n        dict(\n            return_intermediate=True,\n            num_layers=6,\n            transformerlayers=dict(\n                type='DetrTransformerDecoderLayer',\n                attn_cfgs=dict(\n                    type='MultiheadAttention',\n                    embed_dims=256,\n                    num_heads=8,\n                    dropout=0.1),\n                feedforward_channels=2048,\n                ffn_dropout=0.1,\n  
              operation_order=(\n                    'norm',\n                    'self_attn',\n                    'norm',\n                    'cross_attn',\n                    'norm',\n                    'ffn',\n                ))))\n    assert DetrTransformerDecoder(**config).layers[0].pre_norm\n    assert len(DetrTransformerDecoder(**config).layers) == 6\n\n    DetrTransformerDecoder(**config)\n    with pytest.raises(AssertionError):\n        config = ConfigDict(\n            dict(\n                return_intermediate=True,\n                num_layers=6,\n                transformerlayers=[\n                    dict(\n                        type='DetrTransformerDecoderLayer',\n                        attn_cfgs=dict(\n                            type='MultiheadAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            dropout=0.1),\n                        feedforward_channels=2048,\n                        ffn_dropout=0.1,\n                        operation_order=('self_attn', 'norm', 'cross_attn',\n                                         'norm', 'ffn', 'norm'))\n                ] * 5))\n        DetrTransformerDecoder(**config)\n\n    config = ConfigDict(\n        dict(\n            num_layers=6,\n            transformerlayers=dict(\n                type='DetrTransformerDecoderLayer',\n                attn_cfgs=dict(\n                    type='MultiheadAttention',\n                    embed_dims=256,\n                    num_heads=8,\n                    dropout=0.1),\n                feedforward_channels=2048,\n                ffn_dropout=0.1,\n                operation_order=('norm', 'self_attn', 'norm', 'cross_attn',\n                                 'norm', 'ffn', 'norm'))))\n\n    with pytest.raises(AssertionError):\n        # len(operation_order) == 6\n        DetrTransformerEncoder(**config)\n\n\ndef test_transformer():\n    config = ConfigDict(\n        dict(\n            encoder=dict(\n                type='DetrTransformerEncoder',\n                num_layers=6,\n                transformerlayers=dict(\n                    type='BaseTransformerLayer',\n                    attn_cfgs=[\n                        dict(\n                            type='MultiheadAttention',\n                            embed_dims=256,\n                            num_heads=8,\n                            dropout=0.1)\n                    ],\n                    feedforward_channels=2048,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'ffn', 'norm'))),\n            decoder=dict(\n                type='DetrTransformerDecoder',\n                return_intermediate=True,\n                num_layers=6,\n                transformerlayers=dict(\n                    type='DetrTransformerDecoderLayer',\n                    attn_cfgs=dict(\n                        type='MultiheadAttention',\n                        embed_dims=256,\n                        num_heads=8,\n                        dropout=0.1),\n                    feedforward_channels=2048,\n                    ffn_dropout=0.1,\n                    operation_order=('self_attn', 'norm', 'cross_attn', 'norm',\n                                     'ffn', 'norm')),\n            )))\n    transformer = Transformer(**config)\n    transformer.init_weights()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_onnx/__init__.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom .utils import ort_validate\n\n__all__ = ['ort_validate']\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_onnx/test_head.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nfrom functools import partial\n\nimport mmcv\nimport numpy as np\nimport pytest\nimport torch\nfrom mmcv.cnn import Scale\n\nfrom mmdet import digit_version\nfrom mmdet.models import build_detector\nfrom mmdet.models.dense_heads import (FCOSHead, FSAFHead, RetinaHead, SSDHead,\n                                      YOLOV3Head)\nfrom .utils import ort_validate\n\ndata_path = osp.join(osp.dirname(__file__), 'data')\n\nif digit_version(torch.__version__) <= digit_version('1.5.0'):\n    pytest.skip(\n        'ort backend does not support version below 1.5.0',\n        allow_module_level=True)\n\n\ndef test_cascade_onnx_export():\n\n    config_path = './configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.py'\n    cfg = mmcv.Config.fromfile(config_path)\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    with torch.no_grad():\n        model.forward = partial(model.forward, img_metas=[[dict()]])\n\n        dynamic_axes = {\n            'input_img': {\n                0: 'batch',\n                2: 'width',\n                3: 'height'\n            },\n            'dets': {\n                0: 'batch',\n                1: 'num_dets',\n            },\n            'labels': {\n                0: 'batch',\n                1: 'num_dets',\n            },\n        }\n        torch.onnx.export(\n            model, [torch.rand(1, 3, 400, 500)],\n            'tmp.onnx',\n            output_names=['dets', 'labels'],\n            input_names=['input_img'],\n            keep_initializers_as_inputs=True,\n            do_constant_folding=True,\n            verbose=False,\n            opset_version=11,\n            dynamic_axes=dynamic_axes)\n\n\ndef test_faster_onnx_export():\n\n    config_path = './configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py'\n    cfg = mmcv.Config.fromfile(config_path)\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    with torch.no_grad():\n        model.forward = partial(model.forward, img_metas=[[dict()]])\n\n        dynamic_axes = {\n            'input_img': {\n                0: 'batch',\n                2: 'width',\n                3: 'height'\n            },\n            'dets': {\n                0: 'batch',\n                1: 'num_dets',\n            },\n            'labels': {\n                0: 'batch',\n                1: 'num_dets',\n            },\n        }\n        torch.onnx.export(\n            model, [torch.rand(1, 3, 400, 500)],\n            'tmp.onnx',\n            output_names=['dets', 'labels'],\n            input_names=['input_img'],\n            keep_initializers_as_inputs=True,\n            do_constant_folding=True,\n            verbose=False,\n            opset_version=11,\n            dynamic_axes=dynamic_axes)\n\n\ndef retinanet_config():\n    \"\"\"RetinanNet Head Config.\"\"\"\n    head_cfg = dict(\n        stacked_convs=6,\n        feat_channels=2,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]))\n\n    test_cfg = mmcv.Config(\n        dict(\n            deploy_nms_pre=0,\n            min_bbox_size=0,\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            
max_per_img=100))\n\n    model = RetinaHead(\n        num_classes=4, in_channels=1, test_cfg=test_cfg, **head_cfg)\n    model.requires_grad_(False)\n\n    return model\n\n\ndef test_retina_head_forward_single():\n    \"\"\"Test RetinaNet Head single forward in torch and onnxruntime env.\"\"\"\n    retina_model = retinanet_config()\n\n    feat = torch.rand(1, retina_model.in_channels, 32, 32)\n    # validate the result between the torch and ort\n    ort_validate(retina_model.forward_single, feat)\n\n\ndef test_retina_head_forward():\n    \"\"\"Test RetinaNet Head forward in torch and onnxruntime env.\"\"\"\n    retina_model = retinanet_config()\n    s = 128\n    # RetinaNet head expects a multiple levels of features per image\n    feats = [\n        torch.rand(1, retina_model.in_channels, s // (2**(i + 2)),\n                   s // (2**(i + 2)))  # [32, 16, 8, 4, 2]\n        for i in range(len(retina_model.prior_generator.strides))\n    ]\n    ort_validate(retina_model.forward, feats)\n\n\ndef test_retinanet_head_onnx_export():\n    \"\"\"Test RetinaNet Head _get_bboxes() in torch and onnxruntime env.\"\"\"\n    retina_model = retinanet_config()\n    s = 128\n    img_metas = [{\n        'img_shape_for_onnx': torch.Tensor([s, s]),\n        'scale_factor': np.ones(4),\n        'pad_shape': (s, s, 3),\n        'img_shape': (s, s, 2)\n    }]\n\n    # The data of retina_head_get_bboxes.pkl contains two parts:\n    # cls_score(list(Tensor)) and bboxes(list(Tensor)),\n    # where each torch.Tensor is generated by torch.rand().\n    # the cls_score's size: (1, 36, 32, 32), (1, 36, 16, 16),\n    # (1, 36, 8, 8), (1, 36, 4, 4), (1, 36, 2, 2).\n    # the bboxes's size: (1, 36, 32, 32), (1, 36, 16, 16),\n    # (1, 36, 8, 8), (1, 36, 4, 4), (1, 36, 2, 2)\n    retina_head_data = 'retina_head_get_bboxes.pkl'\n    feats = mmcv.load(osp.join(data_path, retina_head_data))\n    cls_score = feats[:5]\n    bboxes = feats[5:]\n\n    retina_model.onnx_export = partial(\n        retina_model.onnx_export, img_metas=img_metas, with_nms=False)\n    ort_validate(retina_model.onnx_export, (cls_score, bboxes))\n\n\ndef yolo_config():\n    \"\"\"YoloV3 Head Config.\"\"\"\n    head_cfg = dict(\n        anchor_generator=dict(\n            type='YOLOAnchorGenerator',\n            base_sizes=[[(116, 90), (156, 198), (373, 326)],\n                        [(30, 61), (62, 45), (59, 119)],\n                        [(10, 13), (16, 30), (33, 23)]],\n            strides=[32, 16, 8]),\n        bbox_coder=dict(type='YOLOBBoxCoder'))\n\n    test_cfg = mmcv.Config(\n        dict(\n            deploy_nms_pre=0,\n            min_bbox_size=0,\n            score_thr=0.05,\n            conf_thr=0.005,\n            nms=dict(type='nms', iou_threshold=0.45),\n            max_per_img=100))\n\n    model = YOLOV3Head(\n        num_classes=4,\n        in_channels=[1, 1, 1],\n        out_channels=[16, 8, 4],\n        test_cfg=test_cfg,\n        **head_cfg)\n    model.requires_grad_(False)\n    # yolov3 need eval()\n    model.cpu().eval()\n    return model\n\n\ndef test_yolov3_head_forward():\n    \"\"\"Test Yolov3 head forward() in torch and ort env.\"\"\"\n    yolo_model = yolo_config()\n\n    # Yolov3 head expects a multiple levels of features per image\n    feats = [\n        torch.rand(1, 1, 64 // (2**(i + 2)), 64 // (2**(i + 2)))\n        for i in range(len(yolo_model.in_channels))\n    ]\n    ort_validate(yolo_model.forward, feats)\n\n\ndef test_yolov3_head_onnx_export():\n    \"\"\"Test yolov3 head get_bboxes() in torch and ort env.\"\"\"\n  
  yolo_model = yolo_config()\n    s = 128\n    img_metas = [{\n        'img_shape_for_onnx': torch.Tensor([s, s]),\n        'img_shape': (s, s, 3),\n        'scale_factor': np.ones(4),\n        'pad_shape': (s, s, 3)\n    }]\n\n    # The data of yolov3_head_get_bboxes.pkl contains\n    # a list of torch.Tensor, where each torch.Tensor\n    # is generated by torch.rand and each tensor size is:\n    # (1, 27, 32, 32), (1, 27, 16, 16), (1, 27, 8, 8).\n    yolo_head_data = 'yolov3_head_get_bboxes.pkl'\n    pred_maps = mmcv.load(osp.join(data_path, yolo_head_data))\n\n    yolo_model.onnx_export = partial(\n        yolo_model.onnx_export, img_metas=img_metas, with_nms=False)\n    ort_validate(yolo_model.onnx_export, pred_maps)\n\n\ndef fcos_config():\n    \"\"\"FCOS Head Config.\"\"\"\n    test_cfg = mmcv.Config(\n        dict(\n            deploy_nms_pre=0,\n            min_bbox_size=0,\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100))\n\n    model = FCOSHead(num_classes=4, in_channels=1, test_cfg=test_cfg)\n\n    model.requires_grad_(False)\n    return model\n\n\ndef test_fcos_head_forward_single():\n    \"\"\"Test fcos forward single in torch and ort env.\"\"\"\n    fcos_model = fcos_config()\n\n    feat = torch.rand(1, fcos_model.in_channels, 32, 32)\n    fcos_model.forward_single = partial(\n        fcos_model.forward_single,\n        scale=Scale(1.0).requires_grad_(False),\n        stride=(4, ))\n    ort_validate(fcos_model.forward_single, feat)\n\n\ndef test_fcos_head_forward():\n    \"\"\"Test fcos forward in mutil-level feature map.\"\"\"\n    fcos_model = fcos_config()\n    s = 128\n    feats = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    ort_validate(fcos_model.forward, feats)\n\n\ndef test_fcos_head_onnx_export():\n    \"\"\"Test fcos head get_bboxes() in ort.\"\"\"\n    fcos_model = fcos_config()\n    s = 128\n    img_metas = [{\n        'img_shape_for_onnx': torch.Tensor([s, s]),\n        'img_shape': (s, s, 3),\n        'scale_factor': np.ones(4),\n        'pad_shape': (s, s, 3)\n    }]\n\n    cls_scores = [\n        torch.rand(1, fcos_model.num_classes, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    bboxes = [\n        torch.rand(1, 4, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n    centerness = [\n        torch.rand(1, 1, s // feat_size, s // feat_size)\n        for feat_size in [4, 8, 16, 32, 64]\n    ]\n\n    fcos_model.onnx_export = partial(\n        fcos_model.onnx_export, img_metas=img_metas, with_nms=False)\n    ort_validate(fcos_model.onnx_export, (cls_scores, bboxes, centerness))\n\n\ndef fsaf_config():\n    \"\"\"FSAF Head Config.\"\"\"\n    cfg = dict(\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=1,\n            scales_per_octave=1,\n            ratios=[1.0],\n            strides=[8, 16, 32, 64, 128]))\n\n    test_cfg = mmcv.Config(\n        dict(\n            deploy_nms_pre=0,\n            min_bbox_size=0,\n            score_thr=0.05,\n            nms=dict(type='nms', iou_threshold=0.5),\n            max_per_img=100))\n\n    model = FSAFHead(num_classes=4, in_channels=1, test_cfg=test_cfg, **cfg)\n    model.requires_grad_(False)\n    return model\n\n\ndef test_fsaf_head_forward_single():\n    \"\"\"Test RetinaNet Head forward_single() in torch and onnxruntime env.\"\"\"\n    fsaf_model = 
fsaf_config()\n\n    feat = torch.rand(1, fsaf_model.in_channels, 32, 32)\n    ort_validate(fsaf_model.forward_single, feat)\n\n\ndef test_fsaf_head_forward():\n    \"\"\"Test RetinaNet Head forward in torch and onnxruntime env.\"\"\"\n    fsaf_model = fsaf_config()\n    s = 128\n    feats = [\n        torch.rand(1, fsaf_model.in_channels, s // (2**(i + 2)),\n                   s // (2**(i + 2)))\n        for i in range(len(fsaf_model.anchor_generator.strides))\n    ]\n    ort_validate(fsaf_model.forward, feats)\n\n\ndef test_fsaf_head_onnx_export():\n    \"\"\"Test RetinaNet Head get_bboxes in torch and onnxruntime env.\"\"\"\n    fsaf_model = fsaf_config()\n    s = 256\n    img_metas = [{\n        'img_shape_for_onnx': torch.Tensor([s, s]),\n        'scale_factor': np.ones(4),\n        'pad_shape': (s, s, 3),\n        'img_shape': (s, s, 2)\n    }]\n\n    # The data of fsaf_head_get_bboxes.pkl contains two parts:\n    # cls_score(list(Tensor)) and bboxes(list(Tensor)),\n    # where each torch.Tensor is generated by torch.rand().\n    # the cls_score's size: (1, 4, 64, 64), (1, 4, 32, 32),\n    # (1, 4, 16, 16), (1, 4, 8, 8), (1, 4, 4, 4).\n    # the bboxes's size: (1, 4, 64, 64), (1, 4, 32, 32),\n    # (1, 4, 16, 16), (1, 4, 8, 8), (1, 4, 4, 4).\n    fsaf_head_data = 'fsaf_head_get_bboxes.pkl'\n    feats = mmcv.load(osp.join(data_path, fsaf_head_data))\n    cls_score = feats[:5]\n    bboxes = feats[5:]\n\n    fsaf_model.onnx_export = partial(\n        fsaf_model.onnx_export, img_metas=img_metas, with_nms=False)\n    ort_validate(fsaf_model.onnx_export, (cls_score, bboxes))\n\n\ndef ssd_config():\n    \"\"\"SSD Head Config.\"\"\"\n    cfg = dict(\n        anchor_generator=dict(\n            type='SSDAnchorGenerator',\n            scale_major=False,\n            input_size=300,\n            basesize_ratio_range=(0.15, 0.9),\n            strides=[8, 16, 32, 64, 100, 300],\n            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[0.1, 0.1, 0.2, 0.2]))\n\n    test_cfg = mmcv.Config(\n        dict(\n            deploy_nms_pre=0,\n            nms=dict(type='nms', iou_threshold=0.45),\n            min_bbox_size=0,\n            score_thr=0.02,\n            max_per_img=200))\n\n    model = SSDHead(\n        num_classes=4,\n        in_channels=(4, 8, 4, 2, 2, 2),\n        test_cfg=test_cfg,\n        **cfg)\n\n    model.requires_grad_(False)\n    return model\n\n\ndef test_ssd_head_forward():\n    \"\"\"Test SSD Head forward in torch and onnxruntime env.\"\"\"\n    ssd_model = ssd_config()\n\n    featmap_size = [38, 19, 10, 6, 5, 3, 1]\n\n    feats = [\n        torch.rand(1, ssd_model.in_channels[i], featmap_size[i],\n                   featmap_size[i]) for i in range(len(ssd_model.in_channels))\n    ]\n    ort_validate(ssd_model.forward, feats)\n\n\ndef test_ssd_head_onnx_export():\n    \"\"\"Test SSD Head get_bboxes in torch and onnxruntime env.\"\"\"\n    ssd_model = ssd_config()\n    s = 300\n    img_metas = [{\n        'img_shape_for_onnx': torch.Tensor([s, s]),\n        'scale_factor': np.ones(4),\n        'pad_shape': (s, s, 3),\n        'img_shape': (s, s, 2)\n    }]\n\n    # The data of ssd_head_get_bboxes.pkl contains two parts:\n    # cls_score(list(Tensor)) and bboxes(list(Tensor)),\n    # where each torch.Tensor is generated by torch.rand().\n    # the cls_score's size: (1, 20, 38, 38), (1, 30, 19, 19),\n    # (1, 30, 10, 10), (1, 30, 5, 5), (1, 20, 
3, 3), (1, 20, 1, 1).\n    # the bboxes's size: (1, 16, 38, 38), (1, 24, 19, 19),\n    # (1, 24, 10, 10), (1, 24, 5, 5), (1, 16, 3, 3), (1, 16, 1, 1).\n    ssd_head_data = 'ssd_head_get_bboxes.pkl'\n    feats = mmcv.load(osp.join(data_path, ssd_head_data))\n    cls_score = feats[:6]\n    bboxes = feats[6:]\n\n    ssd_model.onnx_export = partial(\n        ssd_model.onnx_export, img_metas=img_metas, with_nms=False)\n    ort_validate(ssd_model.onnx_export, (cls_score, bboxes))\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_onnx/test_neck.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\n\nimport mmcv\nimport pytest\nimport torch\n\nfrom mmdet import digit_version\nfrom mmdet.models.necks import FPN, YOLOV3Neck\nfrom .utils import ort_validate\n\nif digit_version(torch.__version__) <= digit_version('1.5.0'):\n    pytest.skip(\n        'ort backend does not support version below 1.5.0',\n        allow_module_level=True)\n\n# Control the returned model of fpn_neck_config()\nfpn_test_step_names = {\n    'fpn_normal': 0,\n    'fpn_wo_extra_convs': 1,\n    'fpn_lateral_bns': 2,\n    'fpn_bilinear_upsample': 3,\n    'fpn_scale_factor': 4,\n    'fpn_extra_convs_inputs': 5,\n    'fpn_extra_convs_laterals': 6,\n    'fpn_extra_convs_outputs': 7,\n}\n\n# Control the returned model of yolo_neck_config()\nyolo_test_step_names = {'yolo_normal': 0}\n\ndata_path = osp.join(osp.dirname(__file__), 'data')\n\n\ndef fpn_neck_config(test_step_name):\n    \"\"\"Return the class containing the corresponding attributes according to\n    the fpn_test_step_names.\"\"\"\n    s = 64\n    in_channels = [8, 16, 32, 64]\n    feat_sizes = [s // 2**i for i in range(4)]  # [64, 32, 16, 8]\n    out_channels = 8\n\n    feats = [\n        torch.rand(1, in_channels[i], feat_sizes[i], feat_sizes[i])\n        for i in range(len(in_channels))\n    ]\n\n    if (fpn_test_step_names[test_step_name] == 0):\n        fpn_model = FPN(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            add_extra_convs=True,\n            num_outs=5)\n    elif (fpn_test_step_names[test_step_name] == 1):\n        fpn_model = FPN(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            add_extra_convs=False,\n            num_outs=5)\n    elif (fpn_test_step_names[test_step_name] == 2):\n        fpn_model = FPN(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            add_extra_convs=True,\n            no_norm_on_lateral=False,\n            norm_cfg=dict(type='BN', requires_grad=True),\n            num_outs=5)\n    elif (fpn_test_step_names[test_step_name] == 3):\n        fpn_model = FPN(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            add_extra_convs=True,\n            upsample_cfg=dict(mode='bilinear', align_corners=True),\n            num_outs=5)\n    elif (fpn_test_step_names[test_step_name] == 4):\n        fpn_model = FPN(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            add_extra_convs=True,\n            upsample_cfg=dict(scale_factor=2),\n            num_outs=5)\n    elif (fpn_test_step_names[test_step_name] == 5):\n        fpn_model = FPN(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            add_extra_convs='on_input',\n            num_outs=5)\n    elif (fpn_test_step_names[test_step_name] == 6):\n        fpn_model = FPN(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            add_extra_convs='on_lateral',\n            num_outs=5)\n    elif (fpn_test_step_names[test_step_name] == 7):\n        fpn_model = FPN(\n            in_channels=in_channels,\n            out_channels=out_channels,\n            add_extra_convs='on_output',\n            num_outs=5)\n    return fpn_model, feats\n\n\ndef yolo_neck_config(test_step_name):\n    \"\"\"Config yolov3 Neck.\"\"\"\n    in_channels = [16, 8, 4]\n    out_channels = [8, 4, 2]\n\n    # The data of yolov3_neck.pkl contains a list of\n    # 
torch.Tensor, where each torch.Tensor is generated by\n    # torch.rand and each tensor size is:\n    # (1, 4, 64, 64), (1, 8, 32, 32), (1, 16, 16, 16).\n    yolov3_neck_data = 'yolov3_neck.pkl'\n    feats = mmcv.load(osp.join(data_path, yolov3_neck_data))\n\n    if (yolo_test_step_names[test_step_name] == 0):\n        yolo_model = YOLOV3Neck(\n            in_channels=in_channels, out_channels=out_channels, num_scales=3)\n    return yolo_model, feats\n\n\ndef test_fpn_normal():\n    outs = fpn_neck_config('fpn_normal')\n    ort_validate(*outs)\n\n\ndef test_fpn_wo_extra_convs():\n    outs = fpn_neck_config('fpn_wo_extra_convs')\n    ort_validate(*outs)\n\n\ndef test_fpn_lateral_bns():\n    outs = fpn_neck_config('fpn_lateral_bns')\n    ort_validate(*outs)\n\n\ndef test_fpn_bilinear_upsample():\n    outs = fpn_neck_config('fpn_bilinear_upsample')\n    ort_validate(*outs)\n\n\ndef test_fpn_scale_factor():\n    outs = fpn_neck_config('fpn_scale_factor')\n    ort_validate(*outs)\n\n\ndef test_fpn_extra_convs_inputs():\n    outs = fpn_neck_config('fpn_extra_convs_inputs')\n    ort_validate(*outs)\n\n\ndef test_fpn_extra_convs_laterals():\n    outs = fpn_neck_config('fpn_extra_convs_laterals')\n    ort_validate(*outs)\n\n\ndef test_fpn_extra_convs_outputs():\n    outs = fpn_neck_config('fpn_extra_convs_outputs')\n    ort_validate(*outs)\n\n\ndef test_yolo_normal():\n    outs = yolo_neck_config('yolo_normal')\n    ort_validate(*outs)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_onnx/utils.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nimport os.path as osp\nimport warnings\n\nimport numpy as np\nimport onnx\nimport onnxruntime as ort\nimport torch\nimport torch.nn as nn\n\nort_custom_op_path = ''\ntry:\n    from mmcv.ops import get_onnxruntime_op_path\n    ort_custom_op_path = get_onnxruntime_op_path()\nexcept (ImportError, ModuleNotFoundError):\n    warnings.warn('If input model has custom op from mmcv, \\\n        you may have to build mmcv with ONNXRuntime from source.')\n\n\nclass WrapFunction(nn.Module):\n    \"\"\"Wrap the function to be tested for torch.onnx.export tracking.\"\"\"\n\n    def __init__(self, wrapped_function):\n        super(WrapFunction, self).__init__()\n        self.wrapped_function = wrapped_function\n\n    def forward(self, *args, **kwargs):\n        return self.wrapped_function(*args, **kwargs)\n\n\ndef ort_validate(model, feats, onnx_io='tmp.onnx'):\n    \"\"\"Validate the output of the onnxruntime backend is the same as the output\n    generated by torch.\n\n    Args:\n        model (nn.Module | function): the function of model or model\n            to be verified.\n        feats (tuple(list(torch.Tensor)) | list(torch.Tensor) | torch.Tensor):\n            the input of model.\n        onnx_io (str): the name of onnx output file.\n    \"\"\"\n    # if model is not an instance of nn.Module, then it is a normal\n    # function and it should be wrapped.\n    if isinstance(model, nn.Module):\n        wrap_model = model\n    else:\n        wrap_model = WrapFunction(model)\n    wrap_model.cpu().eval()\n    with torch.no_grad():\n        torch.onnx.export(\n            wrap_model,\n            feats,\n            onnx_io,\n            export_params=True,\n            keep_initializers_as_inputs=True,\n            do_constant_folding=True,\n            verbose=False,\n            opset_version=11)\n\n    if isinstance(feats, tuple):\n        ort_feats = []\n        for feat in feats:\n            ort_feats += feat\n    else:\n        ort_feats = feats\n    # default model name: tmp.onnx\n    onnx_outputs = get_ort_model_output(ort_feats)\n\n    # remove temp file\n    if osp.exists(onnx_io):\n        os.remove(onnx_io)\n\n    if isinstance(feats, tuple):\n        torch_outputs = convert_result_list(wrap_model.forward(*feats))\n    else:\n        torch_outputs = convert_result_list(wrap_model.forward(feats))\n    torch_outputs = [\n        torch_output.detach().numpy() for torch_output in torch_outputs\n    ]\n\n    # match torch_outputs and onnx_outputs\n    for i in range(len(onnx_outputs)):\n        np.testing.assert_allclose(\n            torch_outputs[i], onnx_outputs[i], rtol=1e-03, atol=1e-05)\n\n\ndef get_ort_model_output(feat, onnx_io='tmp.onnx'):\n    \"\"\"Run the model in onnxruntime env.\n\n    Args:\n        feat (list[Tensor]): A list of tensors from torch.rand,\n            each is a 4D-tensor.\n\n    Returns:\n        list[np.array]: onnxruntime infer result, each is a np.array\n    \"\"\"\n\n    onnx_model = onnx.load(onnx_io)\n    onnx.checker.check_model(onnx_model)\n\n    session_options = ort.SessionOptions()\n    # register custom op for onnxruntime\n    if osp.exists(ort_custom_op_path):\n        session_options.register_custom_ops_library(ort_custom_op_path)\n    sess = ort.InferenceSession(onnx_io, session_options)\n    if isinstance(feat, torch.Tensor):\n        onnx_outputs = sess.run(None,\n                                {sess.get_inputs()[0].name: feat.numpy()})\n    else:\n        onnx_outputs 
= sess.run(None, {\n            sess.get_inputs()[i].name: feat[i].numpy()\n            for i in range(len(feat))\n        })\n    return onnx_outputs\n\n\ndef convert_result_list(outputs):\n    \"\"\"Convert the torch forward outputs containing tuple or list to a list\n    only containing torch.Tensor.\n\n    Args:\n        output (list(Tensor) | tuple(list(Tensor) | ...): the outputs\n        in torch env, maybe containing nested structures such as list\n        or tuple.\n\n    Returns:\n        list(Tensor): a list only containing torch.Tensor\n    \"\"\"\n    # recursive end condition\n    if isinstance(outputs, torch.Tensor):\n        return [outputs]\n\n    ret = []\n    for sub in outputs:\n        ret += convert_result_list(sub)\n    return ret\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_runtime/async_benchmark.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport asyncio\nimport os\nimport shutil\nimport urllib\n\nimport mmcv\nimport torch\n\nfrom mmdet.apis import (async_inference_detector, inference_detector,\n                        init_detector)\nfrom mmdet.utils.contextmanagers import concurrent\nfrom mmdet.utils.profiling import profile_time\n\n\nasync def main():\n    \"\"\"Benchmark between async and synchronous inference interfaces.\n\n    Sample runs for 20 demo images on K80 GPU, model - mask_rcnn_r50_fpn_1x:\n\n    async       sync\n\n    7981.79 ms  9660.82 ms\n    8074.52 ms  9660.94 ms\n    7976.44 ms  9406.83 ms\n\n    Async variant takes about 0.83-0.85 of the time of the synchronous\n    interface.\n    \"\"\"\n    project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))\n    project_dir = os.path.join(project_dir, '..')\n\n    config_file = os.path.join(\n        project_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')\n    checkpoint_file = os.path.join(\n        project_dir,\n        'checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth')\n\n    if not os.path.exists(checkpoint_file):\n        url = ('https://download.openmmlab.com/mmdetection/v2.0'\n               '/mask_rcnn/mask_rcnn_r50_fpn_1x_coco'\n               '/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth')\n        print(f'Downloading {url} ...')\n        local_filename, _ = urllib.request.urlretrieve(url)\n        os.makedirs(os.path.dirname(checkpoint_file), exist_ok=True)\n        shutil.move(local_filename, checkpoint_file)\n        print(f'Saved as {checkpoint_file}')\n    else:\n        print(f'Using existing checkpoint {checkpoint_file}')\n\n    device = 'cuda:0'\n    model = init_detector(\n        config_file, checkpoint=checkpoint_file, device=device)\n\n    # queue is used for concurrent inference of multiple images\n    streamqueue = asyncio.Queue()\n    # queue size defines concurrency level\n    streamqueue_size = 4\n\n    for _ in range(streamqueue_size):\n        streamqueue.put_nowait(torch.cuda.Stream(device=device))\n\n    # test a single image and show the results\n    img = mmcv.imread(os.path.join(project_dir, 'demo/demo.jpg'))\n\n    # warmup\n    await async_inference_detector(model, img)\n\n    async def detect(img):\n        async with concurrent(streamqueue):\n            return await async_inference_detector(model, img)\n\n    num_of_images = 20\n    with profile_time('benchmark', 'async'):\n        tasks = [\n            asyncio.create_task(detect(img)) for _ in range(num_of_images)\n        ]\n        async_results = await asyncio.gather(*tasks)\n\n    with torch.cuda.stream(torch.cuda.default_stream()):\n        with profile_time('benchmark', 'sync'):\n            sync_results = [\n                inference_detector(model, img) for _ in range(num_of_images)\n            ]\n\n    result_dir = os.path.join(project_dir, 'demo')\n    model.show_result(\n        img,\n        async_results[0],\n        score_thr=0.5,\n        show=False,\n        out_file=os.path.join(result_dir, 'result_async.jpg'))\n    model.show_result(\n        img,\n        sync_results[0],\n        score_thr=0.5,\n        show=False,\n        out_file=os.path.join(result_dir, 'result_sync.jpg'))\n\n\nif __name__ == '__main__':\n    asyncio.run(main())\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_runtime/test_apis.py",
    "content": "import os\nfrom pathlib import Path\n\nimport pytest\n\nfrom mmdet.apis import init_detector\n\n\ndef test_init_detector():\n    project_dir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))\n    project_dir = os.path.join(project_dir, '..')\n\n    config_file = os.path.join(\n        project_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')\n\n    # test init_detector with config_file: str and cfg_options\n    cfg_options = dict(\n        model=dict(\n            backbone=dict(\n                depth=18,\n                init_cfg=dict(\n                    type='Pretrained', checkpoint='torchvision://resnet18'))))\n    model = init_detector(config_file, device='cpu', cfg_options=cfg_options)\n\n    # test init_detector with :obj:`Path`\n    config_path_object = Path(config_file)\n    model = init_detector(config_path_object, device='cpu')\n\n    # test init_detector with undesirable type\n    with pytest.raises(TypeError):\n        config_list = [config_file]\n        model = init_detector(config_list)  # noqa: F841\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_runtime/test_async.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"Tests for async interface.\"\"\"\n\nimport asyncio\nimport os\nimport sys\n\nimport asynctest\nimport mmcv\nimport torch\n\nfrom mmdet.apis import async_inference_detector, init_detector\n\nif sys.version_info >= (3, 7):\n    from mmdet.utils.contextmanagers import concurrent\n\n\nclass AsyncTestCase(asynctest.TestCase):\n    use_default_loop = False\n    forbid_get_event_loop = True\n\n    TEST_TIMEOUT = int(os.getenv('ASYNCIO_TEST_TIMEOUT', '30'))\n\n    def _run_test_method(self, method):\n        result = method()\n        if asyncio.iscoroutine(result):\n            self.loop.run_until_complete(\n                asyncio.wait_for(result, timeout=self.TEST_TIMEOUT))\n\n\nclass MaskRCNNDetector:\n\n    def __init__(self,\n                 model_config,\n                 checkpoint=None,\n                 streamqueue_size=3,\n                 device='cuda:0'):\n\n        self.streamqueue_size = streamqueue_size\n        self.device = device\n        # build the model and load checkpoint\n        self.model = init_detector(\n            model_config, checkpoint=None, device=self.device)\n        self.streamqueue = None\n\n    async def init(self):\n        self.streamqueue = asyncio.Queue()\n        for _ in range(self.streamqueue_size):\n            stream = torch.cuda.Stream(device=self.device)\n            self.streamqueue.put_nowait(stream)\n\n    if sys.version_info >= (3, 7):\n\n        async def apredict(self, img):\n            if isinstance(img, str):\n                img = mmcv.imread(img)\n            async with concurrent(self.streamqueue):\n                result = await async_inference_detector(self.model, img)\n            return result\n\n\nclass AsyncInferenceTestCase(AsyncTestCase):\n\n    if sys.version_info >= (3, 7):\n\n        async def test_simple_inference(self):\n            if not torch.cuda.is_available():\n                import pytest\n\n                pytest.skip('test requires GPU and torch+cuda')\n\n            ori_grad_enabled = torch.is_grad_enabled()\n            root_dir = os.path.dirname(os.path.dirname(__name__))\n            model_config = os.path.join(\n                root_dir, 'configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')\n            detector = MaskRCNNDetector(model_config)\n            await detector.init()\n            img_path = os.path.join(root_dir, 'demo/demo.jpg')\n            bboxes, _ = await detector.apredict(img_path)\n            self.assertTrue(bboxes)\n            # asy inference detector will hack grad_enabled,\n            # so restore here to avoid it to influence other tests\n            torch.set_grad_enabled(ori_grad_enabled)\n"
  },
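As a companion to `test_async.py` above, here is a minimal, hedged sketch of the async inference flow without the `asynctest` scaffolding. It assumes a CUDA device, the stock Mask R-CNN config path, and the `demo/demo.jpg` image shipped with mmdetection; with `checkpoint=None` the detections are meaningless, only the call pattern matters.

import asyncio

import torch

from mmdet.apis import async_inference_detector, init_detector
from mmdet.utils.contextmanagers import concurrent


async def detect_once(img_path):
    # Build the detector without weights (assumption: only the API usage matters here).
    model = init_detector('configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py',
                          checkpoint=None, device='cuda:0')
    # One CUDA stream in the queue is enough for a single concurrent request.
    streamqueue = asyncio.Queue()
    streamqueue.put_nowait(torch.cuda.Stream(device='cuda:0'))
    async with concurrent(streamqueue):
        return await async_inference_detector(model, img_path)


if torch.cuda.is_available():
    result = asyncio.run(detect_once('demo/demo.jpg'))  # assumed path in the repo root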
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_runtime/test_config.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom os.path import dirname, exists, join\nfrom unittest.mock import Mock\n\nimport pytest\n\nfrom mmdet.core import BitmapMasks, PolygonMasks\nfrom mmdet.datasets.builder import DATASETS\nfrom mmdet.datasets.utils import NumClassCheckHook\n\n\ndef _get_config_directory():\n    \"\"\"Find the predefined detector config directory.\"\"\"\n    try:\n        # Assume we are running in the source mmdetection repo\n        repo_dpath = dirname(dirname(__file__))\n        repo_dpath = join(repo_dpath, '..')\n    except NameError:\n        # For IPython development when this __file__ is not defined\n        import mmdet\n        repo_dpath = dirname(dirname(mmdet.__file__))\n    config_dpath = join(repo_dpath, 'configs')\n    if not exists(config_dpath):\n        raise Exception('Cannot find config path')\n    return config_dpath\n\n\ndef _check_numclasscheckhook(detector, config_mod):\n    dummy_runner = Mock()\n    dummy_runner.model = detector\n\n    def get_dataset_name_classes(dataset):\n        # deal with `RepeatDataset`,`ConcatDataset`,`ClassBalancedDataset`..\n        if isinstance(dataset, (list, tuple)):\n            dataset = dataset[0]\n        while ('dataset' in dataset):\n            dataset = dataset['dataset']\n            # ConcatDataset\n            if isinstance(dataset, (list, tuple)):\n                dataset = dataset[0]\n        return dataset['type'], dataset.get('classes', None)\n\n    compatible_check = NumClassCheckHook()\n    dataset_name, CLASSES = get_dataset_name_classes(\n        config_mod['data']['train'])\n    if CLASSES is None:\n        CLASSES = DATASETS.get(dataset_name).CLASSES\n    dummy_runner.data_loader.dataset.CLASSES = CLASSES\n    compatible_check.before_train_epoch(dummy_runner)\n\n    dummy_runner.data_loader.dataset.CLASSES = None\n    compatible_check.before_train_epoch(dummy_runner)\n\n    dataset_name, CLASSES = get_dataset_name_classes(config_mod['data']['val'])\n    if CLASSES is None:\n        CLASSES = DATASETS.get(dataset_name).CLASSES\n    dummy_runner.data_loader.dataset.CLASSES = CLASSES\n    compatible_check.before_val_epoch(dummy_runner)\n    dummy_runner.data_loader.dataset.CLASSES = None\n    compatible_check.before_val_epoch(dummy_runner)\n\n\ndef _check_roi_head(config, head):\n    # check consistency between head_config and roi_head\n    assert config['type'] == head.__class__.__name__\n\n    # check roi_align\n    bbox_roi_cfg = config.bbox_roi_extractor\n    bbox_roi_extractor = head.bbox_roi_extractor\n    _check_roi_extractor(bbox_roi_cfg, bbox_roi_extractor)\n\n    # check bbox head infos\n    bbox_cfg = config.bbox_head\n    bbox_head = head.bbox_head\n    _check_bbox_head(bbox_cfg, bbox_head)\n\n    if head.with_mask:\n        # check roi_align\n        if config.mask_roi_extractor:\n            mask_roi_cfg = config.mask_roi_extractor\n            mask_roi_extractor = head.mask_roi_extractor\n            _check_roi_extractor(mask_roi_cfg, mask_roi_extractor,\n                                 bbox_roi_extractor)\n\n        # check mask head infos\n        mask_head = head.mask_head\n        mask_cfg = config.mask_head\n        _check_mask_head(mask_cfg, mask_head)\n\n    # check arch specific settings, e.g., cascade/htc\n    if config['type'] in ['CascadeRoIHead', 'HybridTaskCascadeRoIHead']:\n        assert config.num_stages == len(head.bbox_head)\n        assert config.num_stages == len(head.bbox_roi_extractor)\n\n        if head.with_mask:\n            
assert config.num_stages == len(head.mask_head)\n            assert config.num_stages == len(head.mask_roi_extractor)\n\n    elif config['type'] in ['MaskScoringRoIHead']:\n        assert (hasattr(head, 'mask_iou_head')\n                and head.mask_iou_head is not None)\n        mask_iou_cfg = config.mask_iou_head\n        mask_iou_head = head.mask_iou_head\n        assert (mask_iou_cfg.fc_out_channels ==\n                mask_iou_head.fc_mask_iou.in_features)\n\n    elif config['type'] in ['GridRoIHead']:\n        grid_roi_cfg = config.grid_roi_extractor\n        grid_roi_extractor = head.grid_roi_extractor\n        _check_roi_extractor(grid_roi_cfg, grid_roi_extractor,\n                             bbox_roi_extractor)\n\n        config.grid_head.grid_points = head.grid_head.grid_points\n\n\ndef _check_roi_extractor(config, roi_extractor, prev_roi_extractor=None):\n    import torch.nn as nn\n\n    # Separate roi_extractor and prev_roi_extractor checks for flexibility\n    if isinstance(roi_extractor, nn.ModuleList):\n        roi_extractor = roi_extractor[0]\n    if prev_roi_extractor and isinstance(prev_roi_extractor, nn.ModuleList):\n        prev_roi_extractor = prev_roi_extractor[0]\n\n    assert (len(config.featmap_strides) == len(roi_extractor.roi_layers))\n    assert (config.out_channels == roi_extractor.out_channels)\n    from torch.nn.modules.utils import _pair\n    assert (_pair(config.roi_layer.output_size) ==\n            roi_extractor.roi_layers[0].output_size)\n\n    if 'use_torchvision' in config.roi_layer:\n        assert (config.roi_layer.use_torchvision ==\n                roi_extractor.roi_layers[0].use_torchvision)\n    elif 'aligned' in config.roi_layer:\n        assert (\n            config.roi_layer.aligned == roi_extractor.roi_layers[0].aligned)\n\n    if prev_roi_extractor:\n        assert (roi_extractor.roi_layers[0].aligned ==\n                prev_roi_extractor.roi_layers[0].aligned)\n        assert (roi_extractor.roi_layers[0].use_torchvision ==\n                prev_roi_extractor.roi_layers[0].use_torchvision)\n\n\ndef _check_mask_head(mask_cfg, mask_head):\n    import torch.nn as nn\n    if isinstance(mask_cfg, list):\n        for single_mask_cfg, single_mask_head in zip(mask_cfg, mask_head):\n            _check_mask_head(single_mask_cfg, single_mask_head)\n    elif isinstance(mask_head, nn.ModuleList):\n        for single_mask_head in mask_head:\n            _check_mask_head(mask_cfg, single_mask_head)\n    else:\n        assert mask_cfg['type'] == mask_head.__class__.__name__\n        assert mask_cfg.in_channels == mask_head.in_channels\n        class_agnostic = mask_cfg.get('class_agnostic', False)\n        out_dim = (1 if class_agnostic else mask_cfg.num_classes)\n        if hasattr(mask_head, 'conv_logits'):\n            assert (mask_cfg.conv_out_channels ==\n                    mask_head.conv_logits.in_channels)\n            assert mask_head.conv_logits.out_channels == out_dim\n        else:\n            assert mask_cfg.fc_out_channels == mask_head.fc_logits.in_features\n            assert (mask_head.fc_logits.out_features == out_dim *\n                    mask_head.output_area)\n\n\ndef _check_bbox_head(bbox_cfg, bbox_head):\n    import torch.nn as nn\n    if isinstance(bbox_cfg, list):\n        for single_bbox_cfg, single_bbox_head in zip(bbox_cfg, bbox_head):\n            _check_bbox_head(single_bbox_cfg, single_bbox_head)\n    elif isinstance(bbox_head, nn.ModuleList):\n        for single_bbox_head in bbox_head:\n            
_check_bbox_head(bbox_cfg, single_bbox_head)\n    else:\n        assert bbox_cfg['type'] == bbox_head.__class__.__name__\n        if bbox_cfg['type'] == 'SABLHead':\n            assert bbox_cfg.cls_in_channels == bbox_head.cls_in_channels\n            assert bbox_cfg.reg_in_channels == bbox_head.reg_in_channels\n\n            cls_out_channels = bbox_cfg.get('cls_out_channels', 1024)\n            assert (cls_out_channels == bbox_head.fc_cls.in_features)\n            assert (bbox_cfg.num_classes + 1 == bbox_head.fc_cls.out_features)\n\n        elif bbox_cfg['type'] == 'DIIHead':\n            assert bbox_cfg['num_ffn_fcs'] == bbox_head.ffn.num_fcs\n            # 3 means FC and LN and Relu\n            assert bbox_cfg['num_cls_fcs'] == len(bbox_head.cls_fcs) // 3\n            assert bbox_cfg['num_reg_fcs'] == len(bbox_head.reg_fcs) // 3\n            assert bbox_cfg['in_channels'] == bbox_head.in_channels\n            assert bbox_cfg['in_channels'] == bbox_head.fc_cls.in_features\n            assert bbox_cfg['in_channels'] == bbox_head.fc_reg.in_features\n            assert bbox_cfg['in_channels'] == bbox_head.attention.embed_dims\n            assert bbox_cfg[\n                'feedforward_channels'] == bbox_head.ffn.feedforward_channels\n\n        else:\n            assert bbox_cfg.in_channels == bbox_head.in_channels\n            with_cls = bbox_cfg.get('with_cls', True)\n\n            if with_cls:\n                fc_out_channels = bbox_cfg.get('fc_out_channels', 2048)\n                assert (fc_out_channels == bbox_head.fc_cls.in_features)\n                if bbox_head.custom_cls_channels:\n                    assert (bbox_head.loss_cls.get_cls_channels(\n                        bbox_head.num_classes) == bbox_head.fc_cls.out_features\n                            )\n                else:\n                    assert (bbox_cfg.num_classes +\n                            1 == bbox_head.fc_cls.out_features)\n            with_reg = bbox_cfg.get('with_reg', True)\n            if with_reg:\n                out_dim = (4 if bbox_cfg.reg_class_agnostic else 4 *\n                           bbox_cfg.num_classes)\n                assert bbox_head.fc_reg.out_features == out_dim\n\n\ndef _check_anchorhead(config, head):\n    # check consistency between head_config and roi_head\n    assert config['type'] == head.__class__.__name__\n    assert config.in_channels == head.in_channels\n\n    num_classes = (\n        config.num_classes -\n        1 if config.loss_cls.get('use_sigmoid', False) else config.num_classes)\n    if config['type'] == 'ATSSHead':\n        assert (config.feat_channels == head.atss_cls.in_channels)\n        assert (config.feat_channels == head.atss_reg.in_channels)\n        assert (config.feat_channels == head.atss_centerness.in_channels)\n    elif config['type'] == 'SABLRetinaHead':\n        assert (config.feat_channels == head.retina_cls.in_channels)\n        assert (config.feat_channels == head.retina_bbox_reg.in_channels)\n        assert (config.feat_channels == head.retina_bbox_cls.in_channels)\n    else:\n        assert (config.in_channels == head.conv_cls.in_channels)\n        assert (config.in_channels == head.conv_reg.in_channels)\n        assert (head.conv_cls.out_channels == num_classes * head.num_anchors)\n        assert head.fc_reg.out_channels == 4 * head.num_anchors\n\n\n# Only tests a representative subset of configurations\n# TODO: test pipelines using Albu, current Albu throw None given empty GT\n@pytest.mark.parametrize(\n    'config_rpath',\n    [\n        
'wider_face/ssd300_wider_face.py',\n        'pascal_voc/ssd300_voc0712.py',\n        'pascal_voc/ssd512_voc0712.py',\n        # 'albu_example/mask_rcnn_r50_fpn_1x.py',\n        'foveabox/fovea_align_r50_fpn_gn-head_mstrain_640-800_4x4_2x_coco.py',\n        'mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_1x_coco.py',\n        'mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain_1x_coco.py',\n        'mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py'\n    ])\ndef test_config_data_pipeline(config_rpath):\n    \"\"\"Test whether the data pipeline is valid and can process corner cases.\n\n    CommandLine:\n        xdoctest -m tests/test_runtime/\n            test_config.py test_config_build_data_pipeline\n    \"\"\"\n    import numpy as np\n    from mmcv import Config\n\n    from mmdet.datasets.pipelines import Compose\n\n    config_dpath = _get_config_directory()\n    print(f'Found config_dpath = {config_dpath}')\n\n    def dummy_masks(h, w, num_obj=3, mode='bitmap'):\n        assert mode in ('polygon', 'bitmap')\n        if mode == 'bitmap':\n            masks = np.random.randint(0, 2, (num_obj, h, w), dtype=np.uint8)\n            masks = BitmapMasks(masks, h, w)\n        else:\n            masks = []\n            for i in range(num_obj):\n                masks.append([])\n                masks[-1].append(\n                    np.random.uniform(0, min(h - 1, w - 1), (8 + 4 * i, )))\n                masks[-1].append(\n                    np.random.uniform(0, min(h - 1, w - 1), (10 + 4 * i, )))\n            masks = PolygonMasks(masks, h, w)\n        return masks\n\n    config_fpath = join(config_dpath, config_rpath)\n    cfg = Config.fromfile(config_fpath)\n\n    # remove loading pipeline\n    loading_pipeline = cfg.train_pipeline.pop(0)\n    loading_ann_pipeline = cfg.train_pipeline.pop(0)\n    cfg.test_pipeline.pop(0)\n\n    train_pipeline = Compose(cfg.train_pipeline)\n    test_pipeline = Compose(cfg.test_pipeline)\n\n    print(f'Building data pipeline, config_fpath = {config_fpath}')\n\n    print(f'Test training data pipeline: \\n{train_pipeline!r}')\n    img = np.random.randint(0, 255, size=(888, 666, 3), dtype=np.uint8)\n    if loading_pipeline.get('to_float32', False):\n        img = img.astype(np.float32)\n    mode = 'bitmap' if loading_ann_pipeline.get('poly2mask',\n                                                True) else 'polygon'\n    results = dict(\n        filename='test_img.png',\n        ori_filename='test_img.png',\n        img=img,\n        img_shape=img.shape,\n        ori_shape=img.shape,\n        gt_bboxes=np.array([[35.2, 11.7, 39.7, 15.7]], dtype=np.float32),\n        gt_labels=np.array([1], dtype=np.int64),\n        gt_masks=dummy_masks(img.shape[0], img.shape[1], mode=mode),\n    )\n    results['img_fields'] = ['img']\n    results['bbox_fields'] = ['gt_bboxes']\n    results['mask_fields'] = ['gt_masks']\n    output_results = train_pipeline(results)\n    assert output_results is not None\n\n    print(f'Test testing data pipeline: \\n{test_pipeline!r}')\n    results = dict(\n        filename='test_img.png',\n        ori_filename='test_img.png',\n        img=img,\n        img_shape=img.shape,\n        ori_shape=img.shape,\n        gt_bboxes=np.array([[35.2, 11.7, 39.7, 15.7]], dtype=np.float32),\n        gt_labels=np.array([1], dtype=np.int64),\n        gt_masks=dummy_masks(img.shape[0], img.shape[1], mode=mode),\n    )\n    results['img_fields'] = ['img']\n    results['bbox_fields'] = ['gt_bboxes']\n    results['mask_fields'] = ['gt_masks']\n    output_results = 
test_pipeline(results)\n    assert output_results is not None\n\n    # test empty GT\n    print('Test empty GT with training data pipeline: '\n          f'\\n{train_pipeline!r}')\n    results = dict(\n        filename='test_img.png',\n        ori_filename='test_img.png',\n        img=img,\n        img_shape=img.shape,\n        ori_shape=img.shape,\n        gt_bboxes=np.zeros((0, 4), dtype=np.float32),\n        gt_labels=np.array([], dtype=np.int64),\n        gt_masks=dummy_masks(img.shape[0], img.shape[1], num_obj=0, mode=mode),\n    )\n    results['img_fields'] = ['img']\n    results['bbox_fields'] = ['gt_bboxes']\n    results['mask_fields'] = ['gt_masks']\n    output_results = train_pipeline(results)\n    assert output_results is not None\n\n    print(f'Test empty GT with testing data pipeline: \\n{test_pipeline!r}')\n    results = dict(\n        filename='test_img.png',\n        ori_filename='test_img.png',\n        img=img,\n        img_shape=img.shape,\n        ori_shape=img.shape,\n        gt_bboxes=np.zeros((0, 4), dtype=np.float32),\n        gt_labels=np.array([], dtype=np.int64),\n        gt_masks=dummy_masks(img.shape[0], img.shape[1], num_obj=0, mode=mode),\n    )\n    results['img_fields'] = ['img']\n    results['bbox_fields'] = ['gt_bboxes']\n    results['mask_fields'] = ['gt_masks']\n    output_results = test_pipeline(results)\n    assert output_results is not None\n"
  },
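`test_config_data_pipeline` above strips the file-loading steps from a config and pushes an in-memory image through the remaining transforms. The condensed sketch below mirrors that recipe under the same assumptions: the Mask R-CNN config path must resolve relative to an mmdetection checkout, and the image and mask are random placeholders.

import numpy as np
from mmcv import Config

from mmdet.core import BitmapMasks
from mmdet.datasets.pipelines import Compose

# Assumed path relative to the mmdetection root, as in the test above.
cfg = Config.fromfile('configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py')

# Drop LoadImageFromFile / LoadAnnotations so a dict with an in-memory image works.
cfg.train_pipeline.pop(0)
cfg.train_pipeline.pop(0)
train_pipeline = Compose(cfg.train_pipeline)

h, w = 888, 666
img = np.random.randint(0, 255, size=(h, w, 3), dtype=np.uint8)
results = dict(
    filename='test_img.png',
    ori_filename='test_img.png',
    img=img,
    img_shape=img.shape,
    ori_shape=img.shape,
    gt_bboxes=np.array([[35.2, 11.7, 39.7, 15.7]], dtype=np.float32),
    gt_labels=np.array([1], dtype=np.int64),
    gt_masks=BitmapMasks(
        np.random.randint(0, 2, (1, h, w), dtype=np.uint8), h, w),
    img_fields=['img'],
    bbox_fields=['gt_bboxes'],
    mask_fields=['gt_masks'],
)
print(train_pipeline(results) is not None)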
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_runtime/test_eval_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nimport tempfile\nimport unittest.mock as mock\nfrom collections import OrderedDict\nfrom unittest.mock import MagicMock, patch\n\nimport pytest\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import EpochBasedRunner, build_optimizer\nfrom mmcv.utils import get_logger\nfrom torch.utils.data import DataLoader, Dataset\n\nfrom mmdet.core import DistEvalHook, EvalHook\n\n\nclass ExampleDataset(Dataset):\n\n    def __init__(self):\n        self.index = 0\n        self.eval_result = [0.1, 0.4, 0.3, 0.7, 0.2, 0.05, 0.4, 0.6]\n\n    def __getitem__(self, idx):\n        results = dict(imgs=torch.tensor([1]))\n        return results\n\n    def __len__(self):\n        return 1\n\n    @mock.create_autospec\n    def evaluate(self, results, logger=None):\n        pass\n\n\nclass EvalDataset(ExampleDataset):\n\n    def evaluate(self, results, logger=None):\n        mean_ap = self.eval_result[self.index]\n        output = OrderedDict(mAP=mean_ap, index=self.index, score=mean_ap)\n        self.index += 1\n        return output\n\n\nclass ExampleModel(nn.Module):\n\n    def __init__(self):\n        super().__init__()\n        self.conv = nn.Linear(1, 1)\n        self.test_cfg = None\n\n    def forward(self, imgs, rescale=False, return_loss=False):\n        return imgs\n\n    def train_step(self, data_batch, optimizer, **kwargs):\n        outputs = {\n            'loss': 0.5,\n            'log_vars': {\n                'accuracy': 0.98\n            },\n            'num_samples': 1\n        }\n        return outputs\n\n\n@pytest.mark.skipif(\n    not torch.cuda.is_available(), reason='requires CUDA support')\n@patch('mmdet.apis.single_gpu_test', MagicMock)\n@patch('mmdet.apis.multi_gpu_test', MagicMock)\n@pytest.mark.parametrize('EvalHookCls', (EvalHook, DistEvalHook))\ndef test_eval_hook(EvalHookCls):\n    with pytest.raises(TypeError):\n        # dataloader must be a pytorch DataLoader\n        test_dataset = ExampleDataset()\n        data_loader = [\n            DataLoader(\n                test_dataset,\n                batch_size=1,\n                sampler=None,\n                num_worker=0,\n                shuffle=False)\n        ]\n        EvalHookCls(data_loader)\n\n    with pytest.raises(KeyError):\n        # rule must be in keys of rule_map\n        test_dataset = ExampleDataset()\n        data_loader = DataLoader(\n            test_dataset,\n            batch_size=1,\n            sampler=None,\n            num_workers=0,\n            shuffle=False)\n        EvalHookCls(data_loader, save_best='auto', rule='unsupport')\n\n    with pytest.raises(ValueError):\n        # key_indicator must be valid when rule_map is None\n        test_dataset = ExampleDataset()\n        data_loader = DataLoader(\n            test_dataset,\n            batch_size=1,\n            sampler=None,\n            num_workers=0,\n            shuffle=False)\n        EvalHookCls(data_loader, save_best='unsupport')\n\n    optimizer_cfg = dict(\n        type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)\n\n    test_dataset = ExampleDataset()\n    loader = DataLoader(test_dataset, batch_size=1)\n    model = ExampleModel()\n    optimizer = build_optimizer(model, optimizer_cfg)\n\n    data_loader = DataLoader(test_dataset, batch_size=1)\n    eval_hook = EvalHookCls(data_loader, save_best=None)\n    with tempfile.TemporaryDirectory() as tmpdir:\n        logger = get_logger('test_eval')\n        runner = EpochBasedRunner(\n            
model=model,\n            batch_processor=None,\n            optimizer=optimizer,\n            work_dir=tmpdir,\n            logger=logger)\n        runner.register_hook(eval_hook)\n        runner.run([loader], [('train', 1)], 1)\n        assert runner.meta is None or 'best_score' not in runner.meta[\n            'hook_msgs']\n        assert runner.meta is None or 'best_ckpt' not in runner.meta[\n            'hook_msgs']\n\n    # when `save_best` is set to 'auto', first metric will be used.\n    loader = DataLoader(EvalDataset(), batch_size=1)\n    model = ExampleModel()\n    data_loader = DataLoader(EvalDataset(), batch_size=1)\n    eval_hook = EvalHookCls(data_loader, interval=1, save_best='auto')\n\n    with tempfile.TemporaryDirectory() as tmpdir:\n        logger = get_logger('test_eval')\n        runner = EpochBasedRunner(\n            model=model,\n            batch_processor=None,\n            optimizer=optimizer,\n            work_dir=tmpdir,\n            logger=logger)\n        runner.register_checkpoint_hook(dict(interval=1))\n        runner.register_hook(eval_hook)\n        runner.run([loader], [('train', 1)], 8)\n\n        real_path = osp.join(tmpdir, 'best_mAP_epoch_4.pth')\n\n        assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)\n        assert runner.meta['hook_msgs']['best_score'] == 0.7\n\n    loader = DataLoader(EvalDataset(), batch_size=1)\n    model = ExampleModel()\n    data_loader = DataLoader(EvalDataset(), batch_size=1)\n    eval_hook = EvalHookCls(data_loader, interval=1, save_best='mAP')\n\n    with tempfile.TemporaryDirectory() as tmpdir:\n        logger = get_logger('test_eval')\n        runner = EpochBasedRunner(\n            model=model,\n            batch_processor=None,\n            optimizer=optimizer,\n            work_dir=tmpdir,\n            logger=logger)\n        runner.register_checkpoint_hook(dict(interval=1))\n        runner.register_hook(eval_hook)\n        runner.run([loader], [('train', 1)], 8)\n\n        real_path = osp.join(tmpdir, 'best_mAP_epoch_4.pth')\n\n        assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)\n        assert runner.meta['hook_msgs']['best_score'] == 0.7\n\n    data_loader = DataLoader(EvalDataset(), batch_size=1)\n    eval_hook = EvalHookCls(\n        data_loader, interval=1, save_best='score', rule='greater')\n    with tempfile.TemporaryDirectory() as tmpdir:\n        logger = get_logger('test_eval')\n        runner = EpochBasedRunner(\n            model=model,\n            batch_processor=None,\n            optimizer=optimizer,\n            work_dir=tmpdir,\n            logger=logger)\n        runner.register_checkpoint_hook(dict(interval=1))\n        runner.register_hook(eval_hook)\n        runner.run([loader], [('train', 1)], 8)\n\n        real_path = osp.join(tmpdir, 'best_score_epoch_4.pth')\n\n        assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)\n        assert runner.meta['hook_msgs']['best_score'] == 0.7\n\n    data_loader = DataLoader(EvalDataset(), batch_size=1)\n    eval_hook = EvalHookCls(data_loader, save_best='mAP', rule='less')\n    with tempfile.TemporaryDirectory() as tmpdir:\n        logger = get_logger('test_eval')\n        runner = EpochBasedRunner(\n            model=model,\n            batch_processor=None,\n            optimizer=optimizer,\n            work_dir=tmpdir,\n            logger=logger)\n        runner.register_checkpoint_hook(dict(interval=1))\n        runner.register_hook(eval_hook)\n        runner.run([loader], 
[('train', 1)], 8)\n\n        real_path = osp.join(tmpdir, 'best_mAP_epoch_6.pth')\n\n        assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)\n        assert runner.meta['hook_msgs']['best_score'] == 0.05\n\n    data_loader = DataLoader(EvalDataset(), batch_size=1)\n    eval_hook = EvalHookCls(data_loader, save_best='mAP')\n    with tempfile.TemporaryDirectory() as tmpdir:\n        logger = get_logger('test_eval')\n        runner = EpochBasedRunner(\n            model=model,\n            batch_processor=None,\n            optimizer=optimizer,\n            work_dir=tmpdir,\n            logger=logger)\n        runner.register_checkpoint_hook(dict(interval=1))\n        runner.register_hook(eval_hook)\n        runner.run([loader], [('train', 1)], 2)\n\n        real_path = osp.join(tmpdir, 'best_mAP_epoch_2.pth')\n\n        assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)\n        assert runner.meta['hook_msgs']['best_score'] == 0.4\n\n        resume_from = osp.join(tmpdir, 'latest.pth')\n        loader = DataLoader(ExampleDataset(), batch_size=1)\n        eval_hook = EvalHookCls(data_loader, save_best='mAP')\n        runner = EpochBasedRunner(\n            model=model,\n            batch_processor=None,\n            optimizer=optimizer,\n            work_dir=tmpdir,\n            logger=logger)\n        runner.register_checkpoint_hook(dict(interval=1))\n        runner.register_hook(eval_hook)\n        runner.resume(resume_from)\n        runner.run([loader], [('train', 1)], 8)\n\n        real_path = osp.join(tmpdir, 'best_mAP_epoch_4.pth')\n\n        assert runner.meta['hook_msgs']['best_ckpt'] == osp.realpath(real_path)\n        assert runner.meta['hook_msgs']['best_score'] == 0.7\n"
  },
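A small sketch of the constructor-level validation exercised at the top of `test_eval_hook` above; the dataset class is a throwaway assumption and no runner is started.

import torch
from torch.utils.data import DataLoader, Dataset

from mmdet.core import EvalHook


class OneItemDataset(Dataset):
    """Throwaway dataset: one sample plus the evaluate() hook EvalHook expects."""

    def __len__(self):
        return 1

    def __getitem__(self, idx):
        return dict(imgs=torch.tensor([1.0]))

    def evaluate(self, results, logger=None):
        return dict(mAP=0.5)


loader = DataLoader(OneItemDataset(), batch_size=1)

# A valid hook: evaluate every epoch and keep the checkpoint with the best mAP.
hook = EvalHook(loader, interval=1, save_best='mAP', rule='greater')

try:
    EvalHook([loader])  # anything other than a DataLoader is rejected
except TypeError as err:
    print('rejected:', err)

try:
    EvalHook(loader, save_best='auto', rule='unsupport')  # unknown comparison rule
except KeyError as err:
    print('rejected:', err)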
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_runtime/test_fp16.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport pytest\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import auto_fp16, force_fp32\nfrom mmcv.runner.fp16_utils import cast_tensor_type\n\n\ndef test_cast_tensor_type():\n    inputs = torch.FloatTensor([5.])\n    src_type = torch.float32\n    dst_type = torch.int32\n    outputs = cast_tensor_type(inputs, src_type, dst_type)\n    assert isinstance(outputs, torch.Tensor)\n    assert outputs.dtype == dst_type\n\n    inputs = 'tensor'\n    src_type = str\n    dst_type = str\n    outputs = cast_tensor_type(inputs, src_type, dst_type)\n    assert isinstance(outputs, str)\n\n    inputs = np.array([5.])\n    src_type = np.ndarray\n    dst_type = np.ndarray\n    outputs = cast_tensor_type(inputs, src_type, dst_type)\n    assert isinstance(outputs, np.ndarray)\n\n    inputs = dict(\n        tensor_a=torch.FloatTensor([1.]), tensor_b=torch.FloatTensor([2.]))\n    src_type = torch.float32\n    dst_type = torch.int32\n    outputs = cast_tensor_type(inputs, src_type, dst_type)\n    assert isinstance(outputs, dict)\n    assert outputs['tensor_a'].dtype == dst_type\n    assert outputs['tensor_b'].dtype == dst_type\n\n    inputs = [torch.FloatTensor([1.]), torch.FloatTensor([2.])]\n    src_type = torch.float32\n    dst_type = torch.int32\n    outputs = cast_tensor_type(inputs, src_type, dst_type)\n    assert isinstance(outputs, list)\n    assert outputs[0].dtype == dst_type\n    assert outputs[1].dtype == dst_type\n\n    inputs = 5\n    outputs = cast_tensor_type(inputs, None, None)\n    assert isinstance(outputs, int)\n\n\ndef test_auto_fp16():\n\n    with pytest.raises(TypeError):\n        # ExampleObject is not a subclass of nn.Module\n\n        class ExampleObject:\n\n            @auto_fp16()\n            def __call__(self, x):\n                return x\n\n        model = ExampleObject()\n        input_x = torch.ones(1, dtype=torch.float32)\n        model(input_x)\n\n    # apply to all input args\n    class ExampleModule(nn.Module):\n\n        @auto_fp16()\n        def forward(self, x, y):\n            return x, y\n\n    model = ExampleModule()\n    input_x = torch.ones(1, dtype=torch.float32)\n    input_y = torch.ones(1, dtype=torch.float32)\n    output_x, output_y = model(input_x, input_y)\n    assert output_x.dtype == torch.float32\n    assert output_y.dtype == torch.float32\n\n    model.fp16_enabled = True\n    output_x, output_y = model(input_x, input_y)\n    assert output_x.dtype == torch.half\n    assert output_y.dtype == torch.half\n\n    if torch.cuda.is_available():\n        model.cuda()\n        output_x, output_y = model(input_x.cuda(), input_y.cuda())\n        assert output_x.dtype == torch.half\n        assert output_y.dtype == torch.half\n\n    # apply to specified input args\n    class ExampleModule(nn.Module):\n\n        @auto_fp16(apply_to=('x', ))\n        def forward(self, x, y):\n            return x, y\n\n    model = ExampleModule()\n    input_x = torch.ones(1, dtype=torch.float32)\n    input_y = torch.ones(1, dtype=torch.float32)\n    output_x, output_y = model(input_x, input_y)\n    assert output_x.dtype == torch.float32\n    assert output_y.dtype == torch.float32\n\n    model.fp16_enabled = True\n    output_x, output_y = model(input_x, input_y)\n    assert output_x.dtype == torch.half\n    assert output_y.dtype == torch.float32\n\n    if torch.cuda.is_available():\n        model.cuda()\n        output_x, output_y = model(input_x.cuda(), input_y.cuda())\n        assert 
output_x.dtype == torch.half\n        assert output_y.dtype == torch.float32\n\n    # apply to optional input args\n    class ExampleModule(nn.Module):\n\n        @auto_fp16(apply_to=('x', 'y'))\n        def forward(self, x, y=None, z=None):\n            return x, y, z\n\n    model = ExampleModule()\n    input_x = torch.ones(1, dtype=torch.float32)\n    input_y = torch.ones(1, dtype=torch.float32)\n    input_z = torch.ones(1, dtype=torch.float32)\n    output_x, output_y, output_z = model(input_x, y=input_y, z=input_z)\n    assert output_x.dtype == torch.float32\n    assert output_y.dtype == torch.float32\n    assert output_z.dtype == torch.float32\n\n    model.fp16_enabled = True\n    output_x, output_y, output_z = model(input_x, y=input_y, z=input_z)\n    assert output_x.dtype == torch.half\n    assert output_y.dtype == torch.half\n    assert output_z.dtype == torch.float32\n\n    if torch.cuda.is_available():\n        model.cuda()\n        output_x, output_y, output_z = model(\n            input_x.cuda(), y=input_y.cuda(), z=input_z.cuda())\n        assert output_x.dtype == torch.half\n        assert output_y.dtype == torch.half\n        assert output_z.dtype == torch.float32\n\n    # out_fp32=True\n    class ExampleModule(nn.Module):\n\n        @auto_fp16(apply_to=('x', 'y'), out_fp32=True)\n        def forward(self, x, y=None, z=None):\n            return x, y, z\n\n    model = ExampleModule()\n    input_x = torch.ones(1, dtype=torch.half)\n    input_y = torch.ones(1, dtype=torch.float32)\n    input_z = torch.ones(1, dtype=torch.float32)\n    output_x, output_y, output_z = model(input_x, y=input_y, z=input_z)\n    assert output_x.dtype == torch.half\n    assert output_y.dtype == torch.float32\n    assert output_z.dtype == torch.float32\n\n    model.fp16_enabled = True\n    output_x, output_y, output_z = model(input_x, y=input_y, z=input_z)\n    assert output_x.dtype == torch.float32\n    assert output_y.dtype == torch.float32\n    assert output_z.dtype == torch.float32\n\n    if torch.cuda.is_available():\n        model.cuda()\n        output_x, output_y, output_z = model(\n            input_x.cuda(), y=input_y.cuda(), z=input_z.cuda())\n        assert output_x.dtype == torch.float32\n        assert output_y.dtype == torch.float32\n        assert output_z.dtype == torch.float32\n\n\ndef test_force_fp32():\n\n    with pytest.raises(TypeError):\n        # ExampleObject is not a subclass of nn.Module\n\n        class ExampleObject:\n\n            @force_fp32()\n            def __call__(self, x):\n                return x\n\n        model = ExampleObject()\n        input_x = torch.ones(1, dtype=torch.float32)\n        model(input_x)\n\n    # apply to all input args\n    class ExampleModule(nn.Module):\n\n        @force_fp32()\n        def forward(self, x, y):\n            return x, y\n\n    model = ExampleModule()\n    input_x = torch.ones(1, dtype=torch.half)\n    input_y = torch.ones(1, dtype=torch.half)\n    output_x, output_y = model(input_x, input_y)\n    assert output_x.dtype == torch.half\n    assert output_y.dtype == torch.half\n\n    model.fp16_enabled = True\n    output_x, output_y = model(input_x, input_y)\n    assert output_x.dtype == torch.float32\n    assert output_y.dtype == torch.float32\n\n    if torch.cuda.is_available():\n        model.cuda()\n        output_x, output_y = model(input_x.cuda(), input_y.cuda())\n        assert output_x.dtype == torch.float32\n        assert output_y.dtype == torch.float32\n\n    # apply to specified input args\n    class 
ExampleModule(nn.Module):\n\n        @force_fp32(apply_to=('x', ))\n        def forward(self, x, y):\n            return x, y\n\n    model = ExampleModule()\n    input_x = torch.ones(1, dtype=torch.half)\n    input_y = torch.ones(1, dtype=torch.half)\n    output_x, output_y = model(input_x, input_y)\n    assert output_x.dtype == torch.half\n    assert output_y.dtype == torch.half\n\n    model.fp16_enabled = True\n    output_x, output_y = model(input_x, input_y)\n    assert output_x.dtype == torch.float32\n    assert output_y.dtype == torch.half\n\n    if torch.cuda.is_available():\n        model.cuda()\n        output_x, output_y = model(input_x.cuda(), input_y.cuda())\n        assert output_x.dtype == torch.float32\n        assert output_y.dtype == torch.half\n\n    # apply to optional input args\n    class ExampleModule(nn.Module):\n\n        @force_fp32(apply_to=('x', 'y'))\n        def forward(self, x, y=None, z=None):\n            return x, y, z\n\n    model = ExampleModule()\n    input_x = torch.ones(1, dtype=torch.half)\n    input_y = torch.ones(1, dtype=torch.half)\n    input_z = torch.ones(1, dtype=torch.half)\n    output_x, output_y, output_z = model(input_x, y=input_y, z=input_z)\n    assert output_x.dtype == torch.half\n    assert output_y.dtype == torch.half\n    assert output_z.dtype == torch.half\n\n    model.fp16_enabled = True\n    output_x, output_y, output_z = model(input_x, y=input_y, z=input_z)\n    assert output_x.dtype == torch.float32\n    assert output_y.dtype == torch.float32\n    assert output_z.dtype == torch.half\n\n    if torch.cuda.is_available():\n        model.cuda()\n        output_x, output_y, output_z = model(\n            input_x.cuda(), y=input_y.cuda(), z=input_z.cuda())\n        assert output_x.dtype == torch.float32\n        assert output_y.dtype == torch.float32\n        assert output_z.dtype == torch.half\n\n    # out_fp16=True\n    class ExampleModule(nn.Module):\n\n        @force_fp32(apply_to=('x', 'y'), out_fp16=True)\n        def forward(self, x, y=None, z=None):\n            return x, y, z\n\n    model = ExampleModule()\n    input_x = torch.ones(1, dtype=torch.float32)\n    input_y = torch.ones(1, dtype=torch.half)\n    input_z = torch.ones(1, dtype=torch.half)\n    output_x, output_y, output_z = model(input_x, y=input_y, z=input_z)\n    assert output_x.dtype == torch.float32\n    assert output_y.dtype == torch.half\n    assert output_z.dtype == torch.half\n\n    model.fp16_enabled = True\n    output_x, output_y, output_z = model(input_x, y=input_y, z=input_z)\n    assert output_x.dtype == torch.half\n    assert output_y.dtype == torch.half\n    assert output_z.dtype == torch.half\n\n    if torch.cuda.is_available():\n        model.cuda()\n        output_x, output_y, output_z = model(\n            input_x.cuda(), y=input_y.cuda(), z=input_z.cuda())\n        assert output_x.dtype == torch.half\n        assert output_y.dtype == torch.half\n        assert output_z.dtype == torch.half\n"
  },
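The `auto_fp16`/`force_fp32` decorators tested above only act once `fp16_enabled` is set on the owning module. The CPU-only sketch below (the `Head` class is invented for illustration) shows that casting behaviour directly.

import torch
import torch.nn as nn
from mmcv.runner import auto_fp16, force_fp32


class Head(nn.Module):
    """Invented toy module; only the decorator behaviour matters."""

    @auto_fp16(apply_to=('x', ))
    def forward(self, x, y):
        # With fp16_enabled=True, x arrives as half precision; y is left alone.
        return x, y

    @force_fp32(apply_to=('logits', ))
    def loss(self, logits):
        # With fp16_enabled=True, logits is promoted back to float32.
        return logits


head = Head()
x = torch.ones(1)
y = torch.ones(1)
print([t.dtype for t in head(x, y)])   # decorators inactive -> [float32, float32]

head.fp16_enabled = True
print([t.dtype for t in head(x, y)])   # -> [float16, float32]
print(head.loss(torch.ones(1, dtype=torch.half)).dtype)  # -> float32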
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_anchor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"\nCommandLine:\n    pytest tests/test_utils/test_anchor.py\n    xdoctest tests/test_utils/test_anchor.py zero\n\n\"\"\"\nimport pytest\nimport torch\n\n\ndef test_standard_points_generator():\n    from mmdet.core.anchor import build_prior_generator\n\n    # teat init\n    anchor_generator_cfg = dict(\n        type='MlvlPointGenerator', strides=[4, 8], offset=0)\n    anchor_generator = build_prior_generator(anchor_generator_cfg)\n    assert anchor_generator is not None\n    assert anchor_generator.num_base_priors == [1, 1]\n    # test_stride\n    from mmdet.core.anchor import MlvlPointGenerator\n\n    # Square strides\n    mlvl_points = MlvlPointGenerator(strides=[4, 10], offset=0)\n    mlvl_points_half_stride_generator = MlvlPointGenerator(\n        strides=[4, 10], offset=0.5)\n    assert mlvl_points.num_levels == 2\n\n    # assert self.num_levels == len(featmap_sizes)\n    with pytest.raises(AssertionError):\n        mlvl_points.grid_priors(featmap_sizes=[(2, 2)], device='cpu')\n    priors = mlvl_points.grid_priors(\n        featmap_sizes=[(2, 2), (4, 8)], device='cpu')\n    priors_with_stride = mlvl_points.grid_priors(\n        featmap_sizes=[(2, 2), (4, 8)], with_stride=True, device='cpu')\n    assert len(priors) == 2\n\n    # assert last dimension is (coord_x, coord_y, stride_w, stride_h).\n    assert priors_with_stride[0].size(1) == 4\n    assert priors_with_stride[0][0][2] == 4\n    assert priors_with_stride[0][0][3] == 4\n    assert priors_with_stride[1][0][2] == 10\n    assert priors_with_stride[1][0][3] == 10\n\n    stride_4_feat_2_2 = priors[0]\n    assert (stride_4_feat_2_2[1] - stride_4_feat_2_2[0]).sum() == 4\n    assert stride_4_feat_2_2.size(0) == 4\n    assert stride_4_feat_2_2.size(1) == 2\n\n    stride_10_feat_4_8 = priors[1]\n    assert (stride_10_feat_4_8[1] - stride_10_feat_4_8[0]).sum() == 10\n    assert stride_10_feat_4_8.size(0) == 4 * 8\n    assert stride_10_feat_4_8.size(1) == 2\n\n    # assert the offset of 0.5 * stride\n    priors_half_offset = mlvl_points_half_stride_generator.grid_priors(\n        featmap_sizes=[(2, 2), (4, 8)], device='cpu')\n\n    assert (priors_half_offset[0][0] - priors[0][0]).sum() == 4 * 0.5 * 2\n    assert (priors_half_offset[1][0] - priors[1][0]).sum() == 10 * 0.5 * 2\n    if torch.cuda.is_available():\n        anchor_generator_cfg = dict(\n            type='MlvlPointGenerator', strides=[4, 8], offset=0)\n        anchor_generator = build_prior_generator(anchor_generator_cfg)\n        assert anchor_generator is not None\n        # Square strides\n        mlvl_points = MlvlPointGenerator(strides=[4, 10], offset=0)\n        mlvl_points_half_stride_generator = MlvlPointGenerator(\n            strides=[4, 10], offset=0.5)\n        assert mlvl_points.num_levels == 2\n\n        # assert self.num_levels == len(featmap_sizes)\n        with pytest.raises(AssertionError):\n            mlvl_points.grid_priors(featmap_sizes=[(2, 2)], device='cuda')\n        priors = mlvl_points.grid_priors(\n            featmap_sizes=[(2, 2), (4, 8)], device='cuda')\n        priors_with_stride = mlvl_points.grid_priors(\n            featmap_sizes=[(2, 2), (4, 8)], with_stride=True, device='cuda')\n        assert len(priors) == 2\n\n        # assert last dimension is (coord_x, coord_y, stride_w, stride_h).\n        assert priors_with_stride[0].size(1) == 4\n        assert priors_with_stride[0][0][2] == 4\n        assert priors_with_stride[0][0][3] == 4\n        assert priors_with_stride[1][0][2] 
== 10\n        assert priors_with_stride[1][0][3] == 10\n\n        stride_4_feat_2_2 = priors[0]\n        assert (stride_4_feat_2_2[1] - stride_4_feat_2_2[0]).sum() == 4\n        assert stride_4_feat_2_2.size(0) == 4\n        assert stride_4_feat_2_2.size(1) == 2\n\n        stride_10_feat_4_8 = priors[1]\n        assert (stride_10_feat_4_8[1] - stride_10_feat_4_8[0]).sum() == 10\n        assert stride_10_feat_4_8.size(0) == 4 * 8\n        assert stride_10_feat_4_8.size(1) == 2\n\n        # assert the offset of 0.5 * stride\n        priors_half_offset = mlvl_points_half_stride_generator.grid_priors(\n            featmap_sizes=[(2, 2), (4, 8)], device='cuda')\n\n        assert (priors_half_offset[0][0] - priors[0][0]).sum() == 4 * 0.5 * 2\n        assert (priors_half_offset[1][0] - priors[1][0]).sum() == 10 * 0.5 * 2\n\n\ndef test_sparse_prior():\n    from mmdet.core.anchor import MlvlPointGenerator\n    mlvl_points = MlvlPointGenerator(strides=[4, 10], offset=0)\n    prior_indexs = torch.Tensor([0, 2, 4, 5, 6, 9]).long()\n\n    featmap_sizes = [(3, 5), (6, 4)]\n    grid_anchors = mlvl_points.grid_priors(\n        featmap_sizes=featmap_sizes, with_stride=False, device='cpu')\n    sparse_prior = mlvl_points.sparse_priors(\n        prior_idxs=prior_indexs,\n        featmap_size=featmap_sizes[0],\n        level_idx=0,\n        device='cpu')\n\n    assert not sparse_prior.is_cuda\n    assert (sparse_prior == grid_anchors[0][prior_indexs]).all()\n    sparse_prior = mlvl_points.sparse_priors(\n        prior_idxs=prior_indexs,\n        featmap_size=featmap_sizes[1],\n        level_idx=1,\n        device='cpu')\n    assert (sparse_prior == grid_anchors[1][prior_indexs]).all()\n\n    from mmdet.core.anchor import AnchorGenerator\n    mlvl_anchors = AnchorGenerator(\n        strides=[16, 32], ratios=[1.], scales=[1.], base_sizes=[4, 8])\n    prior_indexs = torch.Tensor([0, 2, 4, 5, 6, 9]).long()\n\n    featmap_sizes = [(3, 5), (6, 4)]\n    grid_anchors = mlvl_anchors.grid_priors(\n        featmap_sizes=featmap_sizes, device='cpu')\n    sparse_prior = mlvl_anchors.sparse_priors(\n        prior_idxs=prior_indexs,\n        featmap_size=featmap_sizes[0],\n        level_idx=0,\n        device='cpu')\n    assert (sparse_prior == grid_anchors[0][prior_indexs]).all()\n    sparse_prior = mlvl_anchors.sparse_priors(\n        prior_idxs=prior_indexs,\n        featmap_size=featmap_sizes[1],\n        level_idx=1,\n        device='cpu')\n    assert (sparse_prior == grid_anchors[1][prior_indexs]).all()\n\n    # for ssd\n    from mmdet.core.anchor.anchor_generator import SSDAnchorGenerator\n    featmap_sizes = [(38, 38), (19, 19), (10, 10)]\n    anchor_generator = SSDAnchorGenerator(\n        scale_major=False,\n        input_size=300,\n        basesize_ratio_range=(0.15, 0.9),\n        strides=[8, 16, 32],\n        ratios=[[2], [2, 3], [2, 3]])\n    ssd_anchors = anchor_generator.grid_anchors(featmap_sizes, device='cpu')\n    for i in range(len(featmap_sizes)):\n        sparse_ssd_anchors = anchor_generator.sparse_priors(\n            prior_idxs=prior_indexs,\n            level_idx=i,\n            featmap_size=featmap_sizes[i],\n            device='cpu')\n        assert (sparse_ssd_anchors == ssd_anchors[i][prior_indexs]).all()\n\n    # for yolo\n    from mmdet.core.anchor.anchor_generator import YOLOAnchorGenerator\n    featmap_sizes = [(38, 38), (19, 19), (10, 10)]\n    anchor_generator = YOLOAnchorGenerator(\n        strides=[32, 16, 8],\n        base_sizes=[\n            [(116, 90), (156, 198), (373, 326)],\n    
        [(30, 61), (62, 45), (59, 119)],\n            [(10, 13), (16, 30), (33, 23)],\n        ])\n    yolo_anchors = anchor_generator.grid_anchors(featmap_sizes, device='cpu')\n    for i in range(len(featmap_sizes)):\n        sparse_yolo_anchors = anchor_generator.sparse_priors(\n            prior_idxs=prior_indexs,\n            level_idx=i,\n            featmap_size=featmap_sizes[i],\n            device='cpu')\n        assert (sparse_yolo_anchors == yolo_anchors[i][prior_indexs]).all()\n\n    if torch.cuda.is_available():\n        mlvl_points = MlvlPointGenerator(strides=[4, 10], offset=0)\n        prior_indexs = torch.Tensor([0, 3, 4, 5, 6, 7, 1, 2, 4, 5, 6,\n                                     9]).long().cuda()\n\n        featmap_sizes = [(6, 8), (6, 4)]\n        grid_anchors = mlvl_points.grid_priors(\n            featmap_sizes=featmap_sizes, with_stride=False, device='cuda')\n        sparse_prior = mlvl_points.sparse_priors(\n            prior_idxs=prior_indexs,\n            featmap_size=featmap_sizes[0],\n            level_idx=0,\n            device='cuda')\n        assert (sparse_prior == grid_anchors[0][prior_indexs]).all()\n        sparse_prior = mlvl_points.sparse_priors(\n            prior_idxs=prior_indexs,\n            featmap_size=featmap_sizes[1],\n            level_idx=1,\n            device='cuda')\n        assert (sparse_prior == grid_anchors[1][prior_indexs]).all()\n        assert sparse_prior.is_cuda\n        mlvl_anchors = AnchorGenerator(\n            strides=[16, 32],\n            ratios=[1., 2.5],\n            scales=[1., 5.],\n            base_sizes=[4, 8])\n        prior_indexs = torch.Tensor([4, 5, 6, 7, 0, 2, 50, 4, 5, 6,\n                                     9]).long().cuda()\n\n        featmap_sizes = [(13, 5), (16, 4)]\n        grid_anchors = mlvl_anchors.grid_priors(\n            featmap_sizes=featmap_sizes, device='cuda')\n        sparse_prior = mlvl_anchors.sparse_priors(\n            prior_idxs=prior_indexs,\n            featmap_size=featmap_sizes[0],\n            level_idx=0,\n            device='cuda')\n        assert (sparse_prior == grid_anchors[0][prior_indexs]).all()\n        sparse_prior = mlvl_anchors.sparse_priors(\n            prior_idxs=prior_indexs,\n            featmap_size=featmap_sizes[1],\n            level_idx=1,\n            device='cuda')\n        assert (sparse_prior == grid_anchors[1][prior_indexs]).all()\n\n        # for ssd\n        from mmdet.core.anchor.anchor_generator import SSDAnchorGenerator\n        featmap_sizes = [(38, 38), (19, 19), (10, 10)]\n        anchor_generator = SSDAnchorGenerator(\n            scale_major=False,\n            input_size=300,\n            basesize_ratio_range=(0.15, 0.9),\n            strides=[8, 16, 32],\n            ratios=[[2], [2, 3], [2, 3]])\n        ssd_anchors = anchor_generator.grid_anchors(\n            featmap_sizes, device='cuda')\n        for i in range(len(featmap_sizes)):\n            sparse_ssd_anchors = anchor_generator.sparse_priors(\n                prior_idxs=prior_indexs,\n                level_idx=i,\n                featmap_size=featmap_sizes[i],\n                device='cuda')\n            assert (sparse_ssd_anchors == ssd_anchors[i][prior_indexs]).all()\n\n        # for yolo\n        from mmdet.core.anchor.anchor_generator import YOLOAnchorGenerator\n        featmap_sizes = [(38, 38), (19, 19), (10, 10)]\n        anchor_generator = YOLOAnchorGenerator(\n            strides=[32, 16, 8],\n            base_sizes=[\n                [(116, 90), (156, 198), (373, 326)],\n        
        [(30, 61), (62, 45), (59, 119)],\n                [(10, 13), (16, 30), (33, 23)],\n            ])\n        yolo_anchors = anchor_generator.grid_anchors(\n            featmap_sizes, device='cuda')\n        for i in range(len(featmap_sizes)):\n            sparse_yolo_anchors = anchor_generator.sparse_priors(\n                prior_idxs=prior_indexs,\n                level_idx=i,\n                featmap_size=featmap_sizes[i],\n                device='cuda')\n            assert (sparse_yolo_anchors == yolo_anchors[i][prior_indexs]).all()\n\n\ndef test_standard_anchor_generator():\n    from mmdet.core.anchor import build_anchor_generator\n    anchor_generator_cfg = dict(\n        type='AnchorGenerator',\n        scales=[8],\n        ratios=[0.5, 1.0, 2.0],\n        strides=[4, 8])\n\n    anchor_generator = build_anchor_generator(anchor_generator_cfg)\n    assert anchor_generator.num_base_priors == \\\n           anchor_generator.num_base_anchors\n    assert anchor_generator.num_base_priors == [3, 3]\n    assert anchor_generator is not None\n\n\ndef test_strides():\n    from mmdet.core import AnchorGenerator\n\n    # Square strides\n    self = AnchorGenerator([10], [1.], [1.], [10])\n    anchors = self.grid_anchors([(2, 2)], device='cpu')\n\n    expected_anchors = torch.tensor([[-5., -5., 5., 5.], [5., -5., 15., 5.],\n                                     [-5., 5., 5., 15.], [5., 5., 15., 15.]])\n\n    assert torch.equal(anchors[0], expected_anchors)\n\n    # Different strides in x and y direction\n    self = AnchorGenerator([(10, 20)], [1.], [1.], [10])\n    anchors = self.grid_anchors([(2, 2)], device='cpu')\n\n    expected_anchors = torch.tensor([[-5., -5., 5., 5.], [5., -5., 15., 5.],\n                                     [-5., 15., 5., 25.], [5., 15., 15., 25.]])\n\n    assert torch.equal(anchors[0], expected_anchors)\n\n\ndef test_ssd_anchor_generator():\n    from mmdet.core.anchor import build_anchor_generator\n    if torch.cuda.is_available():\n        device = 'cuda'\n    else:\n        device = 'cpu'\n\n    # min_sizes max_sizes must set at the same time\n    with pytest.raises(AssertionError):\n        anchor_generator_cfg = dict(\n            type='SSDAnchorGenerator',\n            scale_major=False,\n            min_sizes=[48, 100, 150, 202, 253, 300],\n            max_sizes=None,\n            strides=[8, 16, 32, 64, 100, 300],\n            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]])\n        build_anchor_generator(anchor_generator_cfg)\n\n    # length of min_sizes max_sizes must be the same\n    with pytest.raises(AssertionError):\n        anchor_generator_cfg = dict(\n            type='SSDAnchorGenerator',\n            scale_major=False,\n            min_sizes=[48, 100, 150, 202, 253, 300],\n            max_sizes=[100, 150, 202, 253],\n            strides=[8, 16, 32, 64, 100, 300],\n            ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]])\n        build_anchor_generator(anchor_generator_cfg)\n\n    # test setting anchor size manually\n    anchor_generator_cfg = dict(\n        type='SSDAnchorGenerator',\n        scale_major=False,\n        min_sizes=[48, 100, 150, 202, 253, 304],\n        max_sizes=[100, 150, 202, 253, 304, 320],\n        strides=[16, 32, 64, 107, 160, 320],\n        ratios=[[2, 3], [2, 3], [2, 3], [2, 3], [2, 3], [2, 3]])\n\n    featmap_sizes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]\n    anchor_generator = build_anchor_generator(anchor_generator_cfg)\n\n    expected_base_anchors = [\n        torch.Tensor([[-16.0000, -16.0000, 
32.0000, 32.0000],\n                      [-26.6410, -26.6410, 42.6410, 42.6410],\n                      [-25.9411, -8.9706, 41.9411, 24.9706],\n                      [-8.9706, -25.9411, 24.9706, 41.9411],\n                      [-33.5692, -5.8564, 49.5692, 21.8564],\n                      [-5.8564, -33.5692, 21.8564, 49.5692]]),\n        torch.Tensor([[-34.0000, -34.0000, 66.0000, 66.0000],\n                      [-45.2372, -45.2372, 77.2372, 77.2372],\n                      [-54.7107, -19.3553, 86.7107, 51.3553],\n                      [-19.3553, -54.7107, 51.3553, 86.7107],\n                      [-70.6025, -12.8675, 102.6025, 44.8675],\n                      [-12.8675, -70.6025, 44.8675, 102.6025]]),\n        torch.Tensor([[-43.0000, -43.0000, 107.0000, 107.0000],\n                      [-55.0345, -55.0345, 119.0345, 119.0345],\n                      [-74.0660, -21.0330, 138.0660, 85.0330],\n                      [-21.0330, -74.0660, 85.0330, 138.0660],\n                      [-97.9038, -11.3013, 161.9038, 75.3013],\n                      [-11.3013, -97.9038, 75.3013, 161.9038]]),\n        torch.Tensor([[-47.5000, -47.5000, 154.5000, 154.5000],\n                      [-59.5332, -59.5332, 166.5332, 166.5332],\n                      [-89.3356, -17.9178, 196.3356, 124.9178],\n                      [-17.9178, -89.3356, 124.9178, 196.3356],\n                      [-121.4371, -4.8124, 228.4371, 111.8124],\n                      [-4.8124, -121.4371, 111.8124, 228.4371]]),\n        torch.Tensor([[-46.5000, -46.5000, 206.5000, 206.5000],\n                      [-58.6651, -58.6651, 218.6651, 218.6651],\n                      [-98.8980, -9.4490, 258.8980, 169.4490],\n                      [-9.4490, -98.8980, 169.4490, 258.8980],\n                      [-139.1044, 6.9652, 299.1044, 153.0348],\n                      [6.9652, -139.1044, 153.0348, 299.1044]]),\n        torch.Tensor([[8.0000, 8.0000, 312.0000, 312.0000],\n                      [4.0513, 4.0513, 315.9487, 315.9487],\n                      [-54.9605, 52.5198, 374.9604, 267.4802],\n                      [52.5198, -54.9605, 267.4802, 374.9604],\n                      [-103.2717, 72.2428, 423.2717, 247.7572],\n                      [72.2428, -103.2717, 247.7572, 423.2717]])\n    ]\n\n    base_anchors = anchor_generator.base_anchors\n    for i, base_anchor in enumerate(base_anchors):\n        assert base_anchor.allclose(expected_base_anchors[i])\n\n    # check valid flags\n    expected_valid_pixels = [2400, 600, 150, 54, 24, 6]\n    multi_level_valid_flags = anchor_generator.valid_flags(\n        featmap_sizes, (320, 320), device)\n    for i, single_level_valid_flag in enumerate(multi_level_valid_flags):\n        assert single_level_valid_flag.sum() == expected_valid_pixels[i]\n\n    # check number of base anchors for each level\n    assert anchor_generator.num_base_anchors == [6, 6, 6, 6, 6, 6]\n\n    # check anchor generation\n    anchors = anchor_generator.grid_anchors(featmap_sizes, device)\n    assert len(anchors) == 6\n\n    # test vgg ssd anchor setting\n    anchor_generator_cfg = dict(\n        type='SSDAnchorGenerator',\n        scale_major=False,\n        input_size=300,\n        basesize_ratio_range=(0.15, 0.9),\n        strides=[8, 16, 32, 64, 100, 300],\n        ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]])\n\n    featmap_sizes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]\n    anchor_generator = build_anchor_generator(anchor_generator_cfg)\n\n    # check base anchors\n    expected_base_anchors = [\n        
torch.Tensor([[-6.5000, -6.5000, 14.5000, 14.5000],\n                      [-11.3704, -11.3704, 19.3704, 19.3704],\n                      [-10.8492, -3.4246, 18.8492, 11.4246],\n                      [-3.4246, -10.8492, 11.4246, 18.8492]]),\n        torch.Tensor([[-14.5000, -14.5000, 30.5000, 30.5000],\n                      [-25.3729, -25.3729, 41.3729, 41.3729],\n                      [-23.8198, -7.9099, 39.8198, 23.9099],\n                      [-7.9099, -23.8198, 23.9099, 39.8198],\n                      [-30.9711, -4.9904, 46.9711, 20.9904],\n                      [-4.9904, -30.9711, 20.9904, 46.9711]]),\n        torch.Tensor([[-33.5000, -33.5000, 65.5000, 65.5000],\n                      [-45.5366, -45.5366, 77.5366, 77.5366],\n                      [-54.0036, -19.0018, 86.0036, 51.0018],\n                      [-19.0018, -54.0036, 51.0018, 86.0036],\n                      [-69.7365, -12.5788, 101.7365, 44.5788],\n                      [-12.5788, -69.7365, 44.5788, 101.7365]]),\n        torch.Tensor([[-44.5000, -44.5000, 108.5000, 108.5000],\n                      [-56.9817, -56.9817, 120.9817, 120.9817],\n                      [-76.1873, -22.0937, 140.1873, 86.0937],\n                      [-22.0937, -76.1873, 86.0937, 140.1873],\n                      [-100.5019, -12.1673, 164.5019, 76.1673],\n                      [-12.1673, -100.5019, 76.1673, 164.5019]]),\n        torch.Tensor([[-53.5000, -53.5000, 153.5000, 153.5000],\n                      [-66.2185, -66.2185, 166.2185, 166.2185],\n                      [-96.3711, -23.1855, 196.3711, 123.1855],\n                      [-23.1855, -96.3711, 123.1855, 196.3711]]),\n        torch.Tensor([[19.5000, 19.5000, 280.5000, 280.5000],\n                      [6.6342, 6.6342, 293.3658, 293.3658],\n                      [-34.5549, 57.7226, 334.5549, 242.2774],\n                      [57.7226, -34.5549, 242.2774, 334.5549]]),\n    ]\n    base_anchors = anchor_generator.base_anchors\n    for i, base_anchor in enumerate(base_anchors):\n        assert base_anchor.allclose(expected_base_anchors[i])\n\n    # check valid flags\n    expected_valid_pixels = [5776, 2166, 600, 150, 36, 4]\n    multi_level_valid_flags = anchor_generator.valid_flags(\n        featmap_sizes, (300, 300), device)\n    for i, single_level_valid_flag in enumerate(multi_level_valid_flags):\n        assert single_level_valid_flag.sum() == expected_valid_pixels[i]\n\n    # check number of base anchors for each level\n    assert anchor_generator.num_base_anchors == [4, 6, 6, 6, 4, 4]\n\n    # check anchor generation\n    anchors = anchor_generator.grid_anchors(featmap_sizes, device)\n    assert len(anchors) == 6\n\n\ndef test_anchor_generator_with_tuples():\n    from mmdet.core.anchor import build_anchor_generator\n    if torch.cuda.is_available():\n        device = 'cuda'\n    else:\n        device = 'cpu'\n\n    anchor_generator_cfg = dict(\n        type='SSDAnchorGenerator',\n        scale_major=False,\n        input_size=300,\n        basesize_ratio_range=(0.15, 0.9),\n        strides=[8, 16, 32, 64, 100, 300],\n        ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]])\n\n    featmap_sizes = [(38, 38), (19, 19), (10, 10), (5, 5), (3, 3), (1, 1)]\n    anchor_generator = build_anchor_generator(anchor_generator_cfg)\n    anchors = anchor_generator.grid_anchors(featmap_sizes, device)\n\n    anchor_generator_cfg_tuples = dict(\n        type='SSDAnchorGenerator',\n        scale_major=False,\n        input_size=300,\n        basesize_ratio_range=(0.15, 0.9),\n        strides=[(8, 8), 
(16, 16), (32, 32), (64, 64), (100, 100), (300, 300)],\n        ratios=[[2], [2, 3], [2, 3], [2, 3], [2], [2]])\n\n    anchor_generator_tuples = build_anchor_generator(\n        anchor_generator_cfg_tuples)\n    anchors_tuples = anchor_generator_tuples.grid_anchors(\n        featmap_sizes, device)\n    for anchor, anchor_tuples in zip(anchors, anchors_tuples):\n        assert torch.equal(anchor, anchor_tuples)\n\n\ndef test_yolo_anchor_generator():\n    from mmdet.core.anchor import build_anchor_generator\n    if torch.cuda.is_available():\n        device = 'cuda'\n    else:\n        device = 'cpu'\n\n    anchor_generator_cfg = dict(\n        type='YOLOAnchorGenerator',\n        strides=[32, 16, 8],\n        base_sizes=[\n            [(116, 90), (156, 198), (373, 326)],\n            [(30, 61), (62, 45), (59, 119)],\n            [(10, 13), (16, 30), (33, 23)],\n        ])\n\n    featmap_sizes = [(14, 18), (28, 36), (56, 72)]\n    anchor_generator = build_anchor_generator(anchor_generator_cfg)\n\n    # check base anchors\n    expected_base_anchors = [\n        torch.Tensor([[-42.0000, -29.0000, 74.0000, 61.0000],\n                      [-62.0000, -83.0000, 94.0000, 115.0000],\n                      [-170.5000, -147.0000, 202.5000, 179.0000]]),\n        torch.Tensor([[-7.0000, -22.5000, 23.0000, 38.5000],\n                      [-23.0000, -14.5000, 39.0000, 30.5000],\n                      [-21.5000, -51.5000, 37.5000, 67.5000]]),\n        torch.Tensor([[-1.0000, -2.5000, 9.0000, 10.5000],\n                      [-4.0000, -11.0000, 12.0000, 19.0000],\n                      [-12.5000, -7.5000, 20.5000, 15.5000]])\n    ]\n    base_anchors = anchor_generator.base_anchors\n    for i, base_anchor in enumerate(base_anchors):\n        assert base_anchor.allclose(expected_base_anchors[i])\n\n    # check number of base anchors for each level\n    assert anchor_generator.num_base_anchors == [3, 3, 3]\n\n    # check anchor generation\n    anchors = anchor_generator.grid_anchors(featmap_sizes, device)\n    assert len(anchors) == 3\n\n\ndef test_retina_anchor():\n    from mmdet.models import build_head\n    if torch.cuda.is_available():\n        device = 'cuda'\n    else:\n        device = 'cpu'\n\n    # head configs modified from\n    # configs/nas_fpn/retinanet_r50_fpn_crop640_50e.py\n    bbox_head = dict(\n        type='RetinaSepBNHead',\n        num_classes=4,\n        num_ins=5,\n        in_channels=4,\n        stacked_convs=1,\n        feat_channels=4,\n        anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        bbox_coder=dict(\n            type='DeltaXYWHBBoxCoder',\n            target_means=[.0, .0, .0, .0],\n            target_stds=[1.0, 1.0, 1.0, 1.0]))\n\n    retina_head = build_head(bbox_head)\n    assert retina_head.anchor_generator is not None\n\n    # use the featmap sizes in NASFPN setting to test retina head\n    featmap_sizes = [(80, 80), (40, 40), (20, 20), (10, 10), (5, 5)]\n    # check base anchors\n    expected_base_anchors = [\n        torch.Tensor([[-22.6274, -11.3137, 22.6274, 11.3137],\n                      [-28.5088, -14.2544, 28.5088, 14.2544],\n                      [-35.9188, -17.9594, 35.9188, 17.9594],\n                      [-16.0000, -16.0000, 16.0000, 16.0000],\n                      [-20.1587, -20.1587, 20.1587, 20.1587],\n                      [-25.3984, -25.3984, 25.3984, 25.3984],\n                     
 [-11.3137, -22.6274, 11.3137, 22.6274],\n                      [-14.2544, -28.5088, 14.2544, 28.5088],\n                      [-17.9594, -35.9188, 17.9594, 35.9188]]),\n        torch.Tensor([[-45.2548, -22.6274, 45.2548, 22.6274],\n                      [-57.0175, -28.5088, 57.0175, 28.5088],\n                      [-71.8376, -35.9188, 71.8376, 35.9188],\n                      [-32.0000, -32.0000, 32.0000, 32.0000],\n                      [-40.3175, -40.3175, 40.3175, 40.3175],\n                      [-50.7968, -50.7968, 50.7968, 50.7968],\n                      [-22.6274, -45.2548, 22.6274, 45.2548],\n                      [-28.5088, -57.0175, 28.5088, 57.0175],\n                      [-35.9188, -71.8376, 35.9188, 71.8376]]),\n        torch.Tensor([[-90.5097, -45.2548, 90.5097, 45.2548],\n                      [-114.0350, -57.0175, 114.0350, 57.0175],\n                      [-143.6751, -71.8376, 143.6751, 71.8376],\n                      [-64.0000, -64.0000, 64.0000, 64.0000],\n                      [-80.6349, -80.6349, 80.6349, 80.6349],\n                      [-101.5937, -101.5937, 101.5937, 101.5937],\n                      [-45.2548, -90.5097, 45.2548, 90.5097],\n                      [-57.0175, -114.0350, 57.0175, 114.0350],\n                      [-71.8376, -143.6751, 71.8376, 143.6751]]),\n        torch.Tensor([[-181.0193, -90.5097, 181.0193, 90.5097],\n                      [-228.0701, -114.0350, 228.0701, 114.0350],\n                      [-287.3503, -143.6751, 287.3503, 143.6751],\n                      [-128.0000, -128.0000, 128.0000, 128.0000],\n                      [-161.2699, -161.2699, 161.2699, 161.2699],\n                      [-203.1873, -203.1873, 203.1873, 203.1873],\n                      [-90.5097, -181.0193, 90.5097, 181.0193],\n                      [-114.0350, -228.0701, 114.0350, 228.0701],\n                      [-143.6751, -287.3503, 143.6751, 287.3503]]),\n        torch.Tensor([[-362.0387, -181.0193, 362.0387, 181.0193],\n                      [-456.1401, -228.0701, 456.1401, 228.0701],\n                      [-574.7006, -287.3503, 574.7006, 287.3503],\n                      [-256.0000, -256.0000, 256.0000, 256.0000],\n                      [-322.5398, -322.5398, 322.5398, 322.5398],\n                      [-406.3747, -406.3747, 406.3747, 406.3747],\n                      [-181.0193, -362.0387, 181.0193, 362.0387],\n                      [-228.0701, -456.1401, 228.0701, 456.1401],\n                      [-287.3503, -574.7006, 287.3503, 574.7006]])\n    ]\n    base_anchors = retina_head.anchor_generator.base_anchors\n    for i, base_anchor in enumerate(base_anchors):\n        assert base_anchor.allclose(expected_base_anchors[i])\n\n    # check valid flags\n    expected_valid_pixels = [57600, 14400, 3600, 900, 225]\n    multi_level_valid_flags = retina_head.anchor_generator.valid_flags(\n        featmap_sizes, (640, 640), device)\n    for i, single_level_valid_flag in enumerate(multi_level_valid_flags):\n        assert single_level_valid_flag.sum() == expected_valid_pixels[i]\n\n    # check number of base anchors for each level\n    assert retina_head.anchor_generator.num_base_anchors == [9, 9, 9, 9, 9]\n\n    # check anchor generation\n    anchors = retina_head.anchor_generator.grid_anchors(featmap_sizes, device)\n    assert len(anchors) == 5\n\n\ndef test_guided_anchor():\n    from mmdet.models import build_head\n    if torch.cuda.is_available():\n        device = 'cuda'\n    else:\n        device = 'cpu'\n    # head configs modified from\n    # 
configs/guided_anchoring/ga_retinanet_r50_fpn_1x_coco.py\n    bbox_head = dict(\n        type='GARetinaHead',\n        num_classes=8,\n        in_channels=4,\n        stacked_convs=1,\n        feat_channels=4,\n        approx_anchor_generator=dict(\n            type='AnchorGenerator',\n            octave_base_scale=4,\n            scales_per_octave=3,\n            ratios=[0.5, 1.0, 2.0],\n            strides=[8, 16, 32, 64, 128]),\n        square_anchor_generator=dict(\n            type='AnchorGenerator',\n            ratios=[1.0],\n            scales=[4],\n            strides=[8, 16, 32, 64, 128]))\n\n    ga_retina_head = build_head(bbox_head)\n    assert ga_retina_head.approx_anchor_generator is not None\n\n    # use the featmap sizes in NASFPN setting to test ga_retina_head\n    featmap_sizes = [(100, 152), (50, 76), (25, 38), (13, 19), (7, 10)]\n    # check base anchors\n    expected_approxs = [\n        torch.Tensor([[-22.6274, -11.3137, 22.6274, 11.3137],\n                      [-28.5088, -14.2544, 28.5088, 14.2544],\n                      [-35.9188, -17.9594, 35.9188, 17.9594],\n                      [-16.0000, -16.0000, 16.0000, 16.0000],\n                      [-20.1587, -20.1587, 20.1587, 20.1587],\n                      [-25.3984, -25.3984, 25.3984, 25.3984],\n                      [-11.3137, -22.6274, 11.3137, 22.6274],\n                      [-14.2544, -28.5088, 14.2544, 28.5088],\n                      [-17.9594, -35.9188, 17.9594, 35.9188]]),\n        torch.Tensor([[-45.2548, -22.6274, 45.2548, 22.6274],\n                      [-57.0175, -28.5088, 57.0175, 28.5088],\n                      [-71.8376, -35.9188, 71.8376, 35.9188],\n                      [-32.0000, -32.0000, 32.0000, 32.0000],\n                      [-40.3175, -40.3175, 40.3175, 40.3175],\n                      [-50.7968, -50.7968, 50.7968, 50.7968],\n                      [-22.6274, -45.2548, 22.6274, 45.2548],\n                      [-28.5088, -57.0175, 28.5088, 57.0175],\n                      [-35.9188, -71.8376, 35.9188, 71.8376]]),\n        torch.Tensor([[-90.5097, -45.2548, 90.5097, 45.2548],\n                      [-114.0350, -57.0175, 114.0350, 57.0175],\n                      [-143.6751, -71.8376, 143.6751, 71.8376],\n                      [-64.0000, -64.0000, 64.0000, 64.0000],\n                      [-80.6349, -80.6349, 80.6349, 80.6349],\n                      [-101.5937, -101.5937, 101.5937, 101.5937],\n                      [-45.2548, -90.5097, 45.2548, 90.5097],\n                      [-57.0175, -114.0350, 57.0175, 114.0350],\n                      [-71.8376, -143.6751, 71.8376, 143.6751]]),\n        torch.Tensor([[-181.0193, -90.5097, 181.0193, 90.5097],\n                      [-228.0701, -114.0350, 228.0701, 114.0350],\n                      [-287.3503, -143.6751, 287.3503, 143.6751],\n                      [-128.0000, -128.0000, 128.0000, 128.0000],\n                      [-161.2699, -161.2699, 161.2699, 161.2699],\n                      [-203.1873, -203.1873, 203.1873, 203.1873],\n                      [-90.5097, -181.0193, 90.5097, 181.0193],\n                      [-114.0350, -228.0701, 114.0350, 228.0701],\n                      [-143.6751, -287.3503, 143.6751, 287.3503]]),\n        torch.Tensor([[-362.0387, -181.0193, 362.0387, 181.0193],\n                      [-456.1401, -228.0701, 456.1401, 228.0701],\n                      [-574.7006, -287.3503, 574.7006, 287.3503],\n                      [-256.0000, -256.0000, 256.0000, 256.0000],\n                      [-322.5398, -322.5398, 
322.5398, 322.5398],\n                      [-406.3747, -406.3747, 406.3747, 406.3747],\n                      [-181.0193, -362.0387, 181.0193, 362.0387],\n                      [-228.0701, -456.1401, 228.0701, 456.1401],\n                      [-287.3503, -574.7006, 287.3503, 574.7006]])\n    ]\n    approxs = ga_retina_head.approx_anchor_generator.base_anchors\n    for i, base_anchor in enumerate(approxs):\n        assert base_anchor.allclose(expected_approxs[i])\n\n    # check valid flags\n    expected_valid_pixels = [136800, 34200, 8550, 2223, 630]\n    multi_level_valid_flags = ga_retina_head.approx_anchor_generator \\\n        .valid_flags(featmap_sizes, (800, 1216), device)\n    for i, single_level_valid_flag in enumerate(multi_level_valid_flags):\n        assert single_level_valid_flag.sum() == expected_valid_pixels[i]\n\n    # check number of base anchors for each level\n    assert ga_retina_head.approx_anchor_generator.num_base_anchors == [\n        9, 9, 9, 9, 9\n    ]\n\n    # check approx generation\n    squares = ga_retina_head.square_anchor_generator.grid_anchors(\n        featmap_sizes, device)\n    assert len(squares) == 5\n\n    expected_squares = [\n        torch.Tensor([[-16., -16., 16., 16.]]),\n        torch.Tensor([[-32., -32., 32., 32]]),\n        torch.Tensor([[-64., -64., 64., 64.]]),\n        torch.Tensor([[-128., -128., 128., 128.]]),\n        torch.Tensor([[-256., -256., 256., 256.]])\n    ]\n    squares = ga_retina_head.square_anchor_generator.base_anchors\n    for i, base_anchor in enumerate(squares):\n        assert base_anchor.allclose(expected_squares[i])\n\n    # square_anchor_generator does not check valid flags\n    # check number of base anchors for each level\n    assert (ga_retina_head.square_anchor_generator.num_base_anchors == [\n        1, 1, 1, 1, 1\n    ])\n\n    # check square generation\n    anchors = ga_retina_head.square_anchor_generator.grid_anchors(\n        featmap_sizes, device)\n    assert len(anchors) == 5\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_assigner.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"Tests the Assigner objects.\n\nCommandLine:\n    pytest tests/test_utils/test_assigner.py\n    xdoctest tests/test_utils/test_assigner.py zero\n\"\"\"\nimport pytest\nimport torch\n\nfrom mmdet.core.bbox.assigners import (ApproxMaxIoUAssigner,\n                                       CenterRegionAssigner, HungarianAssigner,\n                                       MaskHungarianAssigner, MaxIoUAssigner,\n                                       PointAssigner, SimOTAAssigner,\n                                       TaskAlignedAssigner, UniformAssigner)\n\n\ndef test_max_iou_assigner():\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([2, 3])\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 4\n    assert len(assign_result.labels) == 4\n\n    expected_gt_inds = torch.LongTensor([1, 0, 2, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_max_iou_assigner_with_ignore():\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n        ignore_wrt_candidates=False,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [30, 32, 40, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_bboxes_ignore = torch.Tensor([\n        [30, 30, 40, 40],\n    ])\n    assign_result = self.assign(\n        bboxes, gt_bboxes, gt_bboxes_ignore=gt_bboxes_ignore)\n\n    expected_gt_inds = torch.LongTensor([1, 0, 2, -1])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_max_iou_assigner_with_empty_gt():\n    \"\"\"Test corner case where an image might have no true detections.\"\"\"\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.empty(0, 4)\n    assign_result = self.assign(bboxes, gt_bboxes)\n\n    expected_gt_inds = torch.LongTensor([0, 0, 0, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_max_iou_assigner_with_empty_boxes():\n    \"\"\"Test corner case where a network might predict no boxes.\"\"\"\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.empty((0, 4))\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([2, 3])\n\n    # Test with gt_labels\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 0\n    assert tuple(assign_result.labels.shape) == (0, )\n\n    # Test without gt_labels\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=None)\n    assert len(assign_result.gt_inds) == 0\n    assert assign_result.labels is None\n\n\ndef test_max_iou_assigner_with_empty_boxes_and_ignore():\n    \"\"\"Test corner case where a network might predict no boxes and\n    ignore_iof_thr is on.\"\"\"\n    
self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n        ignore_iof_thr=0.5,\n    )\n    bboxes = torch.empty((0, 4))\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_bboxes_ignore = torch.Tensor([\n        [30, 30, 40, 40],\n    ])\n    gt_labels = torch.LongTensor([2, 3])\n\n    # Test with gt_labels\n    assign_result = self.assign(\n        bboxes,\n        gt_bboxes,\n        gt_labels=gt_labels,\n        gt_bboxes_ignore=gt_bboxes_ignore)\n    assert len(assign_result.gt_inds) == 0\n    assert tuple(assign_result.labels.shape) == (0, )\n\n    # Test without gt_labels\n    assign_result = self.assign(\n        bboxes, gt_bboxes, gt_labels=None, gt_bboxes_ignore=gt_bboxes_ignore)\n    assert len(assign_result.gt_inds) == 0\n    assert assign_result.labels is None\n\n\ndef test_max_iou_assigner_with_empty_boxes_and_gt():\n    \"\"\"Test corner case where a network might predict no boxes and no gt.\"\"\"\n    self = MaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.empty((0, 4))\n    gt_bboxes = torch.empty((0, 4))\n    assign_result = self.assign(bboxes, gt_bboxes)\n    assert len(assign_result.gt_inds) == 0\n\n\ndef test_point_assigner():\n    self = PointAssigner()\n    points = torch.FloatTensor([  # [x, y, stride]\n        [0, 0, 1],\n        [10, 10, 1],\n        [5, 5, 1],\n        [32, 32, 1],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    assign_result = self.assign(points, gt_bboxes)\n    expected_gt_inds = torch.LongTensor([1, 2, 1, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_point_assigner_with_empty_gt():\n    \"\"\"Test corner case where an image might have no true detections.\"\"\"\n    self = PointAssigner()\n    points = torch.FloatTensor([  # [x, y, stride]\n        [0, 0, 1],\n        [10, 10, 1],\n        [5, 5, 1],\n        [32, 32, 1],\n    ])\n    gt_bboxes = torch.FloatTensor([])\n    assign_result = self.assign(points, gt_bboxes)\n\n    expected_gt_inds = torch.LongTensor([0, 0, 0, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_point_assigner_with_empty_boxes_and_gt():\n    \"\"\"Test corner case where an image might predict no points and no gt.\"\"\"\n    self = PointAssigner()\n    points = torch.FloatTensor([])\n    gt_bboxes = torch.FloatTensor([])\n    assign_result = self.assign(points, gt_bboxes)\n    assert len(assign_result.gt_inds) == 0\n\n\ndef test_approx_iou_assigner():\n    self = ApproxMaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    approxs_per_octave = 1\n    approxs = bboxes\n    squares = bboxes\n    assign_result = self.assign(approxs, squares, approxs_per_octave,\n                                gt_bboxes)\n\n    expected_gt_inds = torch.LongTensor([1, 0, 2, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_approx_iou_assigner_with_empty_gt():\n    \"\"\"Test corner case where an image might have no true detections.\"\"\"\n    self = ApproxMaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 
10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([])\n    approxs_per_octave = 1\n    approxs = bboxes\n    squares = bboxes\n    assign_result = self.assign(approxs, squares, approxs_per_octave,\n                                gt_bboxes)\n\n    expected_gt_inds = torch.LongTensor([0, 0, 0, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_approx_iou_assigner_with_empty_boxes():\n    \"\"\"Test corner case where an network might predict no boxes.\"\"\"\n    self = ApproxMaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.empty((0, 4))\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    approxs_per_octave = 1\n    approxs = bboxes\n    squares = bboxes\n    assign_result = self.assign(approxs, squares, approxs_per_octave,\n                                gt_bboxes)\n    assert len(assign_result.gt_inds) == 0\n\n\ndef test_approx_iou_assigner_with_empty_boxes_and_gt():\n    \"\"\"Test corner case where an network might predict no boxes and no gt.\"\"\"\n    self = ApproxMaxIoUAssigner(\n        pos_iou_thr=0.5,\n        neg_iou_thr=0.5,\n    )\n    bboxes = torch.empty((0, 4))\n    gt_bboxes = torch.empty((0, 4))\n    approxs_per_octave = 1\n    approxs = bboxes\n    squares = bboxes\n    assign_result = self.assign(approxs, squares, approxs_per_octave,\n                                gt_bboxes)\n    assert len(assign_result.gt_inds) == 0\n\n\ndef test_random_assign_result():\n    \"\"\"Test random instantiation of assign result to catch corner cases.\"\"\"\n    from mmdet.core.bbox.assigners.assign_result import AssignResult\n    AssignResult.random()\n\n    AssignResult.random(num_gts=0, num_preds=0)\n    AssignResult.random(num_gts=0, num_preds=3)\n    AssignResult.random(num_gts=3, num_preds=3)\n    AssignResult.random(num_gts=0, num_preds=3)\n    AssignResult.random(num_gts=7, num_preds=7)\n    AssignResult.random(num_gts=7, num_preds=64)\n    AssignResult.random(num_gts=24, num_preds=3)\n\n\ndef test_center_region_assigner():\n    self = CenterRegionAssigner(pos_scale=0.3, neg_scale=1)\n    bboxes = torch.FloatTensor([[0, 0, 10, 10], [10, 10, 20, 20], [8, 8, 9,\n                                                                   9]])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 11, 11],  # match bboxes[0]\n        [10, 10, 20, 20],  # match bboxes[1]\n        [4.5, 4.5, 5.5, 5.5],  # match bboxes[0] but area is too small\n        [0, 0, 10, 10],  # match bboxes[1] and has a smaller area than gt[0]\n    ])\n    gt_labels = torch.LongTensor([2, 3, 4, 5])\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 3\n    assert len(assign_result.labels) == 3\n    expected_gt_inds = torch.LongTensor([4, 2, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n    shadowed_labels = assign_result.get_extra_property('shadowed_labels')\n    # [8, 8, 9, 9] in the shadowed region of [0, 0, 11, 11] (label: 2)\n    assert torch.any(shadowed_labels == torch.LongTensor([[2, 2]]))\n    # [8, 8, 9, 9] in the shadowed region of [0, 0, 10, 10] (label: 5)\n    assert torch.any(shadowed_labels == torch.LongTensor([[2, 5]]))\n    # [0, 0, 10, 10] is already assigned to [4.5, 4.5, 5.5, 5.5].\n    #   Therefore, [0, 0, 11, 11] (label: 2) is shadowed\n    assert torch.any(shadowed_labels == torch.LongTensor([[0, 2]]))\n\n\ndef 
test_center_region_assigner_with_ignore():\n    self = CenterRegionAssigner(\n        pos_scale=0.5,\n        neg_scale=1,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],  # match bboxes[0]\n        [10, 10, 20, 20],  # match bboxes[1]\n    ])\n    gt_bboxes_ignore = torch.FloatTensor([\n        [0, 0, 10, 10],  # match bboxes[0]\n    ])\n    gt_labels = torch.LongTensor([1, 2])\n    assign_result = self.assign(\n        bboxes,\n        gt_bboxes,\n        gt_bboxes_ignore=gt_bboxes_ignore,\n        gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 2\n    assert len(assign_result.labels) == 2\n\n    expected_gt_inds = torch.LongTensor([-1, 2])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_center_region_assigner_with_empty_bboxes():\n    self = CenterRegionAssigner(\n        pos_scale=0.5,\n        neg_scale=1,\n    )\n    bboxes = torch.empty((0, 4)).float()\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],  # match bboxes[0]\n        [10, 10, 20, 20],  # match bboxes[1]\n    ])\n    gt_labels = torch.LongTensor([1, 2])\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n    assert assign_result.gt_inds is None or assign_result.gt_inds.numel() == 0\n    assert assign_result.labels is None or assign_result.labels.numel() == 0\n\n\ndef test_center_region_assigner_with_empty_gts():\n    self = CenterRegionAssigner(\n        pos_scale=0.5,\n        neg_scale=1,\n    )\n    bboxes = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n    ])\n    gt_bboxes = torch.empty((0, 4)).float()\n    gt_labels = torch.empty((0, )).long()\n    assign_result = self.assign(bboxes, gt_bboxes, gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 2\n    expected_gt_inds = torch.LongTensor([0, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_hungarian_match_assigner():\n    self = HungarianAssigner()\n    assert self.iou_cost.iou_mode == 'giou'\n\n    # test no gt bboxes\n    bbox_pred = torch.rand((10, 4))\n    cls_pred = torch.rand((10, 81))\n    gt_bboxes = torch.empty((0, 4)).float()\n    gt_labels = torch.empty((0, )).long()\n    img_meta = dict(img_shape=(10, 8, 3))\n    assign_result = self.assign(bbox_pred, cls_pred, gt_bboxes, gt_labels,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds == 0)\n    assert torch.all(assign_result.labels == -1)\n\n    # test with gt bboxes\n    gt_bboxes = torch.FloatTensor([[0, 0, 5, 7], [3, 5, 7, 8]])\n    gt_labels = torch.LongTensor([1, 20])\n    assign_result = self.assign(bbox_pred, cls_pred, gt_bboxes, gt_labels,\n                                img_meta)\n\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == gt_bboxes.size(0)\n    assert (assign_result.labels > -1).sum() == gt_bboxes.size(0)\n\n    # test iou mode\n    self = HungarianAssigner(\n        iou_cost=dict(type='IoUCost', iou_mode='iou', weight=1.0))\n    assert self.iou_cost.iou_mode == 'iou'\n    assign_result = self.assign(bbox_pred, cls_pred, gt_bboxes, gt_labels,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == gt_bboxes.size(0)\n    assert (assign_result.labels > -1).sum() == gt_bboxes.size(0)\n\n    # test focal loss mode\n    self = HungarianAssigner(\n        
iou_cost=dict(type='IoUCost', iou_mode='giou', weight=1.0),\n        cls_cost=dict(type='FocalLossCost', weight=1.))\n    assert self.iou_cost.iou_mode == 'giou'\n    assign_result = self.assign(bbox_pred, cls_pred, gt_bboxes, gt_labels,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == gt_bboxes.size(0)\n    assert (assign_result.labels > -1).sum() == gt_bboxes.size(0)\n\n\ndef test_uniform_assigner():\n    self = UniformAssigner(0.15, 0.7, 1)\n    pred_bbox = torch.FloatTensor([\n        [1, 1, 12, 8],\n        [4, 4, 20, 20],\n        [1, 5, 15, 15],\n        [30, 5, 32, 42],\n    ])\n    anchor = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([2, 3])\n    assign_result = self.assign(\n        pred_bbox, anchor, gt_bboxes, gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 4\n    assert len(assign_result.labels) == 4\n\n    expected_gt_inds = torch.LongTensor([-1, 0, 2, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_uniform_assigner_with_empty_gt():\n    \"\"\"Test corner case where an image might have no true detections.\"\"\"\n    self = UniformAssigner(0.15, 0.7, 1)\n    pred_bbox = torch.FloatTensor([\n        [1, 1, 12, 8],\n        [4, 4, 20, 20],\n        [1, 5, 15, 15],\n        [30, 5, 32, 42],\n    ])\n    anchor = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.empty(0, 4)\n    assign_result = self.assign(pred_bbox, anchor, gt_bboxes)\n\n    expected_gt_inds = torch.LongTensor([0, 0, 0, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_uniform_assigner_with_empty_boxes():\n    \"\"\"Test corner case where a network might predict no boxes.\"\"\"\n    self = UniformAssigner(0.15, 0.7, 1)\n    pred_bbox = torch.empty((0, 4))\n    anchor = torch.empty((0, 4))\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([2, 3])\n\n    # Test with gt_labels\n    assign_result = self.assign(\n        pred_bbox, anchor, gt_bboxes, gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 0\n    assert tuple(assign_result.labels.shape) == (0, )\n\n    # Test without gt_labels\n    assign_result = self.assign(pred_bbox, anchor, gt_bboxes, gt_labels=None)\n    assert len(assign_result.gt_inds) == 0\n\n\ndef test_sim_ota_assigner():\n    self = SimOTAAssigner(\n        center_radius=2.5, candidate_topk=1, iou_weight=3.0, cls_weight=1.0)\n    pred_scores = torch.FloatTensor([[0.2], [0.8]])\n    priors = torch.Tensor([[0, 12, 23, 34], [4, 5, 6, 7]])\n    decoded_bboxes = torch.Tensor([[[30, 40, 50, 60]], [[4, 5, 6, 7]]])\n    gt_bboxes = torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]])\n    gt_labels = torch.LongTensor([2])\n    assign_result = self.assign(pred_scores, priors, decoded_bboxes, gt_bboxes,\n                                gt_labels)\n\n    expected_gt_inds = torch.LongTensor([0, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_task_aligned_assigner():\n    with pytest.raises(AssertionError):\n        TaskAlignedAssigner(topk=0)\n\n    self = TaskAlignedAssigner(topk=13)\n    
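# Note that topk (13) is larger than the 4 candidate anchors defined below.\n    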
pred_score = torch.FloatTensor([[0.1, 0.2], [0.2, 0.3], [0.3, 0.4],\n                                    [0.4, 0.5]])\n    pred_bbox = torch.FloatTensor([\n        [1, 1, 12, 8],\n        [4, 4, 20, 20],\n        [1, 5, 15, 15],\n        [30, 5, 32, 42],\n    ])\n    anchor = torch.FloatTensor([\n        [0, 0, 10, 10],\n        [10, 10, 20, 20],\n        [5, 5, 15, 15],\n        [32, 32, 38, 42],\n    ])\n    gt_bboxes = torch.FloatTensor([\n        [0, 0, 10, 9],\n        [0, 10, 10, 19],\n    ])\n    gt_labels = torch.LongTensor([0, 1])\n    assign_result = self.assign(\n        pred_score,\n        pred_bbox,\n        anchor,\n        gt_bboxes=gt_bboxes,\n        gt_labels=gt_labels)\n    assert len(assign_result.gt_inds) == 4\n    assert len(assign_result.labels) == 4\n\n    # test empty gt\n    gt_bboxes = torch.empty(0, 4)\n    gt_labels = torch.empty(0, 2)\n    assign_result = self.assign(\n        pred_score, pred_bbox, anchor, gt_bboxes=gt_bboxes)\n    expected_gt_inds = torch.LongTensor([0, 0, 0, 0])\n    assert torch.all(assign_result.gt_inds == expected_gt_inds)\n\n\ndef test_mask_hungarian_match_assigner():\n    # test no gt masks\n    assigner_cfg = dict(\n        cls_cost=dict(type='ClassificationCost', weight=1.0),\n        mask_cost=dict(type='FocalLossCost', weight=20.0, binary_input=True),\n        dice_cost=dict(type='DiceCost', weight=1.0, pred_act=True, eps=1.0))\n    self = MaskHungarianAssigner(**assigner_cfg)\n    cls_pred = torch.rand((10, 133))\n    mask_pred = torch.rand((10, 50, 50))\n\n    gt_labels = torch.empty((0, )).long()\n    gt_masks = torch.empty((0, 50, 50)).float()\n    img_meta = None\n    assign_result = self.assign(cls_pred, mask_pred, gt_labels, gt_masks,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds == 0)\n    assert torch.all(assign_result.labels == -1)\n\n    # test with gt masks of naive_dice is True\n    gt_labels = torch.LongTensor([10, 100])\n    gt_masks = torch.zeros((2, 50, 50)).long()\n    gt_masks[0, :25] = 1\n    gt_masks[0, 25:] = 1\n    assign_result = self.assign(cls_pred, mask_pred, gt_labels, gt_masks,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == gt_labels.size(0)\n    assert (assign_result.labels > -1).sum() == gt_labels.size(0)\n\n    # test with cls mode\n    assigner_cfg = dict(\n        cls_cost=dict(type='ClassificationCost', weight=1.0),\n        mask_cost=dict(type='FocalLossCost', weight=0.0, binary_input=True),\n        dice_cost=dict(type='DiceCost', weight=0.0, pred_act=True, eps=1.0))\n    self = MaskHungarianAssigner(**assigner_cfg)\n    assign_result = self.assign(cls_pred, mask_pred, gt_labels, gt_masks,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == gt_labels.size(0)\n    assert (assign_result.labels > -1).sum() == gt_labels.size(0)\n\n    # test with mask focal mode\n    assigner_cfg = dict(\n        cls_cost=dict(type='ClassificationCost', weight=0.0),\n        mask_cost=dict(type='FocalLossCost', weight=1.0, binary_input=True),\n        dice_cost=dict(type='DiceCost', weight=0.0, pred_act=True, eps=1.0))\n    self = MaskHungarianAssigner(**assigner_cfg)\n    assign_result = self.assign(cls_pred, mask_pred, gt_labels, gt_masks,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == 
gt_labels.size(0)\n    assert (assign_result.labels > -1).sum() == gt_labels.size(0)\n\n    # test with mask dice mode\n    assigner_cfg = dict(\n        cls_cost=dict(type='ClassificationCost', weight=0.0),\n        mask_cost=dict(type='FocalLossCost', weight=0.0, binary_input=True),\n        dice_cost=dict(type='DiceCost', weight=1.0, pred_act=True, eps=1.0))\n    self = MaskHungarianAssigner(**assigner_cfg)\n    assign_result = self.assign(cls_pred, mask_pred, gt_labels, gt_masks,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == gt_labels.size(0)\n    assert (assign_result.labels > -1).sum() == gt_labels.size(0)\n\n    # test with mask dice mode that naive_dice is False\n    assigner_cfg = dict(\n        cls_cost=dict(type='ClassificationCost', weight=0.0),\n        mask_cost=dict(type='FocalLossCost', weight=0.0, binary_input=True),\n        dice_cost=dict(\n            type='DiceCost',\n            weight=1.0,\n            pred_act=True,\n            eps=1.0,\n            naive_dice=False))\n    self = MaskHungarianAssigner(**assigner_cfg)\n    assign_result = self.assign(cls_pred, mask_pred, gt_labels, gt_masks,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == gt_labels.size(0)\n    assert (assign_result.labels > -1).sum() == gt_labels.size(0)\n\n    # test with mask bce mode\n    assigner_cfg = dict(\n        cls_cost=dict(type='ClassificationCost', weight=0.0),\n        mask_cost=dict(\n            type='CrossEntropyLossCost', weight=1.0, use_sigmoid=True),\n        dice_cost=dict(type='DiceCost', weight=0.0, pred_act=True, eps=1.0))\n    self = MaskHungarianAssigner(**assigner_cfg)\n    assign_result = self.assign(cls_pred, mask_pred, gt_labels, gt_masks,\n                                img_meta)\n    assert torch.all(assign_result.gt_inds > -1)\n    assert (assign_result.gt_inds > 0).sum() == gt_labels.size(0)\n    assert (assign_result.labels > -1).sum() == gt_labels.size(0)\n\n    # test with ce mode of CrossEntropyLossCost which is not supported yet\n    assigner_cfg = dict(\n        cls_cost=dict(type='ClassificationCost', weight=0.0),\n        mask_cost=dict(\n            type='CrossEntropyLossCost', weight=1.0, use_sigmoid=False),\n        dice_cost=dict(type='DiceCost', weight=0.0, pred_act=True, eps=1.0))\n    with pytest.raises(AssertionError):\n        self = MaskHungarianAssigner(**assigner_cfg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_coder.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\nimport torch\n\nfrom mmdet.core.bbox.coder import (DeltaXYWHBBoxCoder, DistancePointBBoxCoder,\n                                   TBLRBBoxCoder, YOLOBBoxCoder)\n\n\ndef test_yolo_bbox_coder():\n    coder = YOLOBBoxCoder()\n    bboxes = torch.Tensor([[-42., -29., 74., 61.], [-10., -29., 106., 61.],\n                           [22., -29., 138., 61.], [54., -29., 170., 61.]])\n    pred_bboxes = torch.Tensor([[0.4709, 0.6152, 0.1690, -0.4056],\n                                [0.5399, 0.6653, 0.1162, -0.4162],\n                                [0.4654, 0.6618, 0.1548, -0.4301],\n                                [0.4786, 0.6197, 0.1896, -0.4479]])\n    grid_size = 32\n    expected_decode_bboxes = torch.Tensor(\n        [[-53.6102, -10.3096, 83.7478, 49.6824],\n         [-15.8700, -8.3901, 114.4236, 50.9693],\n         [11.1822, -8.0924, 146.6034, 50.4476],\n         [41.2068, -8.9232, 181.4236, 48.5840]])\n    assert expected_decode_bboxes.allclose(\n        coder.decode(bboxes, pred_bboxes, grid_size))\n\n\ndef test_delta_bbox_coder():\n    coder = DeltaXYWHBBoxCoder()\n\n    rois = torch.Tensor([[0., 0., 1., 1.], [0., 0., 1., 1.], [0., 0., 1., 1.],\n                         [5., 5., 5., 5.]])\n    deltas = torch.Tensor([[0., 0., 0., 0.], [1., 1., 1., 1.],\n                           [0., 0., 2., -1.], [0.7, -1.9, -0.5, 0.3]])\n    expected_decode_bboxes = torch.Tensor([[0.0000, 0.0000, 1.0000, 1.0000],\n                                           [0.1409, 0.1409, 2.8591, 2.8591],\n                                           [0.0000, 0.3161, 4.1945, 0.6839],\n                                           [5.0000, 5.0000, 5.0000, 5.0000]])\n\n    out = coder.decode(rois, deltas, max_shape=(32, 32))\n    assert expected_decode_bboxes.allclose(out, atol=1e-04)\n    out = coder.decode(rois, deltas, max_shape=torch.Tensor((32, 32)))\n    assert expected_decode_bboxes.allclose(out, atol=1e-04)\n\n    batch_rois = rois.unsqueeze(0).repeat(2, 1, 1)\n    batch_deltas = deltas.unsqueeze(0).repeat(2, 1, 1)\n    batch_out = coder.decode(batch_rois, batch_deltas, max_shape=(32, 32))[0]\n    assert out.allclose(batch_out)\n    batch_out = coder.decode(\n        batch_rois, batch_deltas, max_shape=[(32, 32), (32, 32)])[0]\n    assert out.allclose(batch_out)\n\n    # test max_shape is not equal to batch\n    with pytest.raises(AssertionError):\n        coder.decode(\n            batch_rois, batch_deltas, max_shape=[(32, 32), (32, 32), (32, 32)])\n\n    rois = torch.zeros((0, 4))\n    deltas = torch.zeros((0, 4))\n    out = coder.decode(rois, deltas, max_shape=(32, 32))\n    assert rois.shape == out.shape\n\n    # test add_ctr_clamp\n    coder = DeltaXYWHBBoxCoder(add_ctr_clamp=True, ctr_clamp=2)\n\n    rois = torch.Tensor([[0., 0., 6., 6.], [0., 0., 1., 1.], [0., 0., 1., 1.],\n                         [5., 5., 5., 5.]])\n    deltas = torch.Tensor([[1., 1., 2., 2.], [1., 1., 1., 1.],\n                           [0., 0., 2., -1.], [0.7, -1.9, -0.5, 0.3]])\n    expected_decode_bboxes = torch.Tensor([[0.0000, 0.0000, 27.1672, 27.1672],\n                                           [0.1409, 0.1409, 2.8591, 2.8591],\n                                           [0.0000, 0.3161, 4.1945, 0.6839],\n                                           [5.0000, 5.0000, 5.0000, 5.0000]])\n\n    out = coder.decode(rois, deltas, max_shape=(32, 32))\n    assert expected_decode_bboxes.allclose(out, atol=1e-04)\n\n\ndef test_tblr_bbox_coder():\n    coder = 
TBLRBBoxCoder(normalizer=15.)\n\n    rois = torch.Tensor([[0., 0., 1., 1.], [0., 0., 1., 1.], [0., 0., 1., 1.],\n                         [5., 5., 5., 5.]])\n    deltas = torch.Tensor([[0., 0., 0., 0.], [1., 1., 1., 1.],\n                           [0., 0., 2., -1.], [0.7, -1.9, -0.5, 0.3]])\n    expected_decode_bboxes = torch.Tensor([[0.5000, 0.5000, 0.5000, 0.5000],\n                                           [0.0000, 0.0000, 12.0000, 13.0000],\n                                           [0.0000, 0.5000, 0.0000, 0.5000],\n                                           [5.0000, 5.0000, 5.0000, 5.0000]])\n\n    out = coder.decode(rois, deltas, max_shape=(13, 12))\n    assert expected_decode_bboxes.allclose(out)\n    out = coder.decode(rois, deltas, max_shape=torch.Tensor((13, 12)))\n    assert expected_decode_bboxes.allclose(out)\n\n    batch_rois = rois.unsqueeze(0).repeat(2, 1, 1)\n    batch_deltas = deltas.unsqueeze(0).repeat(2, 1, 1)\n    batch_out = coder.decode(batch_rois, batch_deltas, max_shape=(13, 12))[0]\n    assert out.allclose(batch_out)\n    batch_out = coder.decode(\n        batch_rois, batch_deltas, max_shape=[(13, 12), (13, 12)])[0]\n    assert out.allclose(batch_out)\n\n    # test max_shape is not equal to batch\n    with pytest.raises(AssertionError):\n        coder.decode(batch_rois, batch_deltas, max_shape=[(13, 12)])\n\n    rois = torch.zeros((0, 4))\n    deltas = torch.zeros((0, 4))\n    out = coder.decode(rois, deltas, max_shape=(32, 32))\n    assert rois.shape == out.shape\n\n\ndef test_distance_point_bbox_coder():\n    coder = DistancePointBBoxCoder()\n\n    points = torch.Tensor([[74., 61.], [-29., 106.], [138., 61.], [29., 170.]])\n    gt_bboxes = torch.Tensor([[74., 61., 75., 62.], [0., 104., 0., 112.],\n                              [100., 90., 100., 120.], [0., 120., 100., 120.]])\n    expected_distance = torch.Tensor([[0., 0., 1., 1.], [0., 2., 29., 6.],\n                                      [38., 0., 0., 50.], [29., 50., 50., 0.]])\n    out_distance = coder.encode(points, gt_bboxes, max_dis=50, eps=0)\n    assert expected_distance.allclose(out_distance)\n\n    distance = torch.Tensor([[0., 0, 1., 1.], [1., 2., 10., 6.],\n                             [22., -29., 138., 61.], [54., -29., 170., 61.]])\n    out_bbox = coder.decode(points, distance, max_shape=(120, 100))\n    assert gt_bboxes.allclose(out_bbox)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_compat_config.py",
    "content": "import pytest\nfrom mmcv import ConfigDict\n\nfrom mmdet.utils.compat_config import (compat_imgs_per_gpu, compat_loader_args,\n                                       compat_runner_args)\n\n\ndef test_compat_runner_args():\n    cfg = ConfigDict(dict(total_epochs=12))\n    with pytest.warns(None) as record:\n        cfg = compat_runner_args(cfg)\n    assert len(record) == 1\n    assert 'runner' in record.list[0].message.args[0]\n    assert 'runner' in cfg\n    assert cfg.runner.type == 'EpochBasedRunner'\n    assert cfg.runner.max_epochs == cfg.total_epochs\n\n\ndef test_compat_loader_args():\n    cfg = ConfigDict(dict(data=dict(val=dict(), test=dict(), train=dict())))\n    cfg = compat_loader_args(cfg)\n    # auto fill loader args\n    assert 'val_dataloader' in cfg.data\n    assert 'train_dataloader' in cfg.data\n    assert 'test_dataloader' in cfg.data\n    cfg = ConfigDict(\n        dict(\n            data=dict(\n                samples_per_gpu=1,\n                persistent_workers=True,\n                workers_per_gpu=1,\n                val=dict(samples_per_gpu=3),\n                test=dict(samples_per_gpu=2),\n                train=dict())))\n\n    cfg = compat_loader_args(cfg)\n\n    assert cfg.data.train_dataloader.workers_per_gpu == 1\n    assert cfg.data.train_dataloader.samples_per_gpu == 1\n    assert cfg.data.train_dataloader.persistent_workers\n    assert cfg.data.val_dataloader.workers_per_gpu == 1\n    assert cfg.data.val_dataloader.samples_per_gpu == 3\n    assert cfg.data.test_dataloader.workers_per_gpu == 1\n    assert cfg.data.test_dataloader.samples_per_gpu == 2\n\n    # test test is a list\n    cfg = ConfigDict(\n        dict(\n            data=dict(\n                samples_per_gpu=1,\n                persistent_workers=True,\n                workers_per_gpu=1,\n                val=dict(samples_per_gpu=3),\n                test=[dict(samples_per_gpu=2),\n                      dict(samples_per_gpu=3)],\n                train=dict())))\n\n    cfg = compat_loader_args(cfg)\n    assert cfg.data.test_dataloader.samples_per_gpu == 3\n\n    # assert can not set args at the same time\n    cfg = ConfigDict(\n        dict(\n            data=dict(\n                samples_per_gpu=1,\n                persistent_workers=True,\n                workers_per_gpu=1,\n                val=dict(samples_per_gpu=3),\n                test=dict(samples_per_gpu=2),\n                train=dict(),\n                train_dataloader=dict(samples_per_gpu=2))))\n    # samples_per_gpu can not be set in `train_dataloader`\n    # and data field at the same time\n    with pytest.raises(AssertionError):\n        compat_loader_args(cfg)\n    cfg = ConfigDict(\n        dict(\n            data=dict(\n                samples_per_gpu=1,\n                persistent_workers=True,\n                workers_per_gpu=1,\n                val=dict(samples_per_gpu=3),\n                test=dict(samples_per_gpu=2),\n                train=dict(),\n                val_dataloader=dict(samples_per_gpu=2))))\n    # samples_per_gpu can not be set in `val_dataloader`\n    # and data field at the same time\n    with pytest.raises(AssertionError):\n        compat_loader_args(cfg)\n    cfg = ConfigDict(\n        dict(\n            data=dict(\n                samples_per_gpu=1,\n                persistent_workers=True,\n                workers_per_gpu=1,\n                val=dict(samples_per_gpu=3),\n                test=dict(samples_per_gpu=2),\n                test_dataloader=dict(samples_per_gpu=2))))\n    # 
samples_per_gpu can not be set in `test_dataloader`\n    # and data field at the same time\n    with pytest.raises(AssertionError):\n        compat_loader_args(cfg)\n\n\ndef test_compat_imgs_per_gpu():\n    cfg = ConfigDict(\n        dict(\n            data=dict(\n                imgs_per_gpu=1,\n                samples_per_gpu=2,\n                val=dict(),\n                test=dict(),\n                train=dict())))\n    cfg = compat_imgs_per_gpu(cfg)\n    assert cfg.data.samples_per_gpu == cfg.data.imgs_per_gpu\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_general_data.py",
    "content": "import copy\n\nimport numpy as np\nimport pytest\nimport torch\n\nfrom mmdet.core import GeneralData, InstanceData\n\n\ndef _equal(a, b):\n    if isinstance(a, (torch.Tensor, np.ndarray)):\n        return (a == b).all()\n    else:\n        return a == b\n\n\ndef test_general_data():\n\n    # test init\n    meta_info = dict(\n        img_size=[256, 256],\n        path='dadfaff',\n        scale_factor=np.array([1.5, 1.5]),\n        img_shape=torch.rand(4))\n\n    data = dict(\n        bboxes=torch.rand(4, 4),\n        labels=torch.rand(4),\n        masks=np.random.rand(4, 2, 2))\n\n    instance_data = GeneralData(meta_info=meta_info)\n    assert 'img_size' in instance_data\n    assert instance_data.img_size == [256, 256]\n    assert instance_data['img_size'] == [256, 256]\n    assert 'path' in instance_data\n    assert instance_data.path == 'dadfaff'\n\n    # test nice_repr\n    repr_instance_data = instance_data.new(data=data)\n    nice_repr = str(repr_instance_data)\n    for line in nice_repr.split('\\n'):\n        if 'masks' in line:\n            assert 'shape' in line\n            assert '(4, 2, 2)' in line\n        if 'bboxes' in line:\n            assert 'shape' in line\n            assert 'torch.Size([4, 4])' in line\n        if 'path' in line:\n            assert 'dadfaff' in line\n        if 'scale_factor' in line:\n            assert '[1.5 1.5]' in line\n\n    instance_data = GeneralData(\n        meta_info=meta_info, data=dict(bboxes=torch.rand(5)))\n    assert 'bboxes' in instance_data\n    assert len(instance_data.bboxes) == 5\n\n    # data should be a dict\n    with pytest.raises(AssertionError):\n        GeneralData(data=1)\n\n    # test set data\n    instance_data = GeneralData()\n    instance_data.set_data(data)\n    assert 'bboxes' in instance_data\n    assert len(instance_data.bboxes) == 4\n    assert 'masks' in instance_data\n    assert len(instance_data.masks) == 4\n    # data should be a dict\n    with pytest.raises(AssertionError):\n        instance_data.set_data(data=1)\n\n    # test set_meta\n    instance_data = GeneralData()\n    instance_data.set_meta_info(meta_info)\n    assert 'img_size' in instance_data\n    assert instance_data.img_size == [256, 256]\n    assert instance_data['img_size'] == [256, 256]\n    assert 'path' in instance_data\n    assert instance_data.path == 'dadfaff'\n    # can skip same value when overwrite\n    instance_data.set_meta_info(meta_info)\n\n    # meta should be a dict\n    with pytest.raises(AssertionError):\n        instance_data.set_meta_info(meta_info='fjhka')\n\n    # attribute in `_meta_info_field` is immutable once initialized\n    instance_data.set_meta_info(meta_info)\n    # meta should be immutable\n    with pytest.raises(KeyError):\n        instance_data.set_meta_info(dict(img_size=[254, 251]))\n    with pytest.raises(KeyError):\n        duplicate_meta_info = copy.deepcopy(meta_info)\n        duplicate_meta_info['path'] = 'dada'\n        instance_data.set_meta_info(duplicate_meta_info)\n    with pytest.raises(KeyError):\n        duplicate_meta_info = copy.deepcopy(meta_info)\n        duplicate_meta_info['scale_factor'] = np.array([1.5, 1.6])\n        instance_data.set_meta_info(duplicate_meta_info)\n\n    # test new_instance_data\n    instance_data = GeneralData(meta_info)\n    new_instance_data = instance_data.new()\n    for k, v in instance_data.meta_info_items():\n        assert k in new_instance_data\n        _equal(v, new_instance_data[k])\n\n    instance_data = GeneralData(meta_info, data=data)\n    
temp_meta = copy.deepcopy(meta_info)\n    temp_data = copy.deepcopy(data)\n    temp_data['time'] = '12212'\n    temp_meta['img_norm'] = np.random.random(3)\n\n    new_instance_data = instance_data.new(meta_info=temp_meta, data=temp_data)\n    for k, v in new_instance_data.meta_info_items():\n        if k in instance_data:\n            _equal(v, instance_data[k])\n        else:\n            assert _equal(v, temp_meta[k])\n            assert k == 'img_norm'\n\n    for k, v in new_instance_data.items():\n        if k in instance_data:\n            _equal(v, instance_data[k])\n        else:\n            assert k == 'time'\n            assert _equal(v, temp_data[k])\n\n    # test keys\n    instance_data = GeneralData(meta_info, data=dict(bboxes=10))\n    assert 'bboxes' in instance_data.keys()\n    instance_data.b = 10\n    assert 'b' in instance_data\n\n    # test meta keys\n    instance_data = GeneralData(meta_info, data=dict(bboxes=10))\n    assert 'path' in instance_data.meta_info_keys()\n    assert len(instance_data.meta_info_keys()) == len(meta_info)\n    instance_data.set_meta_info(dict(workdir='fafaf'))\n    assert 'workdir' in instance_data\n    assert len(instance_data.meta_info_keys()) == len(meta_info) + 1\n\n    # test values\n    instance_data = GeneralData(meta_info, data=dict(bboxes=10))\n    assert 10 in instance_data.values()\n    assert len(instance_data.values()) == 1\n\n    # test meta values\n    instance_data = GeneralData(meta_info, data=dict(bboxes=10))\n    # torch 1.3 eq() can not compare str and tensor\n    from mmdet import digit_version\n    if digit_version(torch.__version__) >= [1, 4]:\n        assert 'dadfaff' in instance_data.meta_info_values()\n    assert len(instance_data.meta_info_values()) == len(meta_info)\n\n    # test items\n    instance_data = GeneralData(data=data)\n    for k, v in instance_data.items():\n        assert k in data\n        assert _equal(v, data[k])\n\n    # test meta_info_items\n    instance_data = GeneralData(meta_info=meta_info)\n    for k, v in instance_data.meta_info_items():\n        assert k in meta_info\n        assert _equal(v, meta_info[k])\n\n    # test __setattr__\n    new_instance_data = GeneralData(data=data)\n    new_instance_data.mask = torch.rand(3, 4, 5)\n    new_instance_data.bboxes = torch.rand(2, 4)\n    assert 'mask' in new_instance_data\n    assert len(new_instance_data.mask) == 3\n    assert len(new_instance_data.bboxes) == 2\n\n    # test instance_data_field has been updated\n    assert 'mask' in new_instance_data._data_fields\n    assert 'bboxes' in new_instance_data._data_fields\n\n    for k in data:\n        assert k in new_instance_data._data_fields\n\n    # '_meta_info_field', '_data_fields' is immutable.\n    with pytest.raises(AttributeError):\n        new_instance_data._data_fields = None\n    with pytest.raises(AttributeError):\n        new_instance_data._meta_info_fields = None\n    with pytest.raises(AttributeError):\n        del new_instance_data._data_fields\n    with pytest.raises(AttributeError):\n        del new_instance_data._meta_info_fields\n\n    # key in _meta_info_field is immutable\n    new_instance_data.set_meta_info(meta_info)\n    with pytest.raises(KeyError):\n        del new_instance_data.img_size\n    with pytest.raises(KeyError):\n        del new_instance_data.scale_factor\n    for k in new_instance_data.meta_info_keys():\n        with pytest.raises(AttributeError):\n            new_instance_data[k] = None\n\n    # test __delattr__\n    # test key can be removed in 
instance_data_field\n    assert 'mask' in new_instance_data._data_fields\n    assert 'mask' in new_instance_data.keys()\n    assert 'mask' in new_instance_data\n    assert hasattr(new_instance_data, 'mask')\n    del new_instance_data.mask\n    assert 'mask' not in new_instance_data.keys()\n    assert 'mask' not in new_instance_data\n    assert 'mask' not in new_instance_data._data_fields\n    assert not hasattr(new_instance_data, 'mask')\n\n    # tset __delitem__\n    new_instance_data.mask = torch.rand(1, 2, 3)\n    assert 'mask' in new_instance_data._data_fields\n    assert 'mask' in new_instance_data\n    assert hasattr(new_instance_data, 'mask')\n    del new_instance_data['mask']\n    assert 'mask' not in new_instance_data\n    assert 'mask' not in new_instance_data._data_fields\n    assert 'mask' not in new_instance_data\n    assert not hasattr(new_instance_data, 'mask')\n\n    # test __setitem__\n    new_instance_data['mask'] = torch.rand(1, 2, 3)\n    assert 'mask' in new_instance_data._data_fields\n    assert 'mask' in new_instance_data.keys()\n    assert hasattr(new_instance_data, 'mask')\n\n    # test data_fields has been updated\n    assert 'mask' in new_instance_data.keys()\n    assert 'mask' in new_instance_data._data_fields\n\n    # '_meta_info_field', '_data_fields' is immutable.\n    with pytest.raises(AttributeError):\n        del new_instance_data['_data_fields']\n    with pytest.raises(AttributeError):\n        del new_instance_data['_meta_info_field']\n\n    #  test __getitem__\n    new_instance_data.mask is new_instance_data['mask']\n\n    # test get\n    assert new_instance_data.get('mask') is new_instance_data.mask\n    assert new_instance_data.get('none_attribute', None) is None\n    assert new_instance_data.get('none_attribute', 1) == 1\n\n    # test pop\n    mask = new_instance_data.mask\n    assert new_instance_data.pop('mask') is mask\n    assert new_instance_data.pop('mask', None) is None\n    assert new_instance_data.pop('mask', 1) == 1\n\n    # '_meta_info_field', '_data_fields' is immutable.\n    with pytest.raises(KeyError):\n        new_instance_data.pop('_data_fields')\n    with pytest.raises(KeyError):\n        new_instance_data.pop('_meta_info_field')\n    # attribute in `_meta_info_field` is immutable\n    with pytest.raises(KeyError):\n        new_instance_data.pop('img_size')\n    # test pop attribute in instance_data_filed\n    new_instance_data['mask'] = torch.rand(1, 2, 3)\n    new_instance_data.pop('mask')\n    # test data_field has been updated\n    assert 'mask' not in new_instance_data\n    assert 'mask' not in new_instance_data._data_fields\n    assert 'mask' not in new_instance_data\n\n    # test_keys\n    new_instance_data.mask = torch.ones(1, 2, 3)\n    'mask' in new_instance_data.keys()\n    has_flag = False\n    for key in new_instance_data.keys():\n        if key == 'mask':\n            has_flag = True\n    assert has_flag\n\n    # test values\n    assert len(list(new_instance_data.keys())) == len(\n        list(new_instance_data.values()))\n    mask = new_instance_data.mask\n    has_flag = False\n    for value in new_instance_data.values():\n        if value is mask:\n            has_flag = True\n    assert has_flag\n\n    # test items\n    assert len(list(new_instance_data.keys())) == len(\n        list(new_instance_data.items()))\n    mask = new_instance_data.mask\n    has_flag = False\n    for key, value in new_instance_data.items():\n        if value is mask:\n            assert key == 'mask'\n            has_flag = True\n    
assert has_flag\n\n    # test device\n    new_instance_data = GeneralData()\n    if torch.cuda.is_available():\n        newnew_instance_data = new_instance_data.new()\n        devices = ('cpu', 'cuda')\n        for i in range(10):\n            device = devices[i % 2]\n            newnew_instance_data[f'{i}'] = torch.rand(1, 2, 3, device=device)\n        newnew_instance_data = newnew_instance_data.cpu()\n        for value in newnew_instance_data.values():\n            assert not value.is_cuda\n        newnew_instance_data = new_instance_data.new()\n        devices = ('cuda', 'cpu')\n        for i in range(10):\n            device = devices[i % 2]\n            newnew_instance_data[f'{i}'] = torch.rand(1, 2, 3, device=device)\n        newnew_instance_data = newnew_instance_data.cuda()\n        for value in newnew_instance_data.values():\n            assert value.is_cuda\n    # test to\n    double_instance_data = instance_data.new()\n    double_instance_data.long = torch.LongTensor(1, 2, 3, 4)\n    double_instance_data.bool = torch.BoolTensor(1, 2, 3, 4)\n    double_instance_data = instance_data.to(torch.double)\n    for k, v in double_instance_data.items():\n        if isinstance(v, torch.Tensor):\n            assert v.dtype is torch.double\n\n    # test .cpu() .cuda()\n    if torch.cuda.is_available():\n        cpu_instance_data = double_instance_data.new()\n        cpu_instance_data.mask = torch.rand(1)\n        cuda_tensor = torch.rand(1, 2, 3).cuda()\n        cuda_instance_data = cpu_instance_data.to(cuda_tensor.device)\n        for value in cuda_instance_data.values():\n            assert value.is_cuda\n        cpu_instance_data = cuda_instance_data.cpu()\n        for value in cpu_instance_data.values():\n            assert not value.is_cuda\n        cuda_instance_data = cpu_instance_data.cuda()\n        for value in cuda_instance_data.values():\n            assert value.is_cuda\n\n    # test detach\n    grad_instance_data = double_instance_data.new()\n    grad_instance_data.mask = torch.rand(2, requires_grad=True)\n    grad_instance_data.mask_1 = torch.rand(2, requires_grad=True)\n    detach_instance_data = grad_instance_data.detach()\n    for value in detach_instance_data.values():\n        assert not value.requires_grad\n\n    # test numpy\n    tensor_instance_data = double_instance_data.new()\n    tensor_instance_data.mask = torch.rand(2, requires_grad=True)\n    tensor_instance_data.mask_1 = torch.rand(2, requires_grad=True)\n    numpy_instance_data = tensor_instance_data.numpy()\n    for value in numpy_instance_data.values():\n        assert isinstance(value, np.ndarray)\n    if torch.cuda.is_available():\n        tensor_instance_data = double_instance_data.new()\n        tensor_instance_data.mask = torch.rand(2)\n        tensor_instance_data.mask_1 = torch.rand(2)\n        tensor_instance_data = tensor_instance_data.cuda()\n        numpy_instance_data = tensor_instance_data.numpy()\n        for value in numpy_instance_data.values():\n            assert isinstance(value, np.ndarray)\n\n    instance_data['_c'] = 10000\n    instance_data.get('dad', None) is None\n    assert hasattr(instance_data, '_c')\n    del instance_data['_c']\n    assert not hasattr(instance_data, '_c')\n    instance_data.a = 1000\n    instance_data['a'] = 2000\n    assert instance_data['a'] == 2000\n    assert instance_data.a == 2000\n    assert instance_data.get('a') == instance_data['a'] == instance_data.a\n    instance_data._meta = 1000\n    assert '_meta' in instance_data.keys()\n    if 
torch.cuda.is_available():\n        instance_data.bbox = torch.ones(2, 3, 4, 5).cuda()\n        instance_data.score = torch.ones(2, 3, 4, 4)\n    else:\n        instance_data.bbox = torch.ones(2, 3, 4, 5)\n\n    assert len(instance_data.new().keys()) == 0\n    with pytest.raises(AttributeError):\n        instance_data.img_size = 100\n\n    for k, v in instance_data.items():\n        if k == 'bbox':\n            assert isinstance(v, torch.Tensor)\n    assert 'a' in instance_data\n    instance_data.pop('a')\n    assert 'a' not in instance_data\n\n    cpu_instance_data = instance_data.cpu()\n    for k, v in cpu_instance_data.items():\n        if isinstance(v, torch.Tensor):\n            assert not v.is_cuda\n\n    assert isinstance(cpu_instance_data.numpy().bbox, np.ndarray)\n\n    if torch.cuda.is_available():\n        cuda_resutls = instance_data.cuda()\n        for k, v in cuda_resutls.items():\n            if isinstance(v, torch.Tensor):\n                assert v.is_cuda\n\n\ndef test_instance_data():\n    meta_info = dict(\n        img_size=(256, 256),\n        path='dadfaff',\n        scale_factor=np.array([1.5, 1.5, 1, 1]))\n\n    data = dict(\n        bboxes=torch.rand(4, 4),\n        masks=torch.rand(4, 2, 2),\n        labels=np.random.rand(4),\n        size=[(i, i) for i in range(4)])\n\n    # test init\n    instance_data = InstanceData(meta_info)\n    assert 'path' in instance_data\n    instance_data = InstanceData(meta_info, data=data)\n    assert len(instance_data) == 4\n    instance_data.set_data(data)\n    assert len(instance_data) == 4\n\n    meta_info = copy.deepcopy(meta_info)\n    meta_info['img_name'] = 'flag'\n\n    # test newinstance_data\n    new_instance_data = instance_data.new(meta_info=meta_info)\n    for k, v in new_instance_data.meta_info_items():\n        if k in instance_data:\n            _equal(v, instance_data[k])\n        else:\n            assert _equal(v, meta_info[k])\n            assert k == 'img_name'\n    # meta info is immutable\n    with pytest.raises(KeyError):\n        meta_info = copy.deepcopy(meta_info)\n        meta_info['path'] = 'fdasfdsd'\n        instance_data.new(meta_info=meta_info)\n\n    # data fields should have same length\n    with pytest.raises(AssertionError):\n        temp_data = copy.deepcopy(data)\n        temp_data['bboxes'] = torch.rand(5, 4)\n        instance_data.new(data=temp_data)\n\n    temp_data = copy.deepcopy(data)\n    temp_data['scores'] = torch.rand(4)\n    new_instance_data = instance_data.new(data=temp_data)\n    for k, v in new_instance_data.items():\n        if k in instance_data:\n            _equal(v, instance_data[k])\n        else:\n            assert k == 'scores'\n            assert _equal(v, temp_data[k])\n\n    instance_data = instance_data.new()\n\n    # test __setattr__\n    # '_meta_info_field', '_data_fields' is immutable.\n    with pytest.raises(AttributeError):\n        instance_data._data_fields = dict()\n    with pytest.raises(AttributeError):\n        instance_data._data_fields = dict()\n\n    # all attribute in instance_data_field should be\n    # (torch.Tensor, np.ndarray, list))\n    with pytest.raises(AssertionError):\n        instance_data.a = 1000\n\n    # instance_data field should has same length\n    new_instance_data = instance_data.new()\n    new_instance_data.det_bbox = torch.rand(100, 4)\n    new_instance_data.det_label = torch.arange(100)\n    with pytest.raises(AssertionError):\n        new_instance_data.scores = torch.rand(101, 1)\n    new_instance_data.none = [None] * 100\n    
with pytest.raises(AssertionError):\n        new_instance_data.scores = [None] * 101\n    new_instance_data.numpy_det = np.random.random([100, 1])\n    with pytest.raises(AssertionError):\n        new_instance_data.scores = np.random.random([101, 1])\n\n    # isinstance(str, slice, int, torch.LongTensor, torch.BoolTensor)\n    item = torch.Tensor([1, 2, 3, 4])\n    with pytest.raises(AssertionError):\n        new_instance_data[item]\n    len(new_instance_data[item.long()]) == 1\n\n    # when input is a bool tensor, The shape of\n    # the input at index 0 should equal to\n    # the value length in instance_data_field\n    with pytest.raises(AssertionError):\n        new_instance_data[item.bool()]\n\n    for i in range(len(new_instance_data)):\n        assert new_instance_data[i].det_label == i\n        assert len(new_instance_data[i]) == 1\n\n    # assert the index should in 0 ~ len(instance_data) -1\n    with pytest.raises(IndexError):\n        new_instance_data[101]\n\n    # assert the index should not be an empty tensor\n    new_new_instance_data = new_instance_data.new()\n    with pytest.raises(AssertionError):\n        new_new_instance_data[0]\n\n    # test str\n    with pytest.raises(AssertionError):\n        instance_data.img_size_dummmy = meta_info['img_size']\n\n    # test slice\n    ten_ressults = new_instance_data[:10]\n    len(ten_ressults) == 10\n    for v in ten_ressults.values():\n        assert len(v) == 10\n\n    # test Longtensor\n    long_tensor = torch.randint(100, (50, ))\n    long_index_instance_data = new_instance_data[long_tensor]\n    assert len(long_index_instance_data) == len(long_tensor)\n    for key, value in long_index_instance_data.items():\n        if not isinstance(value, list):\n            assert (long_index_instance_data[key] == new_instance_data[key]\n                    [long_tensor]).all()\n        else:\n            len(long_tensor) == len(value)\n\n    # test bool tensor\n    bool_tensor = torch.rand(100) > 0.5\n    bool_index_instance_data = new_instance_data[bool_tensor]\n    assert len(bool_index_instance_data) == bool_tensor.sum()\n    for key, value in bool_index_instance_data.items():\n        if not isinstance(value, list):\n            assert (bool_index_instance_data[key] == new_instance_data[key]\n                    [bool_tensor]).all()\n        else:\n            assert len(value) == bool_tensor.sum()\n\n    num_instance = 1000\n    instance_data_list = []\n\n    # assert len(instance_lists) > 0\n    with pytest.raises(AssertionError):\n        instance_data.cat(instance_data_list)\n\n    for _ in range(2):\n        instance_data['bbox'] = torch.rand(num_instance, 4)\n        instance_data['label'] = torch.rand(num_instance, 1)\n        instance_data['mask'] = torch.rand(num_instance, 224, 224)\n        instance_data['instances_infos'] = [1] * num_instance\n        instance_data['cpu_bbox'] = np.random.random((num_instance, 4))\n        if torch.cuda.is_available():\n            instance_data.cuda_tensor = torch.rand(num_instance).cuda()\n            assert instance_data.cuda_tensor.is_cuda\n            cuda_instance_data = instance_data.cuda()\n            assert cuda_instance_data.cuda_tensor.is_cuda\n\n        assert len(instance_data[0]) == 1\n        with pytest.raises(IndexError):\n            return instance_data[num_instance + 1]\n        with pytest.raises(AssertionError):\n            instance_data.centerness = torch.rand(num_instance + 1, 1)\n\n        mask_tensor = torch.rand(num_instance) > 0.5\n        length = 
mask_tensor.sum()\n        assert len(instance_data[mask_tensor]) == length\n\n        index_tensor = torch.LongTensor([1, 5, 8, 110, 399])\n        length = len(index_tensor)\n\n        assert len(instance_data[index_tensor]) == length\n\n        instance_data_list.append(instance_data)\n\n    cat_resutls = InstanceData.cat(instance_data_list)\n    assert len(cat_resutls) == num_instance * 2\n\n    instances = InstanceData(data=dict(bboxes=torch.rand(4, 4)))\n    # cat only single instance\n    assert len(InstanceData.cat([instances])) == 4\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_hook.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport logging\nimport shutil\nimport sys\nimport tempfile\nfrom unittest.mock import MagicMock, Mock, call, patch\n\nimport numpy as np\nimport pytest\nimport torch\nimport torch.nn as nn\nfrom mmcv.runner import (CheckpointHook, IterTimerHook, PaviLoggerHook,\n                         build_runner)\nfrom torch.nn.init import constant_\nfrom torch.utils.data import DataLoader, Dataset\n\nfrom mmdet.core.hook import ExpMomentumEMAHook, YOLOXLrUpdaterHook\nfrom mmdet.core.hook.sync_norm_hook import SyncNormHook\nfrom mmdet.core.hook.sync_random_size_hook import SyncRandomSizeHook\n\n\ndef _build_demo_runner_without_hook(runner_type='EpochBasedRunner',\n                                    max_epochs=1,\n                                    max_iters=None,\n                                    multi_optimziers=False):\n\n    class Model(nn.Module):\n\n        def __init__(self):\n            super().__init__()\n            self.linear = nn.Linear(2, 1)\n            self.conv = nn.Conv2d(3, 3, 3)\n\n        def forward(self, x):\n            return self.linear(x)\n\n        def train_step(self, x, optimizer, **kwargs):\n            return dict(loss=self(x))\n\n        def val_step(self, x, optimizer, **kwargs):\n            return dict(loss=self(x))\n\n    model = Model()\n\n    if multi_optimziers:\n        optimizer = {\n            'model1':\n            torch.optim.SGD(model.linear.parameters(), lr=0.02, momentum=0.95),\n            'model2':\n            torch.optim.SGD(model.conv.parameters(), lr=0.01, momentum=0.9),\n        }\n    else:\n        optimizer = torch.optim.SGD(model.parameters(), lr=0.02, momentum=0.95)\n\n    tmp_dir = tempfile.mkdtemp()\n    runner = build_runner(\n        dict(type=runner_type),\n        default_args=dict(\n            model=model,\n            work_dir=tmp_dir,\n            optimizer=optimizer,\n            logger=logging.getLogger(),\n            max_epochs=max_epochs,\n            max_iters=max_iters))\n    return runner\n\n\ndef _build_demo_runner(runner_type='EpochBasedRunner',\n                       max_epochs=1,\n                       max_iters=None,\n                       multi_optimziers=False):\n    log_config = dict(\n        interval=1, hooks=[\n            dict(type='TextLoggerHook'),\n        ])\n\n    runner = _build_demo_runner_without_hook(runner_type, max_epochs,\n                                             max_iters, multi_optimziers)\n\n    runner.register_checkpoint_hook(dict(interval=1))\n    runner.register_logger_hooks(log_config)\n    return runner\n\n\n@pytest.mark.parametrize('multi_optimziers', (True, False))\ndef test_yolox_lrupdater_hook(multi_optimziers):\n    \"\"\"xdoctest -m tests/test_hooks.py test_cosine_runner_hook.\"\"\"\n    # Only used to prevent program errors\n    YOLOXLrUpdaterHook(0, min_lr_ratio=0.05)\n\n    sys.modules['pavi'] = MagicMock()\n    loader = DataLoader(torch.ones((10, 2)))\n    runner = _build_demo_runner(multi_optimziers=multi_optimziers)\n\n    hook_cfg = dict(\n        type='YOLOXLrUpdaterHook',\n        warmup='exp',\n        by_epoch=False,\n        warmup_by_epoch=True,\n        warmup_ratio=1,\n        warmup_iters=5,  # 5 epoch\n        num_last_epochs=15,\n        min_lr_ratio=0.05)\n    runner.register_hook_from_cfg(hook_cfg)\n    runner.register_hook_from_cfg(dict(type='IterTimerHook'))\n    runner.register_hook(IterTimerHook())\n\n    # add pavi hook\n    hook = PaviLoggerHook(interval=1, add_graph=False, 
add_last_ckpt=True)\n    runner.register_hook(hook)\n    runner.run([loader], [('train', 1)])\n    shutil.rmtree(runner.work_dir)\n\n    # TODO: use a more elegant way to check values\n    assert hasattr(hook, 'writer')\n    if multi_optimziers:\n        calls = [\n            call(\n                'train', {\n                    'learning_rate/model1': 8.000000000000001e-06,\n                    'learning_rate/model2': 4.000000000000001e-06,\n                    'momentum/model1': 0.95,\n                    'momentum/model2': 0.9\n                }, 1),\n            call(\n                'train', {\n                    'learning_rate/model1': 0.00039200000000000004,\n                    'learning_rate/model2': 0.00019600000000000002,\n                    'momentum/model1': 0.95,\n                    'momentum/model2': 0.9\n                }, 7),\n            call(\n                'train', {\n                    'learning_rate/model1': 0.0008000000000000001,\n                    'learning_rate/model2': 0.0004000000000000001,\n                    'momentum/model1': 0.95,\n                    'momentum/model2': 0.9\n                }, 10)\n        ]\n    else:\n        calls = [\n            call('train', {\n                'learning_rate': 8.000000000000001e-06,\n                'momentum': 0.95\n            }, 1),\n            call('train', {\n                'learning_rate': 0.00039200000000000004,\n                'momentum': 0.95\n            }, 7),\n            call('train', {\n                'learning_rate': 0.0008000000000000001,\n                'momentum': 0.95\n            }, 10)\n        ]\n    hook.writer.add_scalars.assert_has_calls(calls, any_order=True)\n\n\ndef test_ema_hook():\n    \"\"\"xdoctest -m tests/test_hooks.py test_ema_hook.\"\"\"\n\n    class DemoModel(nn.Module):\n\n        def __init__(self):\n            super().__init__()\n            self.conv = nn.Conv2d(\n                in_channels=1,\n                out_channels=2,\n                kernel_size=1,\n                padding=1,\n                bias=True)\n            self.bn = nn.BatchNorm2d(2)\n\n            self._init_weight()\n\n        def _init_weight(self):\n            constant_(self.conv.weight, 0)\n            constant_(self.conv.bias, 0)\n            constant_(self.bn.weight, 0)\n            constant_(self.bn.bias, 0)\n\n        def forward(self, x):\n            return self.bn(self.conv(x)).sum()\n\n        def train_step(self, x, optimizer, **kwargs):\n            return dict(loss=self(x))\n\n        def val_step(self, x, optimizer, **kwargs):\n            return dict(loss=self(x))\n\n    loader = DataLoader(torch.ones((1, 1, 1, 1)))\n    runner = _build_demo_runner()\n    demo_model = DemoModel()\n    runner.model = demo_model\n    ema_hook = ExpMomentumEMAHook(\n        momentum=0.0002,\n        total_iter=1,\n        skip_buffers=True,\n        interval=2,\n        resume_from=None)\n    checkpointhook = CheckpointHook(interval=1, by_epoch=True)\n    runner.register_hook(ema_hook, priority='HIGHEST')\n    runner.register_hook(checkpointhook)\n    runner.run([loader, loader], [('train', 1), ('val', 1)])\n    checkpoint = torch.load(f'{runner.work_dir}/epoch_1.pth')\n    num_eam_params = 0\n    for name, value in checkpoint['state_dict'].items():\n        if 'ema' in name:\n            num_eam_params += 1\n            value.fill_(1)\n    assert num_eam_params == 4\n    torch.save(checkpoint, f'{runner.work_dir}/epoch_1.pth')\n\n    work_dir = runner.work_dir\n    resume_ema_hook = 
ExpMomentumEMAHook(\n        momentum=0.5,\n        total_iter=10,\n        skip_buffers=True,\n        interval=1,\n        resume_from=f'{work_dir}/epoch_1.pth')\n    runner = _build_demo_runner(max_epochs=2)\n    runner.model = demo_model\n    runner.register_hook(resume_ema_hook, priority='HIGHEST')\n    checkpointhook = CheckpointHook(interval=1, by_epoch=True)\n    runner.register_hook(checkpointhook)\n    runner.run([loader, loader], [('train', 1), ('val', 1)])\n    checkpoint = torch.load(f'{runner.work_dir}/epoch_2.pth')\n    num_eam_params = 0\n    desired_output = [0.9094, 0.9094]\n    for name, value in checkpoint['state_dict'].items():\n        if 'ema' in name:\n            num_eam_params += 1\n            assert value.sum() == 2\n        else:\n            if ('weight' in name) or ('bias' in name):\n                np.allclose(value.data.cpu().numpy().reshape(-1),\n                            desired_output, 1e-4)\n    assert num_eam_params == 4\n    shutil.rmtree(runner.work_dir)\n    shutil.rmtree(work_dir)\n\n\ndef test_sync_norm_hook():\n    # Only used to prevent program errors\n    SyncNormHook()\n\n    loader = DataLoader(torch.ones((5, 2)))\n    runner = _build_demo_runner()\n    runner.register_hook_from_cfg(dict(type='SyncNormHook'))\n    runner.run([loader, loader], [('train', 1), ('val', 1)])\n    shutil.rmtree(runner.work_dir)\n\n\ndef test_sync_random_size_hook():\n    # Only used to prevent program errors\n    SyncRandomSizeHook()\n\n    class DemoDataset(Dataset):\n\n        def __getitem__(self, item):\n            return torch.ones(2)\n\n        def __len__(self):\n            return 5\n\n        def update_dynamic_scale(self, dynamic_scale):\n            pass\n\n    loader = DataLoader(DemoDataset())\n    runner = _build_demo_runner()\n    runner.register_hook_from_cfg(\n        dict(type='SyncRandomSizeHook', device='cpu'))\n    runner.run([loader, loader], [('train', 1), ('val', 1)])\n    shutil.rmtree(runner.work_dir)\n\n    if torch.cuda.is_available():\n        runner = _build_demo_runner()\n        runner.register_hook_from_cfg(\n            dict(type='SyncRandomSizeHook', device='cuda'))\n        runner.run([loader, loader], [('train', 1), ('val', 1)])\n        shutil.rmtree(runner.work_dir)\n\n\n@pytest.mark.parametrize('set_loss', [\n    dict(set_loss_nan=False, set_loss_inf=False),\n    dict(set_loss_nan=True, set_loss_inf=False),\n    dict(set_loss_nan=False, set_loss_inf=True)\n])\ndef test_check_invalid_loss_hook(set_loss):\n    # Check whether loss is valid during training.\n\n    class DemoModel(nn.Module):\n\n        def __init__(self, set_loss_nan=False, set_loss_inf=False):\n            super().__init__()\n            self.set_loss_nan = set_loss_nan\n            self.set_loss_inf = set_loss_inf\n            self.linear = nn.Linear(2, 1)\n\n        def forward(self, x):\n            return self.linear(x)\n\n        def train_step(self, x, optimizer, **kwargs):\n            if self.set_loss_nan:\n                return dict(loss=torch.tensor(float('nan')))\n            elif self.set_loss_inf:\n                return dict(loss=torch.tensor(float('inf')))\n            else:\n                return dict(loss=self(x))\n\n    loader = DataLoader(torch.ones((5, 2)))\n    runner = _build_demo_runner()\n\n    demo_model = DemoModel(**set_loss)\n    runner.model = demo_model\n    runner.register_hook_from_cfg(\n        dict(type='CheckInvalidLossHook', interval=1))\n    if not set_loss['set_loss_nan'] \\\n            and not 
set_loss['set_loss_inf']:\n        # check loss is valid\n        runner.run([loader], [('train', 1)])\n    else:\n        # check loss is nan or inf\n        with pytest.raises(AssertionError):\n            runner.run([loader], [('train', 1)])\n    shutil.rmtree(runner.work_dir)\n\n\ndef test_set_epoch_info_hook():\n    \"\"\"Test SetEpochInfoHook.\"\"\"\n\n    class DemoModel(nn.Module):\n\n        def __init__(self):\n            super().__init__()\n            self.epoch = 0\n            self.linear = nn.Linear(2, 1)\n\n        def forward(self, x):\n            return self.linear(x)\n\n        def train_step(self, x, optimizer, **kwargs):\n            return dict(loss=self(x))\n\n        def set_epoch(self, epoch):\n            self.epoch = epoch\n\n    loader = DataLoader(torch.ones((5, 2)))\n    runner = _build_demo_runner(max_epochs=3)\n\n    demo_model = DemoModel()\n    runner.model = demo_model\n    runner.register_hook_from_cfg(dict(type='SetEpochInfoHook'))\n    runner.run([loader], [('train', 1)])\n    assert demo_model.epoch == 2\n\n\ndef test_memory_profiler_hook():\n    from collections import namedtuple\n\n    # test ImportError without psutil and memory_profiler\n    with pytest.raises(ImportError):\n        from mmdet.core.hook import MemoryProfilerHook\n        MemoryProfilerHook(1)\n\n    # test ImportError without memory_profiler\n    sys.modules['psutil'] = MagicMock()\n    with pytest.raises(ImportError):\n        from mmdet.core.hook import MemoryProfilerHook\n        MemoryProfilerHook(1)\n\n    sys.modules['memory_profiler'] = MagicMock()\n\n    def _mock_virtual_memory():\n        virtual_memory_type = namedtuple(\n            'virtual_memory', ['total', 'available', 'percent', 'used'])\n        return virtual_memory_type(\n            total=270109085696,\n            available=250416816128,\n            percent=7.3,\n            used=17840881664)\n\n    def _mock_swap_memory():\n        swap_memory_type = namedtuple('swap_memory', [\n            'total',\n            'used',\n            'percent',\n        ])\n        return swap_memory_type(total=8589930496, used=0, percent=0.0)\n\n    def _mock_memory_usage():\n        return [40.22265625]\n\n    mock_virtual_memory = Mock(return_value=_mock_virtual_memory())\n    mock_swap_memory = Mock(return_value=_mock_swap_memory())\n    mock_memory_usage = Mock(return_value=_mock_memory_usage())\n\n    @patch('psutil.swap_memory', mock_swap_memory)\n    @patch('psutil.virtual_memory', mock_virtual_memory)\n    @patch('memory_profiler.memory_usage', mock_memory_usage)\n    def _test_memory_profiler_hook():\n        from mmdet.core.hook import MemoryProfilerHook\n        hook = MemoryProfilerHook(1)\n        runner = _build_demo_runner()\n\n        assert not mock_memory_usage.called\n        assert not mock_swap_memory.called\n        assert not mock_memory_usage.called\n\n        hook.after_iter(runner)\n\n        assert mock_memory_usage.called\n        assert mock_swap_memory.called\n        assert mock_memory_usage.called\n\n    _test_memory_profiler_hook()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_layer_decay_optimizer_constructor.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport torch\nimport torch.nn as nn\nfrom mmcv.cnn import ConvModule\n\nfrom mmdet.core.optimizers import LearningRateDecayOptimizerConstructor\n\nbase_lr = 1\ndecay_rate = 2\nbase_wd = 0.05\nweight_decay = 0.05\n\nexpected_stage_wise_lr_wd_convnext = [{\n    'weight_decay': 0.0,\n    'lr_scale': 128\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 1\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 64\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 64\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 32\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 32\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 16\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 16\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 8\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 8\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 128\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 1\n}]\n\nexpected_layer_wise_lr_wd_convnext = [{\n    'weight_decay': 0.0,\n    'lr_scale': 128\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 1\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 64\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 64\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 32\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 32\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 16\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 16\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 2\n}, {\n    'weight_decay': 0.0,\n    'lr_scale': 2\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 128\n}, {\n    'weight_decay': 0.05,\n    'lr_scale': 1\n}]\n\n\nclass ToyConvNeXt(nn.Module):\n\n    def __init__(self):\n        super().__init__()\n        self.stages = nn.ModuleList()\n        for i in range(4):\n            stage = nn.Sequential(ConvModule(3, 4, kernel_size=1, bias=True))\n            self.stages.append(stage)\n        self.norm0 = nn.BatchNorm2d(2)\n\n        # add some variables to meet unit test coverate rate\n        self.cls_token = nn.Parameter(torch.ones(1))\n        self.mask_token = nn.Parameter(torch.ones(1))\n        self.pos_embed = nn.Parameter(torch.ones(1))\n        self.stem_norm = nn.Parameter(torch.ones(1))\n        self.downsample_norm0 = nn.BatchNorm2d(2)\n        self.downsample_norm1 = nn.BatchNorm2d(2)\n        self.downsample_norm2 = nn.BatchNorm2d(2)\n        self.lin = nn.Parameter(torch.ones(1))\n        self.lin.requires_grad = False\n        self.downsample_layers = nn.ModuleList()\n        for _ in range(4):\n            stage = nn.Sequential(nn.Conv2d(3, 4, kernel_size=1, bias=True))\n            self.downsample_layers.append(stage)\n\n\nclass ToyDetector(nn.Module):\n\n    def __init__(self, backbone):\n        super().__init__()\n        self.backbone = backbone\n        self.head = nn.Conv2d(2, 2, kernel_size=1, groups=2)\n\n\nclass PseudoDataParallel(nn.Module):\n\n    def __init__(self, model):\n        super().__init__()\n        self.module = model\n\n\ndef check_optimizer_lr_wd(optimizer, gt_lr_wd):\n    assert isinstance(optimizer, torch.optim.AdamW)\n    assert optimizer.defaults['lr'] == base_lr\n    assert optimizer.defaults['weight_decay'] == base_wd\n    param_groups = optimizer.param_groups\n    print(param_groups)\n    assert len(param_groups) == len(gt_lr_wd)\n    for i, param_dict in enumerate(param_groups):\n        assert param_dict['weight_decay'] == gt_lr_wd[i]['weight_decay']\n        assert param_dict['lr_scale'] == gt_lr_wd[i]['lr_scale']\n        assert param_dict['lr_scale'] == 
param_dict['lr']\n\n\ndef test_learning_rate_decay_optimizer_constructor():\n\n    # Test lr wd for ConvNeXT\n    backbone = ToyConvNeXt()\n    model = PseudoDataParallel(ToyDetector(backbone))\n    optimizer_cfg = dict(\n        type='AdamW', lr=base_lr, betas=(0.9, 0.999), weight_decay=0.05)\n    # stagewise decay\n    stagewise_paramwise_cfg = dict(\n        decay_rate=decay_rate, decay_type='stage_wise', num_layers=6)\n    optim_constructor = LearningRateDecayOptimizerConstructor(\n        optimizer_cfg, stagewise_paramwise_cfg)\n    optimizer = optim_constructor(model)\n    check_optimizer_lr_wd(optimizer, expected_stage_wise_lr_wd_convnext)\n    # layerwise decay\n    layerwise_paramwise_cfg = dict(\n        decay_rate=decay_rate, decay_type='layer_wise', num_layers=6)\n    optim_constructor = LearningRateDecayOptimizerConstructor(\n        optimizer_cfg, layerwise_paramwise_cfg)\n    optimizer = optim_constructor(model)\n    check_optimizer_lr_wd(optimizer, expected_layer_wise_lr_wd_convnext)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_logger.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport pytest\n\nfrom mmdet.utils import get_caller_name, log_img_scale\n\n\ndef callee_func():\n    caller_name = get_caller_name()\n    return caller_name\n\n\nclass CallerClassForTest:\n\n    def __init__(self):\n        self.caller_name = callee_func()\n\n\ndef test_get_caller_name():\n    # test the case that caller is a function\n    caller_name = callee_func()\n    assert caller_name == 'test_get_caller_name'\n\n    # test the case that caller is a method in a class\n    caller_class = CallerClassForTest()\n    assert caller_class.caller_name == 'CallerClassForTest.__init__'\n\n\ndef test_log_img_scale():\n    img_scale = (800, 1333)\n    done_logging = log_img_scale(img_scale)\n    assert done_logging\n\n    img_scale = (1333, 800)\n    done_logging = log_img_scale(img_scale, shape_order='wh')\n    assert done_logging\n\n    with pytest.raises(ValueError):\n        img_scale = (1333, 800)\n        done_logging = log_img_scale(img_scale, shape_order='xywh')\n\n    img_scale = (640, 640)\n    done_logging = log_img_scale(img_scale, skip_square=False)\n    assert done_logging\n\n    img_scale = (640, 640)\n    done_logging = log_img_scale(img_scale, skip_square=True)\n    assert not done_logging\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_masks.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport numpy as np\nimport pytest\nimport torch\n\nfrom mmdet.core import BitmapMasks, PolygonMasks, mask2bbox\n\n\ndef dummy_raw_bitmap_masks(size):\n    \"\"\"\n    Args:\n        size (tuple): expected shape of dummy masks, (H, W) or (N, H, W)\n\n    Return:\n        ndarray: dummy mask\n    \"\"\"\n    return np.random.randint(0, 2, size, dtype=np.uint8)\n\n\ndef dummy_raw_polygon_masks(size):\n    \"\"\"\n    Args:\n        size (tuple): expected shape of dummy masks, (N, H, W)\n\n    Return:\n        list[list[ndarray]]: dummy mask\n    \"\"\"\n    num_obj, height, width = size\n    polygons = []\n    for _ in range(num_obj):\n        num_points = np.random.randint(5) * 2 + 6\n        polygons.append([np.random.uniform(0, min(height, width), num_points)])\n    return polygons\n\n\ndef dummy_bboxes(num, max_height, max_width):\n    x1y1 = np.random.randint(0, min(max_height // 2, max_width // 2), (num, 2))\n    wh = np.random.randint(0, min(max_height // 2, max_width // 2), (num, 2))\n    x2y2 = x1y1 + wh\n    return np.concatenate([x1y1, x2y2], axis=1).squeeze().astype(np.float32)\n\n\ndef test_bitmap_mask_init():\n    # init with empty ndarray masks\n    raw_masks = np.empty((0, 28, 28), dtype=np.uint8)\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    assert len(bitmap_masks) == 0\n    assert bitmap_masks.height == 28\n    assert bitmap_masks.width == 28\n\n    # init with empty list masks\n    raw_masks = []\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    assert len(bitmap_masks) == 0\n    assert bitmap_masks.height == 28\n    assert bitmap_masks.width == 28\n\n    # init with ndarray masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    assert len(bitmap_masks) == 3\n    assert bitmap_masks.height == 28\n    assert bitmap_masks.width == 28\n\n    # init with list masks contain 3 instances\n    raw_masks = [dummy_raw_bitmap_masks((28, 28)) for _ in range(3)]\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    assert len(bitmap_masks) == 3\n    assert bitmap_masks.height == 28\n    assert bitmap_masks.width == 28\n\n    # init with raw masks of unsupported type\n    with pytest.raises(AssertionError):\n        raw_masks = [[dummy_raw_bitmap_masks((28, 28))]]\n        BitmapMasks(raw_masks, 28, 28)\n\n\ndef test_bitmap_mask_rescale():\n    # rescale with empty bitmap masks\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    rescaled_masks = bitmap_masks.rescale((56, 72))\n    assert len(rescaled_masks) == 0\n    assert rescaled_masks.height == 56\n    assert rescaled_masks.width == 56\n\n    # rescale with bitmap masks contain 1 instances\n    raw_masks = np.array([[[1, 0, 0, 0], [0, 1, 0, 1]]])\n    bitmap_masks = BitmapMasks(raw_masks, 2, 4)\n    rescaled_masks = bitmap_masks.rescale((8, 8))\n    assert len(rescaled_masks) == 1\n    assert rescaled_masks.height == 4\n    assert rescaled_masks.width == 8\n    truth = np.array([[[1, 1, 0, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],\n                       [0, 0, 1, 1, 0, 0, 1, 1], [0, 0, 1, 1, 0, 0, 1, 1]]])\n    assert (rescaled_masks.masks == truth).all()\n\n\ndef test_bitmap_mask_resize():\n    # resize with empty bitmap masks\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    resized_masks = bitmap_masks.resize((56, 72))\n    assert len(resized_masks) == 
0\n    assert resized_masks.height == 56\n    assert resized_masks.width == 72\n\n    # resize with bitmap masks contain 1 instances\n    raw_masks = np.diag(np.ones(4, dtype=np.uint8))[np.newaxis, ...]\n    bitmap_masks = BitmapMasks(raw_masks, 4, 4)\n    resized_masks = bitmap_masks.resize((8, 8))\n    assert len(resized_masks) == 1\n    assert resized_masks.height == 8\n    assert resized_masks.width == 8\n    truth = np.array([[[1, 1, 0, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],\n                       [0, 0, 1, 1, 0, 0, 0, 0], [0, 0, 1, 1, 0, 0, 0, 0],\n                       [0, 0, 0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 1, 1, 0, 0],\n                       [0, 0, 0, 0, 0, 0, 1, 1], [0, 0, 0, 0, 0, 0, 1, 1]]])\n    assert (resized_masks.masks == truth).all()\n\n    # resize to non-square\n    raw_masks = np.diag(np.ones(4, dtype=np.uint8))[np.newaxis, ...]\n    bitmap_masks = BitmapMasks(raw_masks, 4, 4)\n    resized_masks = bitmap_masks.resize((4, 8))\n    assert len(resized_masks) == 1\n    assert resized_masks.height == 4\n    assert resized_masks.width == 8\n    truth = np.array([[[1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 0, 0, 0, 0],\n                       [0, 0, 0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 0, 0, 1, 1]]])\n    assert (resized_masks.masks == truth).all()\n\n\ndef test_bitmap_mask_get_bboxes():\n    # resize with empty bitmap masks\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    bboxes = bitmap_masks.get_bboxes()\n    assert len(bboxes) == 0\n\n    # resize with bitmap masks contain 1 instances\n    raw_masks = np.array([[[0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 1, 0, 0, 0, 0],\n                           [0, 0, 1, 1, 0, 0, 0, 0], [0, 0, 1, 1, 1, 0, 0, 0],\n                           [0, 0, 1, 1, 1, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0],\n                           [0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0,\n                                                      0]]])\n    bitmap_masks = BitmapMasks(raw_masks, 8, 8)\n    bboxes = bitmap_masks.get_bboxes()\n    assert len(bboxes) == 1\n    truth = np.array([[1, 1, 6, 6]])\n    assert (bboxes == truth).all()\n\n    # resize to non-square\n    raw_masks = np.array([[[1, 1, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 0, 0, 0, 0],\n                           [0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0,\n                                                      0]]])\n    bitmap_masks = BitmapMasks(raw_masks, 4, 8)\n    bboxes = bitmap_masks.get_bboxes()\n    truth = np.array([[0, 0, 6, 3]])\n    assert (bboxes == truth).all()\n\n\ndef test_bitmap_mask_flip():\n    # flip with empty bitmap masks\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    flipped_masks = bitmap_masks.flip(flip_direction='horizontal')\n    assert len(flipped_masks) == 0\n    assert flipped_masks.height == 28\n    assert flipped_masks.width == 28\n\n    # horizontally flip with bitmap masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    flipped_masks = bitmap_masks.flip(flip_direction='horizontal')\n    flipped_flipped_masks = flipped_masks.flip(flip_direction='horizontal')\n    assert flipped_masks.masks.shape == (3, 28, 28)\n    assert (bitmap_masks.masks == flipped_flipped_masks.masks).all()\n    assert (flipped_masks.masks == raw_masks[:, :, ::-1]).all()\n\n    # vertically flip with bitmap masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks 
= BitmapMasks(raw_masks, 28, 28)\n    flipped_masks = bitmap_masks.flip(flip_direction='vertical')\n    flipped_flipped_masks = flipped_masks.flip(flip_direction='vertical')\n    assert len(flipped_masks) == 3\n    assert flipped_masks.height == 28\n    assert flipped_masks.width == 28\n    assert (bitmap_masks.masks == flipped_flipped_masks.masks).all()\n    assert (flipped_masks.masks == raw_masks[:, ::-1, :]).all()\n\n    # diagonal flip with bitmap masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    flipped_masks = bitmap_masks.flip(flip_direction='diagonal')\n    flipped_flipped_masks = flipped_masks.flip(flip_direction='diagonal')\n    assert len(flipped_masks) == 3\n    assert flipped_masks.height == 28\n    assert flipped_masks.width == 28\n    assert (bitmap_masks.masks == flipped_flipped_masks.masks).all()\n    assert (flipped_masks.masks == raw_masks[:, ::-1, ::-1]).all()\n\n\ndef test_bitmap_mask_pad():\n    # pad with empty bitmap masks\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    padded_masks = bitmap_masks.pad((56, 56))\n    assert len(padded_masks) == 0\n    assert padded_masks.height == 56\n    assert padded_masks.width == 56\n\n    # pad with bitmap masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    padded_masks = bitmap_masks.pad((56, 56))\n    assert len(padded_masks) == 3\n    assert padded_masks.height == 56\n    assert padded_masks.width == 56\n    assert (padded_masks.masks[:, 28:, 28:] == 0).all()\n\n\ndef test_bitmap_mask_crop():\n    # crop with empty bitmap masks\n    dummy_bbox = np.array([0, 10, 10, 27], dtype=np.int)\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    cropped_masks = bitmap_masks.crop(dummy_bbox)\n    assert len(cropped_masks) == 0\n    assert cropped_masks.height == 17\n    assert cropped_masks.width == 10\n\n    # crop with bitmap masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    cropped_masks = bitmap_masks.crop(dummy_bbox)\n    assert len(cropped_masks) == 3\n    assert cropped_masks.height == 17\n    assert cropped_masks.width == 10\n    x1, y1, x2, y2 = dummy_bbox\n    assert (cropped_masks.masks == raw_masks[:, y1:y2, x1:x2]).all()\n\n    # crop with invalid bbox\n    with pytest.raises(AssertionError):\n        dummy_bbox = dummy_bboxes(2, 28, 28)\n        bitmap_masks.crop(dummy_bbox)\n\n\ndef test_bitmap_mask_crop_and_resize():\n    dummy_bbox = dummy_bboxes(5, 28, 28)\n    inds = np.random.randint(0, 3, (5, ))\n\n    # crop and resize with empty bitmap masks\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    cropped_resized_masks = bitmap_masks.crop_and_resize(\n        dummy_bbox, (56, 56), inds)\n    assert len(cropped_resized_masks) == 0\n    assert cropped_resized_masks.height == 56\n    assert cropped_resized_masks.width == 56\n\n    # crop and resize with bitmap masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    cropped_resized_masks = bitmap_masks.crop_and_resize(\n        dummy_bbox, (56, 56), inds)\n    assert len(cropped_resized_masks) == 5\n    assert cropped_resized_masks.height == 56\n    assert 
cropped_resized_masks.width == 56\n\n\ndef test_bitmap_mask_expand():\n    # expand with empty bitmap masks\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    expanded_masks = bitmap_masks.expand(56, 56, 12, 14)\n    assert len(expanded_masks) == 0\n    assert expanded_masks.height == 56\n    assert expanded_masks.width == 56\n\n    # expand with bitmap masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    expanded_masks = bitmap_masks.expand(56, 56, 12, 14)\n    assert len(expanded_masks) == 3\n    assert expanded_masks.height == 56\n    assert expanded_masks.width == 56\n    assert (expanded_masks.masks[:, :12, :14] == 0).all()\n    assert (expanded_masks.masks[:, 12 + 28:, 14 + 28:] == 0).all()\n\n\ndef test_bitmap_mask_area():\n    # area of empty bitmap mask\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    assert bitmap_masks.areas.sum() == 0\n\n    # area of bitmap masks contain 3 instances\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    areas = bitmap_masks.areas\n    assert len(areas) == 3\n    assert (areas == raw_masks.sum((1, 2))).all()\n\n\ndef test_bitmap_mask_to_ndarray():\n    # empty bitmap masks to ndarray\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    ndarray_masks = bitmap_masks.to_ndarray()\n    assert isinstance(ndarray_masks, np.ndarray)\n    assert ndarray_masks.shape == (0, 28, 28)\n\n    # bitmap masks contain 3 instances to ndarray\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    ndarray_masks = bitmap_masks.to_ndarray()\n    assert isinstance(ndarray_masks, np.ndarray)\n    assert ndarray_masks.shape == (3, 28, 28)\n    assert (ndarray_masks == raw_masks).all()\n\n\ndef test_bitmap_mask_to_tensor():\n    # empty bitmap masks to tensor\n    raw_masks = dummy_raw_bitmap_masks((0, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    tensor_masks = bitmap_masks.to_tensor(dtype=torch.uint8, device='cpu')\n    assert isinstance(tensor_masks, torch.Tensor)\n    assert tensor_masks.shape == (0, 28, 28)\n\n    # bitmap masks contain 3 instances to tensor\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    tensor_masks = bitmap_masks.to_tensor(dtype=torch.uint8, device='cpu')\n    assert isinstance(tensor_masks, torch.Tensor)\n    assert tensor_masks.shape == (3, 28, 28)\n    assert (tensor_masks.numpy() == raw_masks).all()\n\n\ndef test_bitmap_mask_index():\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    assert (bitmap_masks[0].masks == raw_masks[0]).all()\n    assert (bitmap_masks[range(2)].masks == raw_masks[range(2)]).all()\n\n\ndef test_bitmap_mask_iter():\n    raw_masks = dummy_raw_bitmap_masks((3, 28, 28))\n    bitmap_masks = BitmapMasks(raw_masks, 28, 28)\n    for i, bitmap_mask in enumerate(bitmap_masks):\n        assert bitmap_mask.shape == (28, 28)\n        assert (bitmap_mask == raw_masks[i]).all()\n\n\ndef test_polygon_mask_init():\n    # init with empty masks\n    raw_masks = []\n    polygon_masks = BitmapMasks(raw_masks, 28, 28)\n    assert len(polygon_masks) == 0\n    assert polygon_masks.height == 28\n    assert polygon_masks.width == 28\n\n    # init 
with masks contain 3 instances\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    assert isinstance(polygon_masks.masks, list)\n    assert isinstance(polygon_masks.masks[0], list)\n    assert isinstance(polygon_masks.masks[0][0], np.ndarray)\n    assert len(polygon_masks) == 3\n    assert polygon_masks.height == 28\n    assert polygon_masks.width == 28\n    assert polygon_masks.to_ndarray().shape == (3, 28, 28)\n\n    # init with raw masks of unsupported type\n    with pytest.raises(AssertionError):\n        raw_masks = [[[]]]\n        PolygonMasks(raw_masks, 28, 28)\n\n        raw_masks = [dummy_raw_polygon_masks((3, 28, 28))]\n        PolygonMasks(raw_masks, 28, 28)\n\n\ndef test_polygon_mask_rescale():\n    # rescale with empty polygon masks\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    rescaled_masks = polygon_masks.rescale((56, 72))\n    assert len(rescaled_masks) == 0\n    assert rescaled_masks.height == 56\n    assert rescaled_masks.width == 56\n    assert rescaled_masks.to_ndarray().shape == (0, 56, 56)\n\n    # rescale with polygon masks contain 3 instances\n    raw_masks = [[np.array([1, 1, 3, 1, 4, 3, 2, 4, 1, 3], dtype=np.float)]]\n    polygon_masks = PolygonMasks(raw_masks, 5, 5)\n    rescaled_masks = polygon_masks.rescale((12, 10))\n    assert len(rescaled_masks) == 1\n    assert rescaled_masks.height == 10\n    assert rescaled_masks.width == 10\n    assert rescaled_masks.to_ndarray().shape == (1, 10, 10)\n    truth = np.array(\n        [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n         [0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 0, 0, 0],\n         [0, 0, 1, 1, 1, 1, 1, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 1, 0, 0],\n         [0, 0, 0, 1, 1, 1, 1, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],\n         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],\n        np.uint8)\n    assert (rescaled_masks.to_ndarray() == truth).all()\n\n\ndef test_polygon_mask_resize():\n    # resize with empty polygon masks\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    resized_masks = polygon_masks.resize((56, 72))\n    assert len(resized_masks) == 0\n    assert resized_masks.height == 56\n    assert resized_masks.width == 72\n    assert resized_masks.to_ndarray().shape == (0, 56, 72)\n    assert len(resized_masks.get_bboxes()) == 0\n\n    # resize with polygon masks contain 1 instance 1 part\n    raw_masks1 = [[np.array([1, 1, 3, 1, 4, 3, 2, 4, 1, 3], dtype=np.float)]]\n    polygon_masks1 = PolygonMasks(raw_masks1, 5, 5)\n    resized_masks1 = polygon_masks1.resize((10, 10))\n    assert len(resized_masks1) == 1\n    assert resized_masks1.height == 10\n    assert resized_masks1.width == 10\n    assert resized_masks1.to_ndarray().shape == (1, 10, 10)\n    truth1 = np.array(\n        [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],\n         [0, 0, 1, 1, 1, 1, 0, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 0, 0, 0],\n         [0, 0, 1, 1, 1, 1, 1, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 1, 0, 0],\n         [0, 0, 0, 1, 1, 1, 1, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],\n         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],\n        np.uint8)\n    assert (resized_masks1.to_ndarray() == truth1).all()\n    bboxes = resized_masks1.get_bboxes()\n    bbox_truth = np.array([[2, 2, 8, 8]])\n    assert (bboxes == bbox_truth).all()\n\n    # resize with polygon masks 
contain 1 instance 2 part\n    raw_masks2 = [[\n        np.array([0., 0., 1., 0., 1., 1.]),\n        np.array([1., 1., 2., 1., 2., 2., 1., 2.])\n    ]]\n    polygon_masks2 = PolygonMasks(raw_masks2, 3, 3)\n    resized_masks2 = polygon_masks2.resize((6, 6))\n    assert len(resized_masks2) == 1\n    assert resized_masks2.height == 6\n    assert resized_masks2.width == 6\n    assert resized_masks2.to_ndarray().shape == (1, 6, 6)\n    truth2 = np.array(\n        [[0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 0, 0],\n         [0, 0, 1, 1, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], np.uint8)\n    assert (resized_masks2.to_ndarray() == truth2).all()\n\n    # resize with polygon masks contain 2 instances\n    raw_masks3 = [raw_masks1[0], raw_masks2[0]]\n    polygon_masks3 = PolygonMasks(raw_masks3, 5, 5)\n    resized_masks3 = polygon_masks3.resize((10, 10))\n    assert len(resized_masks3) == 2\n    assert resized_masks3.height == 10\n    assert resized_masks3.width == 10\n    assert resized_masks3.to_ndarray().shape == (2, 10, 10)\n    truth3 = np.stack([truth1, np.pad(truth2, ((0, 4), (0, 4)), 'constant')])\n    assert (resized_masks3.to_ndarray() == truth3).all()\n\n    # resize to non-square\n    raw_masks4 = [[np.array([1, 1, 3, 1, 4, 3, 2, 4, 1, 3], dtype=np.float)]]\n    polygon_masks4 = PolygonMasks(raw_masks4, 5, 5)\n    resized_masks4 = polygon_masks4.resize((5, 10))\n    assert len(resized_masks4) == 1\n    assert resized_masks4.height == 5\n    assert resized_masks4.width == 10\n    assert resized_masks4.to_ndarray().shape == (1, 5, 10)\n    truth4 = np.array(\n        [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 1, 1, 1, 1, 1, 0, 0, 0],\n         [0, 0, 1, 1, 1, 1, 1, 1, 0, 0], [0, 0, 0, 1, 1, 1, 0, 0, 0, 0],\n         [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], np.uint8)\n    assert (resized_masks4.to_ndarray() == truth4).all()\n\n\ndef test_polygon_mask_flip():\n    # flip with empty polygon masks\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    flipped_masks = polygon_masks.flip(flip_direction='horizontal')\n    assert len(flipped_masks) == 0\n    assert flipped_masks.height == 28\n    assert flipped_masks.width == 28\n    assert flipped_masks.to_ndarray().shape == (0, 28, 28)\n\n    # TODO: fixed flip correctness checking after v2.0_coord is merged\n    # horizontally flip with polygon masks contain 3 instances\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    flipped_masks = polygon_masks.flip(flip_direction='horizontal')\n    flipped_flipped_masks = flipped_masks.flip(flip_direction='horizontal')\n    assert len(flipped_masks) == 3\n    assert flipped_masks.height == 28\n    assert flipped_masks.width == 28\n    assert flipped_masks.to_ndarray().shape == (3, 28, 28)\n    assert (polygon_masks.to_ndarray() == flipped_flipped_masks.to_ndarray()\n            ).all()\n\n    # vertically flip with polygon masks contain 3 instances\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    flipped_masks = polygon_masks.flip(flip_direction='vertical')\n    flipped_flipped_masks = flipped_masks.flip(flip_direction='vertical')\n    assert len(flipped_masks) == 3\n    assert flipped_masks.height == 28\n    assert flipped_masks.width == 28\n    assert flipped_masks.to_ndarray().shape == (3, 28, 28)\n    assert (polygon_masks.to_ndarray() == flipped_flipped_masks.to_ndarray()\n            ).all()\n\n    # diagonal flip 
with polygon masks contain 3 instances\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    flipped_masks = polygon_masks.flip(flip_direction='diagonal')\n    flipped_flipped_masks = flipped_masks.flip(flip_direction='diagonal')\n    assert len(flipped_masks) == 3\n    assert flipped_masks.height == 28\n    assert flipped_masks.width == 28\n    assert flipped_masks.to_ndarray().shape == (3, 28, 28)\n    assert (polygon_masks.to_ndarray() == flipped_flipped_masks.to_ndarray()\n            ).all()\n\n\ndef test_polygon_mask_crop():\n    dummy_bbox = np.array([0, 10, 10, 27], dtype=np.int)\n    # crop with empty polygon masks\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    cropped_masks = polygon_masks.crop(dummy_bbox)\n    assert len(cropped_masks) == 0\n    assert cropped_masks.height == 17\n    assert cropped_masks.width == 10\n    assert cropped_masks.to_ndarray().shape == (0, 17, 10)\n\n    # crop with polygon masks contain 1 instances\n    raw_masks = [[np.array([1., 3., 5., 1., 5., 6., 1, 6])]]\n    polygon_masks = PolygonMasks(raw_masks, 7, 7)\n    bbox = np.array([0, 0, 3, 4])\n    cropped_masks = polygon_masks.crop(bbox)\n    assert len(cropped_masks) == 1\n    assert cropped_masks.height == 4\n    assert cropped_masks.width == 3\n    assert cropped_masks.to_ndarray().shape == (1, 4, 3)\n    truth = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 1], [0, 1, 1]])\n    assert (cropped_masks.to_ndarray() == truth).all()\n\n    # crop with invalid bbox\n    with pytest.raises(AssertionError):\n        dummy_bbox = dummy_bboxes(2, 28, 28)\n        polygon_masks.crop(dummy_bbox)\n\n\ndef test_polygon_mask_pad():\n    # pad with empty polygon masks\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    padded_masks = polygon_masks.pad((56, 56))\n    assert len(padded_masks) == 0\n    assert padded_masks.height == 56\n    assert padded_masks.width == 56\n    assert padded_masks.to_ndarray().shape == (0, 56, 56)\n\n    # pad with polygon masks contain 3 instances\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    padded_masks = polygon_masks.pad((56, 56))\n    assert len(padded_masks) == 3\n    assert padded_masks.height == 56\n    assert padded_masks.width == 56\n    assert padded_masks.to_ndarray().shape == (3, 56, 56)\n    assert (padded_masks.to_ndarray()[:, 28:, 28:] == 0).all()\n\n\ndef test_polygon_mask_expand():\n    with pytest.raises(NotImplementedError):\n        raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n        polygon_masks = PolygonMasks(raw_masks, 28, 28)\n        polygon_masks.expand(56, 56, 10, 17)\n\n\ndef test_polygon_mask_crop_and_resize():\n    dummy_bbox = dummy_bboxes(5, 28, 28)\n    inds = np.random.randint(0, 3, (5, ))\n\n    # crop and resize with empty polygon masks\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    cropped_resized_masks = polygon_masks.crop_and_resize(\n        dummy_bbox, (56, 56), inds)\n    assert len(cropped_resized_masks) == 0\n    assert cropped_resized_masks.height == 56\n    assert cropped_resized_masks.width == 56\n    assert cropped_resized_masks.to_ndarray().shape == (0, 56, 56)\n\n    # crop and resize with polygon masks contain 3 instances\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 
28, 28)\n    cropped_resized_masks = polygon_masks.crop_and_resize(\n        dummy_bbox, (56, 56), inds)\n    assert len(cropped_resized_masks) == 5\n    assert cropped_resized_masks.height == 56\n    assert cropped_resized_masks.width == 56\n    assert cropped_resized_masks.to_ndarray().shape == (5, 56, 56)\n\n\ndef test_polygon_mask_area():\n    # area of empty polygon masks\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    assert polygon_masks.areas.sum() == 0\n\n    # area of polygon masks contain 1 instance\n    # here we hack a case that the gap between the area of bitmap and polygon\n    # is minor\n    raw_masks = [[np.array([1, 1, 5, 1, 3, 4])]]\n    polygon_masks = PolygonMasks(raw_masks, 6, 6)\n    polygon_area = polygon_masks.areas\n    bitmap_area = polygon_masks.to_bitmap().areas\n    assert len(polygon_area) == 1\n    assert np.isclose(polygon_area, bitmap_area).all()\n\n\ndef test_polygon_mask_to_bitmap():\n    # polygon masks contain 3 instances to bitmap\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    bitmap_masks = polygon_masks.to_bitmap()\n    assert (polygon_masks.to_ndarray() == bitmap_masks.to_ndarray()).all()\n\n\ndef test_polygon_mask_to_ndarray():\n    # empty polygon masks to ndarray\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    ndarray_masks = polygon_masks.to_ndarray()\n    assert isinstance(ndarray_masks, np.ndarray)\n    assert ndarray_masks.shape == (0, 28, 28)\n\n    # polygon masks contain 3 instances to ndarray\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    ndarray_masks = polygon_masks.to_ndarray()\n    assert isinstance(ndarray_masks, np.ndarray)\n    assert ndarray_masks.shape == (3, 28, 28)\n\n\ndef test_polygon_to_tensor():\n    # empty polygon masks to tensor\n    raw_masks = dummy_raw_polygon_masks((0, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    tensor_masks = polygon_masks.to_tensor(dtype=torch.uint8, device='cpu')\n    assert isinstance(tensor_masks, torch.Tensor)\n    assert tensor_masks.shape == (0, 28, 28)\n\n    # polygon masks contain 3 instances to tensor\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    tensor_masks = polygon_masks.to_tensor(dtype=torch.uint8, device='cpu')\n    assert isinstance(tensor_masks, torch.Tensor)\n    assert tensor_masks.shape == (3, 28, 28)\n    assert (tensor_masks.numpy() == polygon_masks.to_ndarray()).all()\n\n\ndef test_polygon_mask_index():\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    # index by integer\n    polygon_masks[0]\n    # index by list\n    polygon_masks[[0, 1]]\n    # index by ndarray\n    polygon_masks[np.asarray([0, 1])]\n    with pytest.raises(ValueError):\n        # invalid index\n        polygon_masks[torch.Tensor([1, 2])]\n\n\ndef test_polygon_mask_iter():\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    for i, polygon_mask in enumerate(polygon_masks):\n        assert np.equal(polygon_mask, raw_masks[i]).all()\n\n\ndef test_mask2bbox():\n    # no instance\n    masks = torch.zeros((1, 20, 15), dtype=torch.bool)\n    bboxes_empty_gt = torch.tensor([[0, 0, 0, 0]]).float()\n    bboxes = mask2bbox(masks)\n    assert 
torch.allclose(bboxes_empty_gt.float(), bboxes)\n\n    # the entire mask is an instance\n    bboxes_full_gt = torch.tensor([[0, 0, 15, 20]]).float()\n    masks = torch.ones((1, 20, 15), dtype=torch.bool)\n    bboxes = mask2bbox(masks)\n    assert torch.allclose(bboxes_full_gt, bboxes)\n\n    # a pentagon-shaped instance\n    bboxes_gt = torch.tensor([[2, 2, 7, 6]]).float()\n    masks = torch.zeros((1, 20, 15), dtype=torch.bool)\n    masks[0, 2, 4] = True\n    masks[0, 3, 3:6] = True\n    masks[0, 4, 2:7] = True\n    masks[0, 5, 2:7] = True\n    bboxes = mask2bbox(masks)\n    assert torch.allclose(bboxes_gt, bboxes)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_memory.py",
    "content": "import numpy as np\nimport pytest\nimport torch\n\nfrom mmdet.utils import AvoidOOM\nfrom mmdet.utils.memory import cast_tensor_type\n\n\ndef test_avoidoom():\n    tensor = torch.from_numpy(np.random.random((20, 20)))\n    if torch.cuda.is_available():\n        tensor = tensor.cuda()\n        # get default result\n        default_result = torch.mm(tensor, tensor.transpose(1, 0))\n\n        # when not occurred OOM error\n        AvoidCudaOOM = AvoidOOM()\n        result = AvoidCudaOOM.retry_if_cuda_oom(torch.mm)(tensor,\n                                                          tensor.transpose(\n                                                              1, 0))\n        assert default_result.device == result.device and \\\n               default_result.dtype == result.dtype and \\\n               torch.equal(default_result, result)\n\n        # calculate with fp16 and convert back to source type\n        AvoidCudaOOM = AvoidOOM(test=True)\n        result = AvoidCudaOOM.retry_if_cuda_oom(torch.mm)(tensor,\n                                                          tensor.transpose(\n                                                              1, 0))\n        assert default_result.device == result.device and \\\n               default_result.dtype == result.dtype and \\\n               torch.allclose(default_result, result, 1e-3)\n\n        # calculate on cpu and convert back to source device\n        AvoidCudaOOM = AvoidOOM(test=True)\n        result = AvoidCudaOOM.retry_if_cuda_oom(torch.mm)(tensor,\n                                                          tensor.transpose(\n                                                              1, 0))\n        assert result.dtype == default_result.dtype and \\\n               result.device == default_result.device and \\\n               torch.allclose(default_result, result)\n\n        # do not calculate on cpu and the outputs will be same as input\n        AvoidCudaOOM = AvoidOOM(test=True, to_cpu=False)\n        result = AvoidCudaOOM.retry_if_cuda_oom(torch.mm)(tensor,\n                                                          tensor.transpose(\n                                                              1, 0))\n        assert result.dtype == default_result.dtype and \\\n               result.device == default_result.device\n\n    else:\n        default_result = torch.mm(tensor, tensor.transpose(1, 0))\n        AvoidCudaOOM = AvoidOOM()\n        result = AvoidCudaOOM.retry_if_cuda_oom(torch.mm)(tensor,\n                                                          tensor.transpose(\n                                                              1, 0))\n        assert default_result.device == result.device and \\\n               default_result.dtype == result.dtype and \\\n               torch.equal(default_result, result)\n\n\ndef test_cast_tensor_type():\n    inputs = torch.rand(10)\n    if torch.cuda.is_available():\n        inputs = inputs.cuda()\n    with pytest.raises(AssertionError):\n        cast_tensor_type(inputs, src_type=None, dst_type=None)\n    # input is a float\n    out = cast_tensor_type(10., dst_type=torch.half)\n    assert out == 10. 
and isinstance(out, float)\n    # convert Tensor to fp16 and re-convert to fp32\n    fp16_out = cast_tensor_type(inputs, dst_type=torch.half)\n    assert fp16_out.dtype == torch.half\n    fp32_out = cast_tensor_type(fp16_out, dst_type=torch.float32)\n    assert fp32_out.dtype == torch.float32\n\n    # input is a list\n    list_input = [inputs, inputs]\n    list_outs = cast_tensor_type(list_input, dst_type=torch.half)\n    assert len(list_outs) == len(list_input) and \\\n           isinstance(list_outs, list)\n    for out in list_outs:\n        assert out.dtype == torch.half\n    # input is a dict\n    dict_input = {'test1': inputs, 'test2': inputs}\n    dict_outs = cast_tensor_type(dict_input, dst_type=torch.half)\n    assert len(dict_outs) == len(dict_input) and \\\n           isinstance(dict_outs, dict)\n\n    # convert the input tensor to CPU and re-convert to GPU\n    if torch.cuda.is_available():\n        cpu_device = torch.empty(0).device\n        gpu_device = inputs.device\n        cpu_out = cast_tensor_type(inputs, dst_type=cpu_device)\n        assert cpu_out.device == cpu_device\n\n        gpu_out = cast_tensor_type(inputs, dst_type=gpu_device)\n        assert gpu_out.device == gpu_device\n"
  },
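  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/usage_sketch_avoid_oom.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# It shows the AvoidOOM helper exercised by test_memory.py above: a CUDA op wrapped with\n# retry_if_cuda_oom is retried in fp16 and then on the CPU if it raises an out-of-memory\n# error, and the result is converted back to the original dtype and device.\nimport torch\n\nfrom mmdet.utils import AvoidOOM\n\n\ndef pairwise_gram(feats):\n    # feats: (N, C) float tensor; the matmul is the step most likely to run out of\n    # GPU memory for a very large N, so it is the call that gets wrapped.\n    avoid_oom = AvoidOOM()\n    return avoid_oom.retry_if_cuda_oom(torch.mm)(feats, feats.transpose(1, 0))\n\n\nif __name__ == '__main__':\n    x = torch.rand(512, 64)\n    if torch.cuda.is_available():\n        x = x.cuda()\n    print(pairwise_gram(x).shape)  # torch.Size([512, 512])\n"
  },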
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_misc.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nimport tempfile\n\nimport numpy as np\nimport pytest\nimport torch\n\nfrom mmdet.core.bbox import distance2bbox\nfrom mmdet.core.mask.structures import BitmapMasks, PolygonMasks\nfrom mmdet.core.utils import (center_of_mass, filter_scores_and_topk,\n                              flip_tensor, mask2ndarray, select_single_mlvl)\nfrom mmdet.utils import find_latest_checkpoint\n\n\ndef dummy_raw_polygon_masks(size):\n    \"\"\"\n    Args:\n        size (tuple): expected shape of dummy masks, (N, H, W)\n\n    Return:\n        list[list[ndarray]]: dummy mask\n    \"\"\"\n    num_obj, height, width = size\n    polygons = []\n    for _ in range(num_obj):\n        num_points = np.random.randint(5) * 2 + 6\n        polygons.append([np.random.uniform(0, min(height, width), num_points)])\n    return polygons\n\n\ndef test_mask2ndarray():\n    raw_masks = np.ones((3, 28, 28))\n    bitmap_mask = BitmapMasks(raw_masks, 28, 28)\n    output_mask = mask2ndarray(bitmap_mask)\n    assert np.allclose(raw_masks, output_mask)\n\n    raw_masks = dummy_raw_polygon_masks((3, 28, 28))\n    polygon_masks = PolygonMasks(raw_masks, 28, 28)\n    output_mask = mask2ndarray(polygon_masks)\n    assert output_mask.shape == (3, 28, 28)\n\n    raw_masks = np.ones((3, 28, 28))\n    output_mask = mask2ndarray(raw_masks)\n    assert np.allclose(raw_masks, output_mask)\n\n    raw_masks = torch.ones((3, 28, 28))\n    output_mask = mask2ndarray(raw_masks)\n    assert np.allclose(raw_masks, output_mask)\n\n    # test unsupported type\n    raw_masks = []\n    with pytest.raises(TypeError):\n        output_mask = mask2ndarray(raw_masks)\n\n\ndef test_distance2bbox():\n    point = torch.Tensor([[74., 61.], [-29., 106.], [138., 61.], [29., 170.]])\n\n    distance = torch.Tensor([[0., 0, 1., 1.], [1., 2., 10., 6.],\n                             [22., -29., 138., 61.], [54., -29., 170., 61.]])\n    expected_decode_bboxes = torch.Tensor([[74., 61., 75., 62.],\n                                           [0., 104., 0., 112.],\n                                           [100., 90., 100., 120.],\n                                           [0., 120., 100., 120.]])\n    out_bbox = distance2bbox(point, distance, max_shape=(120, 100))\n    assert expected_decode_bboxes.allclose(out_bbox)\n    out = distance2bbox(point, distance, max_shape=torch.Tensor((120, 100)))\n    assert expected_decode_bboxes.allclose(out)\n\n    batch_point = point.unsqueeze(0).repeat(2, 1, 1)\n    batch_distance = distance.unsqueeze(0).repeat(2, 1, 1)\n    batch_out = distance2bbox(\n        batch_point, batch_distance, max_shape=(120, 100))[0]\n    assert out.allclose(batch_out)\n    batch_out = distance2bbox(\n        batch_point, batch_distance, max_shape=[(120, 100), (120, 100)])[0]\n    assert out.allclose(batch_out)\n\n    batch_out = distance2bbox(point, batch_distance, max_shape=(120, 100))[0]\n    assert out.allclose(batch_out)\n\n    # test max_shape is not equal to batch\n    with pytest.raises(AssertionError):\n        distance2bbox(\n            batch_point,\n            batch_distance,\n            max_shape=[(120, 100), (120, 100), (32, 32)])\n\n    rois = torch.zeros((0, 4))\n    deltas = torch.zeros((0, 4))\n    out = distance2bbox(rois, deltas, max_shape=(120, 100))\n    assert rois.shape == out.shape\n\n    rois = torch.zeros((2, 0, 4))\n    deltas = torch.zeros((2, 0, 4))\n    out = distance2bbox(rois, deltas, max_shape=(120, 100))\n    assert rois.shape == 
out.shape\n\n\n@pytest.mark.parametrize('mask', [\n    torch.ones((28, 28)),\n    torch.zeros((28, 28)),\n    torch.rand(28, 28) > 0.5,\n    torch.tensor([[0, 0, 0, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 0, 0, 0]])\n])\ndef test_center_of_mass(mask):\n    center_h, center_w = center_of_mass(mask)\n    if mask.shape[0] == 4:\n        assert center_h == 1.5\n        assert center_w == 1.5\n    assert isinstance(center_h, torch.Tensor) \\\n           and isinstance(center_w, torch.Tensor)\n    assert 0 <= center_h <= 28 \\\n           and 0 <= center_w <= 28\n\n\ndef test_flip_tensor():\n    img = np.random.random((1, 3, 10, 10))\n    src_tensor = torch.from_numpy(img)\n\n    # test flip_direction parameter error\n    with pytest.raises(AssertionError):\n        flip_tensor(src_tensor, 'flip')\n\n    # test tensor dimension\n    with pytest.raises(AssertionError):\n        flip_tensor(src_tensor[0], 'vertical')\n\n    hfilp_tensor = flip_tensor(src_tensor, 'horizontal')\n    expected_hflip_tensor = torch.from_numpy(img[..., ::-1, :].copy())\n    expected_hflip_tensor.allclose(hfilp_tensor)\n\n    vfilp_tensor = flip_tensor(src_tensor, 'vertical')\n    expected_vflip_tensor = torch.from_numpy(img[..., ::-1].copy())\n    expected_vflip_tensor.allclose(vfilp_tensor)\n\n    diag_filp_tensor = flip_tensor(src_tensor, 'diagonal')\n    expected_diag_filp_tensor = torch.from_numpy(img[..., ::-1, ::-1].copy())\n    expected_diag_filp_tensor.allclose(diag_filp_tensor)\n\n\ndef test_select_single_mlvl():\n    mlvl_tensors = [torch.rand(2, 1, 10, 10)] * 5\n    mlvl_tensor_list = select_single_mlvl(mlvl_tensors, 1)\n    assert len(mlvl_tensor_list) == 5 and mlvl_tensor_list[0].ndim == 3\n\n\ndef test_filter_scores_and_topk():\n    score = torch.tensor([[0.1, 0.3, 0.2], [0.12, 0.7, 0.9], [0.02, 0.8, 0.08],\n                          [0.4, 0.1, 0.08]])\n    bbox_pred = torch.tensor([[0.2, 0.3], [0.4, 0.7], [0.1, 0.1], [0.5, 0.1]])\n    score_thr = 0.15\n    nms_pre = 4\n    # test results type error\n    with pytest.raises(NotImplementedError):\n        filter_scores_and_topk(score, score_thr, nms_pre, (score, ))\n\n    filtered_results = filter_scores_and_topk(\n        score, score_thr, nms_pre, results=dict(bbox_pred=bbox_pred))\n    filtered_score, labels, keep_idxs, results = filtered_results\n    assert filtered_score.allclose(torch.tensor([0.9, 0.8, 0.7, 0.4]))\n    assert labels.allclose(torch.tensor([2, 1, 1, 0]))\n    assert keep_idxs.allclose(torch.tensor([1, 2, 1, 3]))\n    assert results['bbox_pred'].allclose(\n        torch.tensor([[0.4, 0.7], [0.1, 0.1], [0.4, 0.7], [0.5, 0.1]]))\n\n\ndef test_find_latest_checkpoint():\n    with tempfile.TemporaryDirectory() as tmpdir:\n        path = tmpdir\n        latest = find_latest_checkpoint(path)\n        # There are no checkpoints in the path.\n        assert latest is None\n\n        path = osp.join(tmpdir, 'none')\n        latest = find_latest_checkpoint(path)\n        # The path does not exist.\n        assert latest is None\n\n    with tempfile.TemporaryDirectory() as tmpdir:\n        with open(osp.join(tmpdir, 'latest.pth'), 'w') as f:\n            f.write('latest')\n        path = tmpdir\n        latest = find_latest_checkpoint(path)\n        assert latest == osp.join(tmpdir, 'latest.pth')\n\n    with tempfile.TemporaryDirectory() as tmpdir:\n        with open(osp.join(tmpdir, 'iter_4000.pth'), 'w') as f:\n            f.write('iter_4000')\n        with open(osp.join(tmpdir, 'iter_8000.pth'), 'w') as f:\n            f.write('iter_8000')\n        
path = tmpdir\n        latest = find_latest_checkpoint(path)\n        assert latest == osp.join(tmpdir, 'iter_8000.pth')\n\n    with tempfile.TemporaryDirectory() as tmpdir:\n        with open(osp.join(tmpdir, 'epoch_1.pth'), 'w') as f:\n            f.write('epoch_1')\n        with open(osp.join(tmpdir, 'epoch_2.pth'), 'w') as f:\n            f.write('epoch_2')\n        path = tmpdir\n        latest = find_latest_checkpoint(path)\n        assert latest == osp.join(tmpdir, 'epoch_2.pth')\n"
  },
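  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/usage_sketch_filter_scores_and_topk.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# It replays the scenario from test_filter_scores_and_topk above: keep at most nms_pre\n# class scores above score_thr and gather any extra per-point results alongside them.\nimport torch\n\nfrom mmdet.core.utils import filter_scores_and_topk\n\nscores = torch.tensor([[0.1, 0.3, 0.2],\n                       [0.12, 0.7, 0.9],\n                       [0.02, 0.8, 0.08],\n                       [0.4, 0.1, 0.08]])\nbbox_pred = torch.tensor([[0.2, 0.3], [0.4, 0.7], [0.1, 0.1], [0.5, 0.1]])\n\nkept_scores, labels, keep_idxs, extra = filter_scores_and_topk(\n    scores, 0.15, 4, results=dict(bbox_pred=bbox_pred))\n# kept_scores come back sorted in descending order; keep_idxs indexes the original\n# rows, and extra['bbox_pred'] is already gathered to match kept_scores.\nprint(kept_scores, labels, keep_idxs)\nprint(extra['bbox_pred'])\n"
  },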
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_nms.py",
    "content": "import pytest\nimport torch\n\nfrom mmdet.core.post_processing import mask_matrix_nms\n\n\ndef _create_mask(N, h, w):\n    masks = torch.rand((N, h, w)) > 0.5\n    labels = torch.rand(N)\n    scores = torch.rand(N)\n    return masks, labels, scores\n\n\ndef test_nms_input_errors():\n    with pytest.raises(AssertionError):\n        mask_matrix_nms(\n            torch.rand((10, 28, 28)), torch.rand(11), torch.rand(11))\n    with pytest.raises(AssertionError):\n        masks = torch.rand((10, 28, 28))\n        mask_matrix_nms(\n            masks,\n            torch.rand(11),\n            torch.rand(11),\n            mask_area=masks.sum((1, 2)).float()[:8])\n    with pytest.raises(NotImplementedError):\n        mask_matrix_nms(\n            torch.rand((10, 28, 28)),\n            torch.rand(10),\n            torch.rand(10),\n            kernel='None')\n    # test an empty results\n    masks, labels, scores = _create_mask(0, 28, 28)\n    score, label, mask, keep_ind = \\\n        mask_matrix_nms(masks, labels, scores)\n    assert len(score) == len(label) == \\\n           len(mask) == len(keep_ind) == 0\n\n    # do not use update_thr, nms_pre and max_num\n    masks, labels, scores = _create_mask(1000, 28, 28)\n    score, label, mask, keep_ind = \\\n        mask_matrix_nms(masks, labels, scores)\n    assert len(score) == len(label) == \\\n           len(mask) == len(keep_ind) == 1000\n    # only use nms_pre\n    score, label, mask, keep_ind = \\\n        mask_matrix_nms(masks, labels, scores, nms_pre=500)\n    assert len(score) == len(label) == \\\n           len(mask) == len(keep_ind) == 500\n    # use max_num\n    score, label, mask, keep_ind = \\\n        mask_matrix_nms(masks, labels, scores,\n                        nms_pre=500, max_num=100)\n    assert len(score) == len(label) == \\\n           len(mask) == len(keep_ind) == 100\n\n    masks, labels, _ = _create_mask(1, 28, 28)\n    scores = torch.Tensor([1.0])\n    masks = masks.expand(1000, 28, 28)\n    labels = labels.expand(1000)\n    scores = scores.expand(1000)\n\n    # assert scores is decayed and update_thr is worked\n    # if with the same mask, label, and all scores = 1\n    # the first score will set to 1, others will decay.\n    score, label, mask, keep_ind = \\\n        mask_matrix_nms(masks,\n                        labels,\n                        scores,\n                        nms_pre=500,\n                        max_num=100,\n                        kernel='gaussian',\n                        sigma=2.0,\n                        filter_thr=0.5)\n    assert len(score) == 1\n    assert score[0] == 1\n"
  },
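  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/usage_sketch_mask_matrix_nms.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# It calls mask_matrix_nms the way test_nms.py above does: suppress duplicate instance\n# masks with Matrix NMS, keeping at most nms_pre candidates and max_num final instances.\nimport torch\n\nfrom mmdet.core.post_processing import mask_matrix_nms\n\nmasks = torch.rand((50, 28, 28)) > 0.5   # (N, H, W) boolean instance masks\nlabels = torch.randint(0, 3, (50,))      # per-instance class ids\nscores = torch.rand(50)                  # per-instance confidences\n\nscores_out, labels_out, masks_out, keep_ind = mask_matrix_nms(\n    masks, labels, scores, nms_pre=30, max_num=10, kernel='gaussian', sigma=2.0)\nprint(len(scores_out), masks_out.shape, keep_ind)\n"
  },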
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_replace_cfg_vals.py",
    "content": "import os.path as osp\nimport tempfile\nfrom copy import deepcopy\n\nimport pytest\nfrom mmcv.utils import Config\n\nfrom mmdet.utils import replace_cfg_vals\n\n\ndef test_replace_cfg_vals():\n    temp_file = tempfile.NamedTemporaryFile()\n    cfg_path = f'{temp_file.name}.py'\n    with open(cfg_path, 'w') as f:\n        f.write('configs')\n\n    ori_cfg_dict = dict()\n    ori_cfg_dict['cfg_name'] = osp.basename(temp_file.name)\n    ori_cfg_dict['work_dir'] = 'work_dirs/${cfg_name}/${percent}/${fold}'\n    ori_cfg_dict['percent'] = 5\n    ori_cfg_dict['fold'] = 1\n    ori_cfg_dict['model_wrapper'] = dict(\n        type='SoftTeacher', detector='${model}')\n    ori_cfg_dict['model'] = dict(\n        type='FasterRCNN',\n        backbone=dict(type='ResNet'),\n        neck=dict(type='FPN'),\n        rpn_head=dict(type='RPNHead'),\n        roi_head=dict(type='StandardRoIHead'),\n        train_cfg=dict(\n            rpn=dict(\n                assigner=dict(type='MaxIoUAssigner'),\n                sampler=dict(type='RandomSampler'),\n            ),\n            rpn_proposal=dict(nms=dict(type='nms', iou_threshold=0.7)),\n            rcnn=dict(\n                assigner=dict(type='MaxIoUAssigner'),\n                sampler=dict(type='RandomSampler'),\n            ),\n        ),\n        test_cfg=dict(\n            rpn=dict(nms=dict(type='nms', iou_threshold=0.7)),\n            rcnn=dict(nms=dict(type='nms', iou_threshold=0.5)),\n        ),\n    )\n    ori_cfg_dict['iou_threshold'] = dict(\n        rpn_proposal_nms='${model.train_cfg.rpn_proposal.nms.iou_threshold}',\n        test_rpn_nms='${model.test_cfg.rpn.nms.iou_threshold}',\n        test_rcnn_nms='${model.test_cfg.rcnn.nms.iou_threshold}',\n    )\n\n    ori_cfg_dict['str'] = 'Hello, world!'\n    ori_cfg_dict['dict'] = {'Hello': 'world!'}\n    ori_cfg_dict['list'] = [\n        'Hello, world!',\n    ]\n    ori_cfg_dict['tuple'] = ('Hello, world!', )\n    ori_cfg_dict['test_str'] = 'xxx${str}xxx'\n\n    ori_cfg = Config(ori_cfg_dict, filename=cfg_path)\n    updated_cfg = replace_cfg_vals(deepcopy(ori_cfg))\n\n    assert updated_cfg.work_dir \\\n        == f'work_dirs/{osp.basename(temp_file.name)}/5/1'\n    assert updated_cfg.model.detector == ori_cfg.model\n    assert updated_cfg.iou_threshold.rpn_proposal_nms \\\n        == ori_cfg.model.train_cfg.rpn_proposal.nms.iou_threshold\n    assert updated_cfg.test_str == 'xxxHello, world!xxx'\n    ori_cfg_dict['test_dict'] = 'xxx${dict}xxx'\n    ori_cfg_dict['test_list'] = 'xxx${list}xxx'\n    ori_cfg_dict['test_tuple'] = 'xxx${tuple}xxx'\n    with pytest.raises(AssertionError):\n        cfg = deepcopy(ori_cfg)\n        cfg['test_dict'] = 'xxx${dict}xxx'\n        updated_cfg = replace_cfg_vals(cfg)\n    with pytest.raises(AssertionError):\n        cfg = deepcopy(ori_cfg)\n        cfg['test_list'] = 'xxx${list}xxx'\n        updated_cfg = replace_cfg_vals(cfg)\n    with pytest.raises(AssertionError):\n        cfg = deepcopy(ori_cfg)\n        cfg['test_tuple'] = 'xxx${tuple}xxx'\n        updated_cfg = replace_cfg_vals(cfg)\n"
  },
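  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/usage_sketch_replace_cfg_vals.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# It shows the ${...} substitution covered by the test above: replace_cfg_vals expands\n# references to other config keys that appear inside string values of a Config.\nfrom mmcv.utils import Config\n\nfrom mmdet.utils import replace_cfg_vals\n\ncfg = Config(dict(\n    dataset='coco',\n    percent=10,\n    work_dir='work_dirs/${dataset}/${percent}',\n))\ncfg = replace_cfg_vals(cfg)\nprint(cfg.work_dir)  # work_dirs/coco/10\n"
  },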
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_setup_env.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport multiprocessing as mp\nimport os\nimport platform\n\nimport cv2\nfrom mmcv import Config\n\nfrom mmdet.utils import setup_multi_processes\n\n\ndef test_setup_multi_processes():\n    # temp save system setting\n    sys_start_mehod = mp.get_start_method(allow_none=True)\n    sys_cv_threads = cv2.getNumThreads()\n    # pop and temp save system env vars\n    sys_omp_threads = os.environ.pop('OMP_NUM_THREADS', default=None)\n    sys_mkl_threads = os.environ.pop('MKL_NUM_THREADS', default=None)\n\n    # test config without setting env\n    config = dict(data=dict(workers_per_gpu=2))\n    cfg = Config(config)\n    setup_multi_processes(cfg)\n    assert os.getenv('OMP_NUM_THREADS') == '1'\n    assert os.getenv('MKL_NUM_THREADS') == '1'\n    # when set to 0, the num threads will be 1\n    assert cv2.getNumThreads() == 1\n    if platform.system() != 'Windows':\n        assert mp.get_start_method() == 'fork'\n\n    # test num workers <= 1\n    os.environ.pop('OMP_NUM_THREADS')\n    os.environ.pop('MKL_NUM_THREADS')\n    config = dict(data=dict(workers_per_gpu=0))\n    cfg = Config(config)\n    setup_multi_processes(cfg)\n    assert 'OMP_NUM_THREADS' not in os.environ\n    assert 'MKL_NUM_THREADS' not in os.environ\n\n    # test manually set env var\n    os.environ['OMP_NUM_THREADS'] = '4'\n    config = dict(data=dict(workers_per_gpu=2))\n    cfg = Config(config)\n    setup_multi_processes(cfg)\n    assert os.getenv('OMP_NUM_THREADS') == '4'\n\n    # test manually set opencv threads and mp start method\n    config = dict(\n        data=dict(workers_per_gpu=2),\n        opencv_num_threads=4,\n        mp_start_method='spawn')\n    cfg = Config(config)\n    setup_multi_processes(cfg)\n    assert cv2.getNumThreads() == 4\n    assert mp.get_start_method() == 'spawn'\n\n    # revert setting to avoid affecting other programs\n    if sys_start_mehod:\n        mp.set_start_method(sys_start_mehod, force=True)\n    cv2.setNumThreads(sys_cv_threads)\n    if sys_omp_threads:\n        os.environ['OMP_NUM_THREADS'] = sys_omp_threads\n    else:\n        os.environ.pop('OMP_NUM_THREADS')\n    if sys_mkl_threads:\n        os.environ['MKL_NUM_THREADS'] = sys_mkl_threads\n    else:\n        os.environ.pop('MKL_NUM_THREADS')\n"
  },
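  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/usage_sketch_setup_multi_processes.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# It calls setup_multi_processes as the test above does: given a config with\n# data.workers_per_gpu > 1 and no explicit thread settings, it caps OMP/MKL threads,\n# limits OpenCV threading and, on non-Windows platforms, uses the 'fork' start method.\nimport os\n\nimport cv2\nfrom mmcv import Config\n\nfrom mmdet.utils import setup_multi_processes\n\ncfg = Config(dict(data=dict(workers_per_gpu=2)))\nsetup_multi_processes(cfg)\nprint(os.environ.get('OMP_NUM_THREADS'),\n      os.environ.get('MKL_NUM_THREADS'),\n      cv2.getNumThreads())\n"
  },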
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_split_batch.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nfrom copy import deepcopy\n\nimport mmcv\nimport numpy as np\nimport torch\n\nfrom mmdet.utils import split_batch\n\n\ndef test_split_batch():\n    img_root = osp.join(osp.dirname(__file__), '../data/color.jpg')\n    img = mmcv.imread(img_root, 'color')\n    h, w, _ = img.shape\n    gt_bboxes = np.array([[0.2 * w, 0.2 * h, 0.4 * w, 0.4 * h],\n                          [0.6 * w, 0.6 * h, 0.8 * w, 0.8 * h]],\n                         dtype=np.float32)\n    gt_lables = np.ones(gt_bboxes.shape[0], dtype=np.int64)\n\n    img = torch.tensor(img).permute(2, 0, 1)\n    meta = dict()\n    meta['filename'] = img_root\n    meta['ori_shape'] = img.shape\n    meta['img_shape'] = img.shape\n    meta['img_norm_cfg'] = {\n        'mean': np.array([103.53, 116.28, 123.675], dtype=np.float32),\n        'std': np.array([1., 1., 1.], dtype=np.float32),\n        'to_rgb': False\n    }\n    meta['pad_shape'] = img.shape\n    # For example, tag include sup, unsup_teacher and unsup_student,\n    # in order to distinguish the difference between the three groups of data,\n    # the scale_factor of sup is [0.5, 0.5, 0.5, 0.5]\n    # the scale_factor of unsup_teacher is [1.0, 1.0, 1.0, 1.0]\n    # the scale_factor of unsup_student is [2.0, 2.0, 2.0, 2.0]\n    imgs = img.unsqueeze(0).repeat(9, 1, 1, 1)\n    img_metas = []\n    tags = [\n        'sup', 'unsup_teacher', 'unsup_student', 'unsup_teacher',\n        'unsup_student', 'unsup_teacher', 'unsup_student', 'unsup_teacher',\n        'unsup_student'\n    ]\n    for tag in tags:\n        img_meta = deepcopy(meta)\n        if tag == 'sup':\n            img_meta['scale_factor'] = [0.5, 0.5, 0.5, 0.5]\n            img_meta['tag'] = 'sup'\n        elif tag == 'unsup_teacher':\n            img_meta['scale_factor'] = [1.0, 1.0, 1.0, 1.0]\n            img_meta['tag'] = 'unsup_teacher'\n        elif tag == 'unsup_student':\n            img_meta['scale_factor'] = [2.0, 2.0, 2.0, 2.0]\n            img_meta['tag'] = 'unsup_student'\n        else:\n            continue\n        img_metas.append(img_meta)\n    kwargs = dict()\n    kwargs['gt_bboxes'] = [torch.tensor(gt_bboxes)] + [torch.zeros(0, 4)] * 8\n    kwargs['gt_lables'] = [torch.tensor(gt_lables)] + [torch.zeros(0, )] * 8\n    data_groups = split_batch(imgs, img_metas, kwargs)\n    assert set(data_groups.keys()) == set(tags)\n    assert data_groups['sup']['img'].shape == (1, 3, h, w)\n    assert data_groups['unsup_teacher']['img'].shape == (4, 3, h, w)\n    assert data_groups['unsup_student']['img'].shape == (4, 3, h, w)\n    # the scale_factor of sup is [0.5, 0.5, 0.5, 0.5]\n    assert data_groups['sup']['img_metas'][0]['scale_factor'] == [\n        0.5, 0.5, 0.5, 0.5\n    ]\n    # the scale_factor of unsup_teacher is [1.0, 1.0, 1.0, 1.0]\n    assert data_groups['unsup_teacher']['img_metas'][0]['scale_factor'] == [\n        1.0, 1.0, 1.0, 1.0\n    ]\n    assert data_groups['unsup_teacher']['img_metas'][1]['scale_factor'] == [\n        1.0, 1.0, 1.0, 1.0\n    ]\n    assert data_groups['unsup_teacher']['img_metas'][2]['scale_factor'] == [\n        1.0, 1.0, 1.0, 1.0\n    ]\n    assert data_groups['unsup_teacher']['img_metas'][3]['scale_factor'] == [\n        1.0, 1.0, 1.0, 1.0\n    ]\n    # the scale_factor of unsup_student is [2.0, 2.0, 2.0, 2.0]\n    assert data_groups['unsup_student']['img_metas'][0]['scale_factor'] == [\n        2.0, 2.0, 2.0, 2.0\n    ]\n    assert data_groups['unsup_student']['img_metas'][1]['scale_factor'] == [\n 
       2.0, 2.0, 2.0, 2.0\n    ]\n    assert data_groups['unsup_student']['img_metas'][2]['scale_factor'] == [\n        2.0, 2.0, 2.0, 2.0\n    ]\n    assert data_groups['unsup_student']['img_metas'][3]['scale_factor'] == [\n        2.0, 2.0, 2.0, 2.0\n    ]\n"
  },
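  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/usage_sketch_split_batch.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# A minimal version of the scenario in test_split_batch.py above: split a mixed batch\n# into groups keyed by the per-image 'tag' entry of img_metas. Real callers pass the\n# full img_metas dicts (filename, shapes, scale_factor, ...); only 'tag' is shown here.\nimport torch\n\nfrom mmdet.utils import split_batch\n\nimgs = torch.zeros(3, 3, 32, 32)\nimg_metas = [dict(tag='sup'), dict(tag='unsup_teacher'), dict(tag='unsup_student')]\nkwargs = dict(gt_bboxes=[torch.zeros(0, 4)] * 3)\n\ndata_groups = split_batch(imgs, img_metas, kwargs)\nprint(sorted(data_groups.keys()))       # ['sup', 'unsup_student', 'unsup_teacher']\nprint(data_groups['sup']['img'].shape)  # torch.Size([1, 3, 32, 32])\n"
  },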
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_version.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom mmdet import digit_version\n\n\ndef test_version_check():\n    assert digit_version('1.0.5') > digit_version('1.0.5rc0')\n    assert digit_version('1.0.5') > digit_version('1.0.4rc0')\n    assert digit_version('1.0.5') > digit_version('1.0rc0')\n    assert digit_version('1.0.0') > digit_version('0.6.2')\n    assert digit_version('1.0.0') > digit_version('0.2.16')\n    assert digit_version('1.0.5rc0') > digit_version('1.0.0rc0')\n    assert digit_version('1.0.0rc1') > digit_version('1.0.0rc0')\n    assert digit_version('1.0.0rc2') > digit_version('1.0.0rc0')\n    assert digit_version('1.0.0rc2') > digit_version('1.0.0rc1')\n    assert digit_version('1.0.1rc1') > digit_version('1.0.0rc1')\n    assert digit_version('1.0.0') > digit_version('1.0.0rc1')\n"
  },
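  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/usage_sketch_digit_version.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# digit_version turns a version string into a comparable tuple, so releases and release\n# candidates can be ordered exactly as the assertions in the test above demonstrate.\nfrom mmdet import digit_version\n\nassert digit_version('2.25.0') > digit_version('2.24.1')\nassert digit_version('1.0.0') > digit_version('1.0.0rc1')  # final release beats its rc\nprint(digit_version('1.0.5rc0'))\n"
  },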
  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/test_visualization.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os\nimport os.path as osp\nimport tempfile\n\nimport mmcv\nimport numpy as np\nimport pytest\nimport torch\n\nfrom mmdet.core import visualization as vis\nfrom mmdet.datasets import (CityscapesDataset, CocoDataset,\n                            CocoPanopticDataset, VOCDataset)\n\n\ndef test_color():\n    assert vis.color_val_matplotlib(mmcv.Color.blue) == (0., 0., 1.)\n    assert vis.color_val_matplotlib('green') == (0., 1., 0.)\n    assert vis.color_val_matplotlib((1, 2, 3)) == (3 / 255, 2 / 255, 1 / 255)\n    assert vis.color_val_matplotlib(100) == (100 / 255, 100 / 255, 100 / 255)\n    assert vis.color_val_matplotlib(np.zeros(3, dtype=np.int)) == (0., 0., 0.)\n    # forbid white color\n    with pytest.raises(TypeError):\n        vis.color_val_matplotlib([255, 255, 255])\n    # forbid float\n    with pytest.raises(TypeError):\n        vis.color_val_matplotlib(1.0)\n    # overflowed\n    with pytest.raises(AssertionError):\n        vis.color_val_matplotlib((0, 0, 500))\n\n\ndef test_imshow_det_bboxes():\n    tmp_filename = osp.join(tempfile.gettempdir(), 'det_bboxes_image',\n                            'image.jpg')\n    image = np.ones((10, 10, 3), np.uint8)\n    bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]])\n    label = np.array([0, 1])\n    out_image = vis.imshow_det_bboxes(\n        image, bbox, label, out_file=tmp_filename, show=False)\n    assert osp.isfile(tmp_filename)\n    assert image.shape == out_image.shape\n    assert not np.allclose(image, out_image)\n    os.remove(tmp_filename)\n\n    # test grayscale images\n    image = np.ones((10, 10), np.uint8)\n    bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]])\n    label = np.array([0, 1])\n    out_image = vis.imshow_det_bboxes(\n        image, bbox, label, out_file=tmp_filename, show=False)\n    assert osp.isfile(tmp_filename)\n    assert image.shape == out_image.shape[:2]\n    os.remove(tmp_filename)\n\n    # test shaped (0,)\n    image = np.ones((10, 10, 3), np.uint8)\n    bbox = np.ones((0, 4))\n    label = np.ones((0, ))\n    vis.imshow_det_bboxes(\n        image, bbox, label, out_file=tmp_filename, show=False)\n    assert osp.isfile(tmp_filename)\n    os.remove(tmp_filename)\n\n    # test mask\n    image = np.ones((10, 10, 3), np.uint8)\n    bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]])\n    label = np.array([0, 1])\n    segms = np.random.random((2, 10, 10)) > 0.5\n    segms = np.array(segms, np.int32)\n    vis.imshow_det_bboxes(\n        image, bbox, label, segms, out_file=tmp_filename, show=False)\n    assert osp.isfile(tmp_filename)\n    os.remove(tmp_filename)\n\n    # test tensor mask type error\n    with pytest.raises(AttributeError):\n        segms = torch.tensor(segms)\n        vis.imshow_det_bboxes(image, bbox, label, segms, show=False)\n\n\ndef test_imshow_gt_det_bboxes():\n    tmp_filename = osp.join(tempfile.gettempdir(), 'det_bboxes_image',\n                            'image.jpg')\n    image = np.ones((10, 10, 3), np.uint8)\n    bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]])\n    label = np.array([0, 1])\n    annotation = dict(gt_bboxes=bbox, gt_labels=label)\n    det_result = np.array([[2, 1, 3, 3, 0], [3, 4, 6, 6, 1]])\n    result = [det_result]\n    out_image = vis.imshow_gt_det_bboxes(\n        image, annotation, result, out_file=tmp_filename, show=False)\n    assert osp.isfile(tmp_filename)\n    assert image.shape == out_image.shape\n    assert not np.allclose(image, out_image)\n    os.remove(tmp_filename)\n\n    # test grayscale images\n    
image = np.ones((10, 10), np.uint8)\n    bbox = np.array([[2, 1, 3, 3], [3, 4, 6, 6]])\n    label = np.array([0, 1])\n    annotation = dict(gt_bboxes=bbox, gt_labels=label)\n    det_result = np.array([[2, 1, 3, 3, 0], [3, 4, 6, 6, 1]])\n    result = [det_result]\n    vis.imshow_gt_det_bboxes(\n        image, annotation, result, out_file=tmp_filename, show=False)\n    assert osp.isfile(tmp_filename)\n    os.remove(tmp_filename)\n\n    # test numpy mask\n    gt_mask = np.ones((2, 10, 10))\n    annotation['gt_masks'] = gt_mask\n    vis.imshow_gt_det_bboxes(\n        image, annotation, result, out_file=tmp_filename, show=False)\n    assert osp.isfile(tmp_filename)\n    os.remove(tmp_filename)\n\n    # test tensor mask\n    gt_mask = torch.ones((2, 10, 10))\n    annotation['gt_masks'] = gt_mask\n    vis.imshow_gt_det_bboxes(\n        image, annotation, result, out_file=tmp_filename, show=False)\n    assert osp.isfile(tmp_filename)\n    os.remove(tmp_filename)\n\n    # test unsupported type\n    annotation['gt_masks'] = []\n    with pytest.raises(TypeError):\n        vis.imshow_gt_det_bboxes(image, annotation, result, show=False)\n\n\ndef test_palette():\n    assert vis.palette_val([(1, 2, 3)])[0] == (1 / 255, 2 / 255, 3 / 255)\n\n    # test list\n    palette = [(1, 0, 0), (0, 1, 0), (0, 0, 1)]\n    palette_ = vis.get_palette(palette, 3)\n    for color, color_ in zip(palette, palette_):\n        assert color == color_\n\n    # test tuple\n    palette = vis.get_palette((1, 2, 3), 3)\n    assert len(palette) == 3\n    for color in palette:\n        assert color == (1, 2, 3)\n\n    # test color str\n    palette = vis.get_palette('red', 3)\n    assert len(palette) == 3\n    for color in palette:\n        assert color == (255, 0, 0)\n\n    # test dataset str\n    palette = vis.get_palette('coco', len(CocoDataset.CLASSES))\n    assert len(palette) == len(CocoDataset.CLASSES)\n    assert palette[0] == (220, 20, 60)\n    palette = vis.get_palette('coco', len(CocoPanopticDataset.CLASSES))\n    assert len(palette) == len(CocoPanopticDataset.CLASSES)\n    assert palette[-1] == (250, 141, 255)\n    palette = vis.get_palette('voc', len(VOCDataset.CLASSES))\n    assert len(palette) == len(VOCDataset.CLASSES)\n    assert palette[0] == (106, 0, 228)\n    palette = vis.get_palette('citys', len(CityscapesDataset.CLASSES))\n    assert len(palette) == len(CityscapesDataset.CLASSES)\n    assert palette[0] == (220, 20, 60)\n\n    # test random\n    palette1 = vis.get_palette('random', 3)\n    palette2 = vis.get_palette(None, 3)\n    for color1, color2 in zip(palette1, palette2):\n        assert isinstance(color1, tuple)\n        assert isinstance(color2, tuple)\n        assert color1 == color2\n"
  },
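  {
    "path": "DLTA_AI_app/mmdetection/tests/test_utils/usage_sketch_imshow_det_bboxes.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# It draws detection boxes with the visualization helper tested above and writes the\n# result to disk instead of opening a window, mirroring the out_file/show=False pattern.\nimport numpy as np\n\nfrom mmdet.core import visualization as vis\n\nimage = np.zeros((64, 64, 3), np.uint8)\nbboxes = np.array([[5, 5, 30, 30], [20, 25, 60, 55]])\nlabels = np.array([0, 1])\n\ndrawn = vis.imshow_det_bboxes(\n    image, bboxes, labels,\n    class_names=('cat', 'dog'),\n    out_file='det_bboxes_example.jpg',\n    show=False)\nprint(drawn.shape)  # same spatial size as the input image\n"
  },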
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/analyze_logs.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport json\nfrom collections import defaultdict\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport seaborn as sns\n\n\ndef cal_train_time(log_dicts, args):\n    for i, log_dict in enumerate(log_dicts):\n        print(f'{\"-\" * 5}Analyze train time of {args.json_logs[i]}{\"-\" * 5}')\n        all_times = []\n        for epoch in log_dict.keys():\n            if args.include_outliers:\n                all_times.append(log_dict[epoch]['time'])\n            else:\n                all_times.append(log_dict[epoch]['time'][1:])\n        if not all_times:\n            raise KeyError(\n                'Please reduce the log interval in the config so that'\n                'interval is less than iterations of one epoch.')\n        all_times = np.array(all_times)\n        epoch_ave_time = all_times.mean(-1)\n        slowest_epoch = epoch_ave_time.argmax()\n        fastest_epoch = epoch_ave_time.argmin()\n        std_over_epoch = epoch_ave_time.std()\n        print(f'slowest epoch {slowest_epoch + 1}, '\n              f'average time is {epoch_ave_time[slowest_epoch]:.4f}')\n        print(f'fastest epoch {fastest_epoch + 1}, '\n              f'average time is {epoch_ave_time[fastest_epoch]:.4f}')\n        print(f'time std over epochs is {std_over_epoch:.4f}')\n        print(f'average iter time: {np.mean(all_times):.4f} s/iter')\n        print()\n\n\ndef plot_curve(log_dicts, args):\n    if args.backend is not None:\n        plt.switch_backend(args.backend)\n    sns.set_style(args.style)\n    # if legend is None, use {filename}_{key} as legend\n    legend = args.legend\n    if legend is None:\n        legend = []\n        for json_log in args.json_logs:\n            for metric in args.keys:\n                legend.append(f'{json_log}_{metric}')\n    assert len(legend) == (len(args.json_logs) * len(args.keys))\n    metrics = args.keys\n\n    num_metrics = len(metrics)\n    for i, log_dict in enumerate(log_dicts):\n        epochs = list(log_dict.keys())\n        for j, metric in enumerate(metrics):\n            print(f'plot curve of {args.json_logs[i]}, metric is {metric}')\n            if metric not in log_dict[epochs[int(args.eval_interval) - 1]]:\n                if 'mAP' in metric:\n                    raise KeyError(\n                        f'{args.json_logs[i]} does not contain metric '\n                        f'{metric}. Please check if \"--no-validate\" is '\n                        'specified when you trained the model.')\n                raise KeyError(\n                    f'{args.json_logs[i]} does not contain metric {metric}. 
'\n                    'Please reduce the log interval in the config so that '\n                    'interval is less than iterations of one epoch.')\n\n            if 'mAP' in metric:\n                xs = []\n                ys = []\n                for epoch in epochs:\n                    ys += log_dict[epoch][metric]\n                    if 'val' in log_dict[epoch]['mode']:\n                        xs.append(epoch)\n                plt.xlabel('epoch')\n                plt.plot(xs, ys, label=legend[i * num_metrics + j], marker='o')\n            else:\n                xs = []\n                ys = []\n                num_iters_per_epoch = log_dict[epochs[0]]['iter'][-2]\n                for epoch in epochs:\n                    iters = log_dict[epoch]['iter']\n                    if log_dict[epoch]['mode'][-1] == 'val':\n                        iters = iters[:-1]\n                    xs.append(\n                        np.array(iters) + (epoch - 1) * num_iters_per_epoch)\n                    ys.append(np.array(log_dict[epoch][metric][:len(iters)]))\n                xs = np.concatenate(xs)\n                ys = np.concatenate(ys)\n                plt.xlabel('iter')\n                plt.plot(\n                    xs, ys, label=legend[i * num_metrics + j], linewidth=0.5)\n            plt.legend()\n        if args.title is not None:\n            plt.title(args.title)\n    if args.out is None:\n        plt.show()\n    else:\n        print(f'save curve to: {args.out}')\n        plt.savefig(args.out)\n        plt.cla()\n\n\ndef add_plot_parser(subparsers):\n    parser_plt = subparsers.add_parser(\n        'plot_curve', help='parser for plotting curves')\n    parser_plt.add_argument(\n        'json_logs',\n        type=str,\n        nargs='+',\n        help='path of train log in json format')\n    parser_plt.add_argument(\n        '--keys',\n        type=str,\n        nargs='+',\n        default=['bbox_mAP'],\n        help='the metric that you want to plot')\n    parser_plt.add_argument(\n        '--start-epoch',\n        type=str,\n        default='1',\n        help='the epoch that you want to start')\n    parser_plt.add_argument(\n        '--eval-interval',\n        type=str,\n        default='1',\n        help='the eval interval when training')\n    parser_plt.add_argument('--title', type=str, help='title of figure')\n    parser_plt.add_argument(\n        '--legend',\n        type=str,\n        nargs='+',\n        default=None,\n        help='legend of each plot')\n    parser_plt.add_argument(\n        '--backend', type=str, default=None, help='backend of plt')\n    parser_plt.add_argument(\n        '--style', type=str, default='dark', help='style of plt')\n    parser_plt.add_argument('--out', type=str, default=None)\n\n\ndef add_time_parser(subparsers):\n    parser_time = subparsers.add_parser(\n        'cal_train_time',\n        help='parser for computing the average time per training iteration')\n    parser_time.add_argument(\n        'json_logs',\n        type=str,\n        nargs='+',\n        help='path of train log in json format')\n    parser_time.add_argument(\n        '--include-outliers',\n        action='store_true',\n        help='include the first value of every epoch when computing '\n        'the average time')\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Analyze Json Log')\n    # currently only support plot curve and calculate average train time\n    subparsers = parser.add_subparsers(dest='task', help='task parser')\n    
add_plot_parser(subparsers)\n    add_time_parser(subparsers)\n    args = parser.parse_args()\n    return args\n\n\ndef load_json_logs(json_logs):\n    # load and convert json_logs to log_dict, key is epoch, value is a sub dict\n    # keys of sub dict is different metrics, e.g. memory, bbox_mAP\n    # value of sub dict is a list of corresponding values of all iterations\n    log_dicts = [dict() for _ in json_logs]\n    for json_log, log_dict in zip(json_logs, log_dicts):\n        with open(json_log, 'r') as log_file:\n            for i, line in enumerate(log_file):\n                log = json.loads(line.strip())\n                # skip the first training info line\n                if i == 0:\n                    continue\n                # skip lines without `epoch` field\n                if 'epoch' not in log:\n                    continue\n                epoch = log.pop('epoch')\n                if epoch not in log_dict:\n                    log_dict[epoch] = defaultdict(list)\n                for k, v in log.items():\n                    log_dict[epoch][k].append(v)\n    return log_dicts\n\n\ndef main():\n    args = parse_args()\n\n    json_logs = args.json_logs\n    for json_log in json_logs:\n        assert json_log.endswith('.json')\n\n    log_dicts = load_json_logs(json_logs)\n\n    eval(args.task)(log_dicts, args)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
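  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/usage_sketch_analyze_logs.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# analyze_logs.py is a command-line tool; the calls below just show the two subcommands\n# wired up in parse_args() above. The json log path is a placeholder for the per-run\n# *.log.json file written during training.\nimport subprocess\n\nLOG = 'work_dirs/faster_rcnn/20220101_000000.log.json'  # placeholder path\n\n# plot the bbox mAP curve and save it to a png\nsubprocess.run([\n    'python', 'tools/analysis_tools/analyze_logs.py', 'plot_curve', LOG,\n    '--keys', 'bbox_mAP', '--out', 'bbox_map.png',\n])\n\n# report slowest/fastest epochs and the average time per iteration\nsubprocess.run([\n    'python', 'tools/analysis_tools/analyze_logs.py', 'cal_train_time', LOG,\n])\n"
  },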
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/analyze_results.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os.path as osp\nfrom multiprocessing import Pool\n\nimport mmcv\nimport numpy as np\nfrom mmcv import Config, DictAction\n\nfrom mmdet.core.evaluation import eval_map\nfrom mmdet.core.visualization import imshow_gt_det_bboxes\nfrom mmdet.datasets import build_dataset, get_loading_pipeline\nfrom mmdet.datasets.api_wrappers import pq_compute_single_core\nfrom mmdet.utils import replace_cfg_vals, update_data_root\n\n\ndef bbox_map_eval(det_result, annotation, nproc=4):\n    \"\"\"Evaluate mAP of single image det result.\n\n    Args:\n        det_result (list[list]): [[cls1_det, cls2_det, ...], ...].\n            The outer list indicates images, and the inner list indicates\n            per-class detected bboxes.\n        annotation (dict): Ground truth annotations where keys of\n             annotations are:\n\n            - bboxes: numpy array of shape (n, 4)\n            - labels: numpy array of shape (n, )\n            - bboxes_ignore (optional): numpy array of shape (k, 4)\n            - labels_ignore (optional): numpy array of shape (k, )\n\n        nproc (int): Processes used for computing mAP.\n            Default: 4.\n\n    Returns:\n        float: mAP\n    \"\"\"\n\n    # use only bbox det result\n    if isinstance(det_result, tuple):\n        bbox_det_result = [det_result[0]]\n    else:\n        bbox_det_result = [det_result]\n    # mAP\n    iou_thrs = np.linspace(\n        .5, 0.95, int(np.round((0.95 - .5) / .05)) + 1, endpoint=True)\n\n    processes = []\n    workers = Pool(processes=nproc)\n    for thr in iou_thrs:\n        p = workers.apply_async(eval_map, (bbox_det_result, [annotation]), {\n            'iou_thr': thr,\n            'logger': 'silent',\n            'nproc': 1\n        })\n        processes.append(p)\n\n    workers.close()\n    workers.join()\n\n    mean_aps = []\n    for p in processes:\n        mean_aps.append(p.get()[0])\n\n    return sum(mean_aps) / len(mean_aps)\n\n\nclass ResultVisualizer:\n    \"\"\"Display and save evaluation results.\n\n    Args:\n        show (bool): Whether to show the image. Default: True.\n        wait_time (float): Value of waitKey param. Default: 0.\n        score_thr (float): Minimum score of bboxes to be shown.\n           Default: 0.\n        overlay_gt_pred (bool): Whether to plot gts and predictions on the\n            same image. If False, predictions and gts will be plotted on two\n            same image which will be concatenated in vertical direction.\n            The image above is drawn with gt, and the image below is drawn\n            with the prediction result. 
Default: False.\n    \"\"\"\n\n    def __init__(self,\n                 show=False,\n                 wait_time=0,\n                 score_thr=0,\n                 overlay_gt_pred=False):\n        self.show = show\n        self.wait_time = wait_time\n        self.score_thr = score_thr\n        self.overlay_gt_pred = overlay_gt_pred\n\n    def _save_image_gts_results(self,\n                                dataset,\n                                results,\n                                performances,\n                                out_dir=None):\n        \"\"\"Display or save image with groung truths and predictions from a\n        model.\n\n        Args:\n            dataset (Dataset): A PyTorch dataset.\n            results (list): Object detection or panoptic segmentation\n                results from test results pkl file.\n            performances (dict): A dict contains samples's indices\n                in dataset and model's performance on them.\n            out_dir (str, optional): The filename to write the image.\n                Defaults: None.\n        \"\"\"\n        mmcv.mkdir_or_exist(out_dir)\n\n        for performance_info in performances:\n            index, performance = performance_info\n            data_info = dataset.prepare_train_img(index)\n\n            # calc save file path\n            filename = data_info['filename']\n            if data_info['img_prefix'] is not None:\n                filename = osp.join(data_info['img_prefix'], filename)\n            else:\n                filename = data_info['filename']\n            fname, name = osp.splitext(osp.basename(filename))\n            save_filename = fname + '_' + str(round(performance, 3)) + name\n            out_file = osp.join(out_dir, save_filename)\n            imshow_gt_det_bboxes(\n                data_info['img'],\n                data_info,\n                results[index],\n                dataset.CLASSES,\n                gt_bbox_color=dataset.PALETTE,\n                gt_text_color=(200, 200, 200),\n                gt_mask_color=dataset.PALETTE,\n                det_bbox_color=dataset.PALETTE,\n                det_text_color=(200, 200, 200),\n                det_mask_color=dataset.PALETTE,\n                show=self.show,\n                score_thr=self.score_thr,\n                wait_time=self.wait_time,\n                out_file=out_file,\n                overlay_gt_pred=self.overlay_gt_pred)\n\n    def evaluate_and_show(self,\n                          dataset,\n                          results,\n                          topk=20,\n                          show_dir='work_dir'):\n        \"\"\"Evaluate and show results.\n\n        Args:\n            dataset (Dataset): A PyTorch dataset.\n            results (list): Object detection or panoptic segmentation\n                results from test results pkl file.\n            topk (int): Number of the highest topk and\n                lowest topk after evaluation index sorting. 
Default: 20.\n            show_dir (str, optional): The filename to write the image.\n                Default: 'work_dir'\n            eval_fn (callable, optional): Eval function, Default: None.\n        \"\"\"\n\n        assert topk > 0\n        if (topk * 2) > len(dataset):\n            topk = len(dataset) // 2\n\n        if isinstance(results[0], dict):\n            good_samples, bad_samples = self.panoptic_evaluate(\n                dataset, results, topk=topk)\n        elif isinstance(results[0], list):\n            good_samples, bad_samples = self.detection_evaluate(\n                dataset, results, topk=topk)\n        elif isinstance(results[0], tuple):\n            results_ = [result[0] for result in results]\n            good_samples, bad_samples = self.detection_evaluate(\n                dataset, results_, topk=topk)\n        else:\n            raise 'The format of result is not supported yet. ' \\\n                'Current dict for panoptic segmentation and list ' \\\n                'or tuple for object detection are supported.'\n\n        good_dir = osp.abspath(osp.join(show_dir, 'good'))\n        bad_dir = osp.abspath(osp.join(show_dir, 'bad'))\n        self._save_image_gts_results(dataset, results, good_samples, good_dir)\n        self._save_image_gts_results(dataset, results, bad_samples, bad_dir)\n\n    def detection_evaluate(self, dataset, results, topk=20, eval_fn=None):\n        \"\"\"Evaluation for object detection.\n\n        Args:\n            dataset (Dataset): A PyTorch dataset.\n            results (list): Object detection results from test\n                results pkl file.\n            topk (int): Number of the highest topk and\n                lowest topk after evaluation index sorting. Default: 20.\n            eval_fn (callable, optional): Eval function, Default: None.\n\n        Returns:\n            tuple: A tuple contains good samples and bad samples.\n                good_mAPs (dict[int, float]): A dict contains good\n                    samples's indices in dataset and model's\n                    performance on them.\n                bad_mAPs (dict[int, float]): A dict contains bad\n                    samples's indices in dataset and model's\n                    performance on them.\n        \"\"\"\n        if eval_fn is None:\n            eval_fn = bbox_map_eval\n        else:\n            assert callable(eval_fn)\n\n        prog_bar = mmcv.ProgressBar(len(results))\n        _mAPs = {}\n        for i, (result, ) in enumerate(zip(results)):\n            # self.dataset[i] should not call directly\n            # because there is a risk of mismatch\n            data_info = dataset.prepare_train_img(i)\n            mAP = eval_fn(result, data_info['ann_info'])\n            _mAPs[i] = mAP\n            prog_bar.update()\n        # descending select topk image\n        _mAPs = list(sorted(_mAPs.items(), key=lambda kv: kv[1]))\n        good_mAPs = _mAPs[-topk:]\n        bad_mAPs = _mAPs[:topk]\n\n        return good_mAPs, bad_mAPs\n\n    def panoptic_evaluate(self, dataset, results, topk=20):\n        \"\"\"Evaluation for panoptic segmentation.\n\n        Args:\n            dataset (Dataset): A PyTorch dataset.\n            results (list): Panoptic segmentation results from test\n                results pkl file.\n            topk (int): Number of the highest topk and\n                lowest topk after evaluation index sorting. 
Default: 20.\n\n        Returns:\n            tuple: A tuple contains good samples and bad samples.\n                good_pqs (dict[int, float]): A dict contains good\n                    samples's indices in dataset and model's\n                    performance on them.\n                bad_pqs (dict[int, float]): A dict contains bad\n                    samples's indices in dataset and model's\n                    performance on them.\n        \"\"\"\n        # image to annotations\n        gt_json = dataset.coco.img_ann_map\n\n        result_files, tmp_dir = dataset.format_results(results)\n        pred_json = mmcv.load(result_files['panoptic'])['annotations']\n        pred_folder = osp.join(tmp_dir.name, 'panoptic')\n        gt_folder = dataset.seg_prefix\n\n        pqs = {}\n        prog_bar = mmcv.ProgressBar(len(results))\n        for i in range(len(results)):\n            data_info = dataset.prepare_train_img(i)\n            image_id = data_info['img_info']['id']\n            gt_ann = {\n                'image_id': image_id,\n                'segments_info': gt_json[image_id],\n                'file_name': data_info['img_info']['segm_file']\n            }\n            pred_ann = pred_json[i]\n            pq_stat = pq_compute_single_core(\n                i, [(gt_ann, pred_ann)],\n                gt_folder,\n                pred_folder,\n                dataset.categories,\n                dataset.file_client,\n                print_log=False)\n            pq_results, classwise_results = pq_stat.pq_average(\n                dataset.categories, isthing=None)\n            pqs[i] = pq_results['pq']\n            prog_bar.update()\n\n        if tmp_dir is not None:\n            tmp_dir.cleanup()\n\n        # descending select topk image\n        pqs = list(sorted(pqs.items(), key=lambda kv: kv[1]))\n        good_pqs = pqs[-topk:]\n        bad_pqs = pqs[:topk]\n\n        return good_pqs, bad_pqs\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet eval image prediction result for each')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument(\n        'prediction_path', help='prediction path where test pkl result')\n    parser.add_argument(\n        'show_dir', help='directory where painted images will be saved')\n    parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument(\n        '--wait-time',\n        type=float,\n        default=0,\n        help='the interval of show (s), 0 is block')\n    parser.add_argument(\n        '--topk',\n        default=20,\n        type=int,\n        help='saved Number of the highest topk '\n        'and lowest topk after index sorting')\n    parser.add_argument(\n        '--show-score-thr',\n        type=float,\n        default=0,\n        help='score threshold (default: 0.)')\n    parser.add_argument(\n        '--overlay-gt-pred',\n        action='store_true',\n        help='whether to plot gts and predictions on the same image.'\n        'If False, predictions and gts will be plotted on two same'\n        'image which will be concatenated in vertical direction.'\n        'The image above is drawn with gt, and the image below is'\n        'drawn with the prediction result.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. 
If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    mmcv.check_file_exist(args.prediction_path)\n\n    cfg = Config.fromfile(args.config)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    cfg.data.test.test_mode = True\n\n    cfg.data.test.pop('samples_per_gpu', 0)\n    if cfg.data.train.type in ('MultiImageMixDataset', 'ClassBalancedDataset',\n                               'RepeatDataset', 'ConcatDataset'):\n        cfg.data.test.pipeline = get_loading_pipeline(\n            cfg.data.train.dataset.pipeline)\n    else:\n        cfg.data.test.pipeline = get_loading_pipeline(cfg.data.train.pipeline)\n\n    dataset = build_dataset(cfg.data.test)\n    outputs = mmcv.load(args.prediction_path)\n\n    result_visualizer = ResultVisualizer(args.show, args.wait_time,\n                                         args.show_score_thr,\n                                         args.overlay_gt_pred)\n    result_visualizer.evaluate_and_show(\n        dataset, outputs, topk=args.topk, show_dir=args.show_dir)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
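  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/usage_sketch_analyze_results.py",
    "content": "# Hypothetical usage sketch added by the editor; not part of DLTA_AI or upstream mmdetection.\n# analyze_results.py is a command-line tool; the call below shows the arguments defined\n# in parse_args() above. Config, pkl and output paths are placeholders; the pkl is the\n# per-image result file typically produced by the repo's test script.\nimport subprocess\n\nsubprocess.run([\n    'python', 'tools/analysis_tools/analyze_results.py',\n    'configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.py',  # placeholder config\n    'work_dirs/results.pkl',                               # placeholder results pkl\n    'work_dirs/analysis',                                  # where good/bad images go\n    '--topk', '20',\n    '--show-score-thr', '0.3',\n])\n"
  },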
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/benchmark.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport copy\nimport os\nimport time\n\nimport torch\nfrom mmcv import Config, DictAction\nfrom mmcv.cnn import fuse_conv_bn\nfrom mmcv.parallel import MMDistributedDataParallel\nfrom mmcv.runner import init_dist, load_checkpoint, wrap_fp16_model\n\nfrom mmdet.datasets import (build_dataloader, build_dataset,\n                            replace_ImageToTensor)\nfrom mmdet.models import build_detector\nfrom mmdet.utils import replace_cfg_vals, update_data_root\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet benchmark a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument(\n        '--repeat-num',\n        type=int,\n        default=1,\n        help='number of repeat times of measurement for averaging the results')\n    parser.add_argument(\n        '--max-iter', type=int, default=2000, help='num of max iter')\n    parser.add_argument(\n        '--log-interval', type=int, default=50, help='interval of logging')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n    return args\n\n\ndef measure_inference_speed(cfg, checkpoint, max_iter, log_interval,\n                            is_fuse_conv_bn):\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n\n    # build the dataloader\n    samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)\n    if samples_per_gpu > 1:\n        # Replace 'ImageToTensor' to 'DefaultFormatBundle'\n        cfg.data.test.pipeline = replace_ImageToTensor(cfg.data.test.pipeline)\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(\n        dataset,\n        samples_per_gpu=1,\n        # Because multiple processes will occupy additional CPU resources,\n        # FPS statistics will be more unstable when workers_per_gpu is not 0.\n        # It is reasonable to set workers_per_gpu to 0.\n        workers_per_gpu=0,\n        dist=True,\n        shuffle=False)\n\n    # build the model and load checkpoint\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n    load_checkpoint(model, checkpoint, map_location='cpu')\n    if is_fuse_conv_bn:\n        model = 
fuse_conv_bn(model)\n\n    model = MMDistributedDataParallel(\n        model.cuda(),\n        device_ids=[torch.cuda.current_device()],\n        broadcast_buffers=False)\n    model.eval()\n\n    # the first several iterations may be very slow so skip them\n    num_warmup = 5\n    pure_inf_time = 0\n    fps = 0\n\n    # benchmark with 2000 image and take the average\n    for i, data in enumerate(data_loader):\n\n        torch.cuda.synchronize()\n        start_time = time.perf_counter()\n\n        with torch.no_grad():\n            model(return_loss=False, rescale=True, **data)\n\n        torch.cuda.synchronize()\n        elapsed = time.perf_counter() - start_time\n\n        if i >= num_warmup:\n            pure_inf_time += elapsed\n            if (i + 1) % log_interval == 0:\n                fps = (i + 1 - num_warmup) / pure_inf_time\n                print(\n                    f'Done image [{i + 1:<3}/ {max_iter}], '\n                    f'fps: {fps:.1f} img / s, '\n                    f'times per image: {1000 / fps:.1f} ms / img',\n                    flush=True)\n\n        if (i + 1) == max_iter:\n            fps = (i + 1 - num_warmup) / pure_inf_time\n            print(\n                f'Overall fps: {fps:.1f} img / s, '\n                f'times per image: {1000 / fps:.1f} ms / img',\n                flush=True)\n            break\n    return fps\n\n\ndef repeat_measure_inference_speed(cfg,\n                                   checkpoint,\n                                   max_iter,\n                                   log_interval,\n                                   is_fuse_conv_bn,\n                                   repeat_num=1):\n    assert repeat_num >= 1\n\n    fps_list = []\n\n    for _ in range(repeat_num):\n        #\n        cp_cfg = copy.deepcopy(cfg)\n\n        fps_list.append(\n            measure_inference_speed(cp_cfg, checkpoint, max_iter, log_interval,\n                                    is_fuse_conv_bn))\n\n    if repeat_num > 1:\n        fps_list_ = [round(fps, 1) for fps in fps_list]\n        times_pre_image_list_ = [round(1000 / fps, 1) for fps in fps_list]\n        mean_fps_ = sum(fps_list_) / len(fps_list_)\n        mean_times_pre_image_ = sum(times_pre_image_list_) / len(\n            times_pre_image_list_)\n        print(\n            f'Overall fps: {fps_list_}[{mean_fps_:.1f}] img / s, '\n            f'times per image: '\n            f'{times_pre_image_list_}[{mean_times_pre_image_:.1f}] ms / img',\n            flush=True)\n        return fps_list\n\n    return fps_list[0]\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    if args.launcher == 'none':\n        raise NotImplementedError('Only supports distributed mode')\n    else:\n        init_dist(args.launcher, **cfg.dist_params)\n\n    repeat_measure_inference_speed(cfg, args.checkpoint, args.max_iter,\n                                   args.log_interval, args.fuse_conv_bn,\n                                   args.repeat_num)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/coco_error_analysis.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport copy\nimport os\nfrom argparse import ArgumentParser\nfrom multiprocessing import Pool\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\n\n\ndef makeplot(rs, ps, outDir, class_name, iou_type):\n    cs = np.vstack([\n        np.ones((2, 3)),\n        np.array([0.31, 0.51, 0.74]),\n        np.array([0.75, 0.31, 0.30]),\n        np.array([0.36, 0.90, 0.38]),\n        np.array([0.50, 0.39, 0.64]),\n        np.array([1, 0.6, 0]),\n    ])\n    areaNames = ['allarea', 'small', 'medium', 'large']\n    types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN']\n    for i in range(len(areaNames)):\n        area_ps = ps[..., i, 0]\n        figure_title = iou_type + '-' + class_name + '-' + areaNames[i]\n        aps = [ps_.mean() for ps_ in area_ps]\n        ps_curve = [\n            ps_.mean(axis=1) if ps_.ndim > 1 else ps_ for ps_ in area_ps\n        ]\n        ps_curve.insert(0, np.zeros(ps_curve[0].shape))\n        fig = plt.figure()\n        ax = plt.subplot(111)\n        for k in range(len(types)):\n            ax.plot(rs, ps_curve[k + 1], color=[0, 0, 0], linewidth=0.5)\n            ax.fill_between(\n                rs,\n                ps_curve[k],\n                ps_curve[k + 1],\n                color=cs[k],\n                label=str(f'[{aps[k]:.3f}]' + types[k]),\n            )\n        plt.xlabel('recall')\n        plt.ylabel('precision')\n        plt.xlim(0, 1.0)\n        plt.ylim(0, 1.0)\n        plt.title(figure_title)\n        plt.legend()\n        # plt.show()\n        fig.savefig(outDir + f'/{figure_title}.png')\n        plt.close(fig)\n\n\ndef autolabel(ax, rects):\n    \"\"\"Attach a text label above each bar in *rects*, displaying its height.\"\"\"\n    for rect in rects:\n        height = rect.get_height()\n        if height > 0 and height <= 1:  # for percent values\n            text_label = '{:2.0f}'.format(height * 100)\n        else:\n            text_label = '{:2.0f}'.format(height)\n        ax.annotate(\n            text_label,\n            xy=(rect.get_x() + rect.get_width() / 2, height),\n            xytext=(0, 3),  # 3 points vertical offset\n            textcoords='offset points',\n            ha='center',\n            va='bottom',\n            fontsize='x-small',\n        )\n\n\ndef makebarplot(rs, ps, outDir, class_name, iou_type):\n    areaNames = ['allarea', 'small', 'medium', 'large']\n    types = ['C75', 'C50', 'Loc', 'Sim', 'Oth', 'BG', 'FN']\n    fig, ax = plt.subplots()\n    x = np.arange(len(areaNames))  # the areaNames locations\n    width = 0.60  # the width of the bars\n    rects_list = []\n    figure_title = iou_type + '-' + class_name + '-' + 'ap bar plot'\n    for i in range(len(types) - 1):\n        type_ps = ps[i, ..., 0]\n        aps = [ps_.mean() for ps_ in type_ps.T]\n        rects_list.append(\n            ax.bar(\n                x - width / 2 + (i + 1) * width / len(types),\n                aps,\n                width / len(types),\n                label=types[i],\n            ))\n\n    # Add some text for labels, title and custom x-axis tick labels, etc.\n    ax.set_ylabel('Mean Average Precision (mAP)')\n    ax.set_title(figure_title)\n    ax.set_xticks(x)\n    ax.set_xticklabels(areaNames)\n    ax.legend()\n\n    # Add score texts over bars\n    for rects in rects_list:\n        autolabel(ax, rects)\n\n    # Save plot\n    fig.savefig(outDir + f'/{figure_title}.png')\n    
plt.close(fig)\n\n\ndef get_gt_area_group_numbers(cocoEval):\n    areaRng = cocoEval.params.areaRng\n    areaRngStr = [str(aRng) for aRng in areaRng]\n    areaRngLbl = cocoEval.params.areaRngLbl\n    areaRngStr2areaRngLbl = dict(zip(areaRngStr, areaRngLbl))\n    areaRngLbl2Number = dict.fromkeys(areaRngLbl, 0)\n    for evalImg in cocoEval.evalImgs:\n        if evalImg:\n            for gtIgnore in evalImg['gtIgnore']:\n                if not gtIgnore:\n                    aRngLbl = areaRngStr2areaRngLbl[str(evalImg['aRng'])]\n                    areaRngLbl2Number[aRngLbl] += 1\n    return areaRngLbl2Number\n\n\ndef make_gt_area_group_numbers_plot(cocoEval, outDir, verbose=True):\n    areaRngLbl2Number = get_gt_area_group_numbers(cocoEval)\n    areaRngLbl = areaRngLbl2Number.keys()\n    if verbose:\n        print('number of annotations per area group:', areaRngLbl2Number)\n\n    # Init figure\n    fig, ax = plt.subplots()\n    x = np.arange(len(areaRngLbl))  # the areaNames locations\n    width = 0.60  # the width of the bars\n    figure_title = 'number of annotations per area group'\n\n    rects = ax.bar(x, areaRngLbl2Number.values(), width)\n\n    # Add some text for labels, title and custom x-axis tick labels, etc.\n    ax.set_ylabel('Number of annotations')\n    ax.set_title(figure_title)\n    ax.set_xticks(x)\n    ax.set_xticklabels(areaRngLbl)\n\n    # Add score texts over bars\n    autolabel(ax, rects)\n\n    # Save plot\n    fig.tight_layout()\n    fig.savefig(outDir + f'/{figure_title}.png')\n    plt.close(fig)\n\n\ndef make_gt_area_histogram_plot(cocoEval, outDir):\n    n_bins = 100\n    areas = [ann['area'] for ann in cocoEval.cocoGt.anns.values()]\n\n    # init figure\n    figure_title = 'gt annotation areas histogram plot'\n    fig, ax = plt.subplots()\n\n    # Set the number of bins\n    ax.hist(np.sqrt(areas), bins=n_bins)\n\n    # Add some text for labels, title and custom x-axis tick labels, etc.\n    ax.set_xlabel('Squareroot Area')\n    ax.set_ylabel('Number of annotations')\n    ax.set_title(figure_title)\n\n    # Save plot\n    fig.tight_layout()\n    fig.savefig(outDir + f'/{figure_title}.png')\n    plt.close(fig)\n\n\ndef analyze_individual_category(k,\n                                cocoDt,\n                                cocoGt,\n                                catId,\n                                iou_type,\n                                areas=None):\n    nm = cocoGt.loadCats(catId)[0]\n    print(f'--------------analyzing {k + 1}-{nm[\"name\"]}---------------')\n    ps_ = {}\n    dt = copy.deepcopy(cocoDt)\n    nm = cocoGt.loadCats(catId)[0]\n    imgIds = cocoGt.getImgIds()\n    dt_anns = dt.dataset['annotations']\n    select_dt_anns = []\n    for ann in dt_anns:\n        if ann['category_id'] == catId:\n            select_dt_anns.append(ann)\n    dt.dataset['annotations'] = select_dt_anns\n    dt.createIndex()\n    # compute precision but ignore superclass confusion\n    gt = copy.deepcopy(cocoGt)\n    child_catIds = gt.getCatIds(supNms=[nm['supercategory']])\n    for idx, ann in enumerate(gt.dataset['annotations']):\n        if ann['category_id'] in child_catIds and ann['category_id'] != catId:\n            gt.dataset['annotations'][idx]['ignore'] = 1\n            gt.dataset['annotations'][idx]['iscrowd'] = 1\n            gt.dataset['annotations'][idx]['category_id'] = catId\n    cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)\n    cocoEval.params.imgIds = imgIds\n    cocoEval.params.maxDets = [100]\n    cocoEval.params.iouThrs = [0.1]\n    
cocoEval.params.useCats = 1\n    if areas:\n        cocoEval.params.areaRng = [[0**2, areas[2]], [0**2, areas[0]],\n                                   [areas[0], areas[1]], [areas[1], areas[2]]]\n    cocoEval.evaluate()\n    cocoEval.accumulate()\n    ps_supercategory = cocoEval.eval['precision'][0, :, k, :, :]\n    ps_['ps_supercategory'] = ps_supercategory\n    # compute precision but ignore any class confusion\n    gt = copy.deepcopy(cocoGt)\n    for idx, ann in enumerate(gt.dataset['annotations']):\n        if ann['category_id'] != catId:\n            gt.dataset['annotations'][idx]['ignore'] = 1\n            gt.dataset['annotations'][idx]['iscrowd'] = 1\n            gt.dataset['annotations'][idx]['category_id'] = catId\n    cocoEval = COCOeval(gt, copy.deepcopy(dt), iou_type)\n    cocoEval.params.imgIds = imgIds\n    cocoEval.params.maxDets = [100]\n    cocoEval.params.iouThrs = [0.1]\n    cocoEval.params.useCats = 1\n    if areas:\n        cocoEval.params.areaRng = [[0**2, areas[2]], [0**2, areas[0]],\n                                   [areas[0], areas[1]], [areas[1], areas[2]]]\n    cocoEval.evaluate()\n    cocoEval.accumulate()\n    ps_allcategory = cocoEval.eval['precision'][0, :, k, :, :]\n    ps_['ps_allcategory'] = ps_allcategory\n    return k, ps_\n\n\ndef analyze_results(res_file,\n                    ann_file,\n                    res_types,\n                    out_dir,\n                    extraplots=None,\n                    areas=None):\n    for res_type in res_types:\n        assert res_type in ['bbox', 'segm']\n    if areas:\n        assert len(areas) == 3, '3 integers should be specified as areas, \\\n            representing 3 area regions'\n\n    directory = os.path.dirname(out_dir + '/')\n    if not os.path.exists(directory):\n        print(f'-------------create {out_dir}-----------------')\n        os.makedirs(directory)\n\n    cocoGt = COCO(ann_file)\n    cocoDt = cocoGt.loadRes(res_file)\n    imgIds = cocoGt.getImgIds()\n    for res_type in res_types:\n        res_out_dir = out_dir + '/' + res_type + '/'\n        res_directory = os.path.dirname(res_out_dir)\n        if not os.path.exists(res_directory):\n            print(f'-------------create {res_out_dir}-----------------')\n            os.makedirs(res_directory)\n        iou_type = res_type\n        cocoEval = COCOeval(\n            copy.deepcopy(cocoGt), copy.deepcopy(cocoDt), iou_type)\n        cocoEval.params.imgIds = imgIds\n        cocoEval.params.iouThrs = [0.75, 0.5, 0.1]\n        cocoEval.params.maxDets = [100]\n        if areas:\n            cocoEval.params.areaRng = [[0**2, areas[2]], [0**2, areas[0]],\n                                       [areas[0], areas[1]],\n                                       [areas[1], areas[2]]]\n        cocoEval.evaluate()\n        cocoEval.accumulate()\n        ps = cocoEval.eval['precision']\n        ps = np.vstack([ps, np.zeros((4, *ps.shape[1:]))])\n        catIds = cocoGt.getCatIds()\n        recThrs = cocoEval.params.recThrs\n        with Pool(processes=48) as pool:\n            args = [(k, cocoDt, cocoGt, catId, iou_type, areas)\n                    for k, catId in enumerate(catIds)]\n            analyze_results = pool.starmap(analyze_individual_category, args)\n        for k, catId in enumerate(catIds):\n            nm = cocoGt.loadCats(catId)[0]\n            print(f'--------------saving {k + 1}-{nm[\"name\"]}---------------')\n            analyze_result = analyze_results[k]\n            assert k == analyze_result[0]\n            ps_supercategory = 
analyze_result[1]['ps_supercategory']\n            ps_allcategory = analyze_result[1]['ps_allcategory']\n            # compute precision but ignore superclass confusion\n            ps[3, :, k, :, :] = ps_supercategory\n            # compute precision but ignore any class confusion\n            ps[4, :, k, :, :] = ps_allcategory\n            # fill in background and false negative errors and plot\n            ps[ps == -1] = 0\n            ps[5, :, k, :, :] = ps[4, :, k, :, :] > 0\n            ps[6, :, k, :, :] = 1.0\n            makeplot(recThrs, ps[:, :, k], res_out_dir, nm['name'], iou_type)\n            if extraplots:\n                makebarplot(recThrs, ps[:, :, k], res_out_dir, nm['name'],\n                            iou_type)\n        makeplot(recThrs, ps, res_out_dir, 'allclass', iou_type)\n        if extraplots:\n            makebarplot(recThrs, ps, res_out_dir, 'allclass', iou_type)\n            make_gt_area_group_numbers_plot(\n                cocoEval=cocoEval, outDir=res_out_dir, verbose=True)\n            make_gt_area_histogram_plot(cocoEval=cocoEval, outDir=res_out_dir)\n\n\ndef main():\n    parser = ArgumentParser(description='COCO Error Analysis Tool')\n    parser.add_argument('result', help='result file (json format) path')\n    parser.add_argument('out_dir', help='dir to save analyze result images')\n    parser.add_argument(\n        '--ann',\n        default='data/coco/annotations/instances_val2017.json',\n        help='annotation file path')\n    parser.add_argument(\n        '--types', type=str, nargs='+', default=['bbox'], help='result types')\n    parser.add_argument(\n        '--extraplots',\n        action='store_true',\n        help='export extra bar/stat plots')\n    parser.add_argument(\n        '--areas',\n        type=int,\n        nargs='+',\n        default=[1024, 9216, 10000000000],\n        help='area regions')\n    args = parser.parse_args()\n    analyze_results(\n        args.result,\n        args.ann,\n        args.types,\n        out_dir=args.out_dir,\n        extraplots=args.extraplots,\n        areas=args.areas)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/confusion_matrix.py",
    "content": "import argparse\nimport os\n\nimport matplotlib.pyplot as plt\nimport mmcv\nimport numpy as np\nfrom matplotlib.ticker import MultipleLocator\nfrom mmcv import Config, DictAction\nfrom mmcv.ops import nms\n\nfrom mmdet.core.evaluation.bbox_overlaps import bbox_overlaps\nfrom mmdet.datasets import build_dataset\nfrom mmdet.utils import replace_cfg_vals, update_data_root\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Generate confusion matrix from detection results')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument(\n        'prediction_path', help='prediction path where test .pkl result')\n    parser.add_argument(\n        'save_dir', help='directory where confusion matrix will be saved')\n    parser.add_argument(\n        '--show', action='store_true', help='show confusion matrix')\n    parser.add_argument(\n        '--color-theme',\n        default='plasma',\n        help='theme of the matrix color map')\n    parser.add_argument(\n        '--score-thr',\n        type=float,\n        default=0.3,\n        help='score threshold to filter detection bboxes')\n    parser.add_argument(\n        '--tp-iou-thr',\n        type=float,\n        default=0.5,\n        help='IoU threshold to be considered as matched')\n    parser.add_argument(\n        '--nms-iou-thr',\n        type=float,\n        default=None,\n        help='nms IoU threshold, only applied when users want to change the'\n        'nms IoU threshold.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    args = parser.parse_args()\n    return args\n\n\ndef calculate_confusion_matrix(dataset,\n                               results,\n                               score_thr=0,\n                               nms_iou_thr=None,\n                               tp_iou_thr=0.5):\n    \"\"\"Calculate the confusion matrix.\n\n    Args:\n        dataset (Dataset): Test or val dataset.\n        results (list[ndarray]): A list of detection results in each image.\n        score_thr (float|optional): Score threshold to filter bboxes.\n            Default: 0.\n        nms_iou_thr (float|optional): nms IoU threshold, the detection results\n            have done nms in the detector, only applied when users want to\n            change the nms IoU threshold. 
Default: None.\n        tp_iou_thr (float|optional): IoU threshold to be considered as matched.\n            Default: 0.5.\n    \"\"\"\n    num_classes = len(dataset.CLASSES)\n    confusion_matrix = np.zeros(shape=[num_classes + 1, num_classes + 1])\n    assert len(dataset) == len(results)\n    prog_bar = mmcv.ProgressBar(len(results))\n    for idx, per_img_res in enumerate(results):\n        if isinstance(per_img_res, tuple):\n            res_bboxes, _ = per_img_res\n        else:\n            res_bboxes = per_img_res\n        ann = dataset.get_ann_info(idx)\n        gt_bboxes = ann['bboxes']\n        labels = ann['labels']\n        analyze_per_img_dets(confusion_matrix, gt_bboxes, labels, res_bboxes,\n                             score_thr, tp_iou_thr, nms_iou_thr)\n        prog_bar.update()\n    return confusion_matrix\n\n\ndef analyze_per_img_dets(confusion_matrix,\n                         gt_bboxes,\n                         gt_labels,\n                         result,\n                         score_thr=0,\n                         tp_iou_thr=0.5,\n                         nms_iou_thr=None):\n    \"\"\"Analyze detection results on each image.\n\n    Args:\n        confusion_matrix (ndarray): The confusion matrix,\n            has shape (num_classes + 1, num_classes + 1).\n        gt_bboxes (ndarray): Ground truth bboxes, has shape (num_gt, 4).\n        gt_labels (ndarray): Ground truth labels, has shape (num_gt).\n        result (ndarray): Detection results, has shape\n            (num_classes, num_bboxes, 5).\n        score_thr (float): Score threshold to filter bboxes.\n            Default: 0.\n        tp_iou_thr (float): IoU threshold to be considered as matched.\n            Default: 0.5.\n        nms_iou_thr (float|optional): nms IoU threshold, the detection results\n            have done nms in the detector, only applied when users want to\n            change the nms IoU threshold. 
Default: None.\n    \"\"\"\n    true_positives = np.zeros_like(gt_labels)\n    for det_label, det_bboxes in enumerate(result):\n        if nms_iou_thr:\n            det_bboxes, _ = nms(\n                det_bboxes[:, :4],\n                det_bboxes[:, -1],\n                nms_iou_thr,\n                score_threshold=score_thr)\n        ious = bbox_overlaps(det_bboxes[:, :4], gt_bboxes)\n        for i, det_bbox in enumerate(det_bboxes):\n            score = det_bbox[4]\n            det_match = 0\n            if score >= score_thr:\n                for j, gt_label in enumerate(gt_labels):\n                    if ious[i, j] >= tp_iou_thr:\n                        det_match += 1\n                        if gt_label == det_label:\n                            true_positives[j] += 1  # TP\n                        confusion_matrix[gt_label, det_label] += 1\n                if det_match == 0:  # BG FP\n                    confusion_matrix[-1, det_label] += 1\n    for num_tp, gt_label in zip(true_positives, gt_labels):\n        if num_tp == 0:  # FN\n            confusion_matrix[gt_label, -1] += 1\n\n\ndef plot_confusion_matrix(confusion_matrix,\n                          labels,\n                          save_dir=None,\n                          show=True,\n                          title='Normalized Confusion Matrix',\n                          color_theme='plasma'):\n    \"\"\"Draw confusion matrix with matplotlib.\n\n    Args:\n        confusion_matrix (ndarray): The confusion matrix.\n        labels (list[str]): List of class names.\n        save_dir (str|optional): If set, save the confusion matrix plot to the\n            given path. Default: None.\n        show (bool): Whether to show the plot. Default: True.\n        title (str): Title of the plot. Default: `Normalized Confusion Matrix`.\n        color_theme (str): Theme of the matrix color map. 
Default: `plasma`.\n    \"\"\"\n    # normalize the confusion matrix\n    per_label_sums = confusion_matrix.sum(axis=1)[:, np.newaxis]\n    confusion_matrix = \\\n        confusion_matrix.astype(np.float32) / per_label_sums * 100\n\n    num_classes = len(labels)\n    fig, ax = plt.subplots(\n        figsize=(0.5 * num_classes, 0.5 * num_classes * 0.8), dpi=180)\n    cmap = plt.get_cmap(color_theme)\n    im = ax.imshow(confusion_matrix, cmap=cmap)\n    plt.colorbar(mappable=im, ax=ax)\n\n    title_font = {'weight': 'bold', 'size': 12}\n    ax.set_title(title, fontdict=title_font)\n    label_font = {'size': 10}\n    plt.ylabel('Ground Truth Label', fontdict=label_font)\n    plt.xlabel('Prediction Label', fontdict=label_font)\n\n    # draw locator\n    xmajor_locator = MultipleLocator(1)\n    xminor_locator = MultipleLocator(0.5)\n    ax.xaxis.set_major_locator(xmajor_locator)\n    ax.xaxis.set_minor_locator(xminor_locator)\n    ymajor_locator = MultipleLocator(1)\n    yminor_locator = MultipleLocator(0.5)\n    ax.yaxis.set_major_locator(ymajor_locator)\n    ax.yaxis.set_minor_locator(yminor_locator)\n\n    # draw grid\n    ax.grid(True, which='minor', linestyle='-')\n\n    # draw label\n    ax.set_xticks(np.arange(num_classes))\n    ax.set_yticks(np.arange(num_classes))\n    ax.set_xticklabels(labels)\n    ax.set_yticklabels(labels)\n\n    ax.tick_params(\n        axis='x', bottom=False, top=True, labelbottom=False, labeltop=True)\n    plt.setp(\n        ax.get_xticklabels(), rotation=45, ha='left', rotation_mode='anchor')\n\n    # draw confution matrix value\n    for i in range(num_classes):\n        for j in range(num_classes):\n            ax.text(\n                j,\n                i,\n                '{}%'.format(\n                    int(confusion_matrix[\n                        i,\n                        j]) if not np.isnan(confusion_matrix[i, j]) else -1),\n                ha='center',\n                va='center',\n                color='w',\n                size=7)\n\n    ax.set_ylim(len(confusion_matrix) - 0.5, -0.5)  # matplotlib>3.1.1\n\n    fig.tight_layout()\n    if save_dir is not None:\n        plt.savefig(\n            os.path.join(save_dir, 'confusion_matrix.png'), format='png')\n    if show:\n        plt.show()\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    results = mmcv.load(args.prediction_path)\n    assert isinstance(results, list)\n    if isinstance(results[0], list):\n        pass\n    elif isinstance(results[0], tuple):\n        results = [result[0] for result in results]\n    else:\n        raise TypeError('invalid type of prediction results')\n\n    if isinstance(cfg.data.test, dict):\n        cfg.data.test.test_mode = True\n    elif isinstance(cfg.data.test, list):\n        for ds_cfg in cfg.data.test:\n            ds_cfg.test_mode = True\n    dataset = build_dataset(cfg.data.test)\n\n    confusion_matrix = calculate_confusion_matrix(dataset, results,\n                                                  args.score_thr,\n                                                  args.nms_iou_thr,\n                                                  args.tp_iou_thr)\n    plot_confusion_matrix(\n        confusion_matrix,\n        dataset.CLASSES + ('background', ),\n        
save_dir=args.save_dir,\n        show=args.show,\n        color_theme=args.color_theme)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/eval_metric.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\n\nimport mmcv\nfrom mmcv import Config, DictAction\n\nfrom mmdet.datasets import build_dataset\nfrom mmdet.utils import replace_cfg_vals, update_data_root\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Evaluate metric of the '\n                                     'results saved in pkl format')\n    parser.add_argument('config', help='Config of the model')\n    parser.add_argument('pkl_results', help='Results in pickle format')\n    parser.add_argument(\n        '--format-only',\n        action='store_true',\n        help='Format the output results without perform evaluation. It is'\n        'useful when you want to format the result to a specific format and '\n        'submit it to the test server')\n    parser.add_argument(\n        '--eval',\n        type=str,\n        nargs='+',\n        help='Evaluation metrics, which depends on the dataset, e.g., \"bbox\",'\n        ' \"segm\", \"proposal\" for COCO, and \"mAP\", \"recall\" for PASCAL VOC')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--eval-options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    assert args.eval or args.format_only, (\n        'Please specify at least one operation (eval/format the results) with '\n        'the argument \"--eval\", \"--format-only\"')\n    if args.eval and args.format_only:\n        raise ValueError('--eval and --format_only cannot be both specified')\n\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    cfg.data.test.test_mode = True\n\n    dataset = build_dataset(cfg.data.test)\n    outputs = mmcv.load(args.pkl_results)\n\n    kwargs = {} if args.eval_options is None else args.eval_options\n    if args.format_only:\n        dataset.format_results(outputs, **kwargs)\n    if args.eval:\n        eval_kwargs = cfg.get('evaluation', {}).copy()\n        # hard-code way to remove EvalHook args\n        for key in [\n                'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',\n                'rule'\n        ]:\n            eval_kwargs.pop(key, None)\n        eval_kwargs.update(dict(metric=args.eval, **kwargs))\n        print(dataset.evaluate(outputs, **eval_kwargs))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/get_flops.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\n\nimport numpy as np\nimport torch\nfrom mmcv import Config, DictAction\n\nfrom mmdet.models import build_detector\n\ntry:\n    from mmcv.cnn import get_model_complexity_info\nexcept ImportError:\n    raise ImportError('Please upgrade mmcv to >0.6.2')\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Train a detector')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument(\n        '--shape',\n        type=int,\n        nargs='+',\n        default=[1280, 800],\n        help='input image size')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--size-divisor',\n        type=int,\n        default=32,\n        help='Pad the input image, the minimum size that is divisible '\n        'by size_divisor, -1 means do not pad the image.')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n\n    args = parse_args()\n\n    if len(args.shape) == 1:\n        h = w = args.shape[0]\n    elif len(args.shape) == 2:\n        h, w = args.shape\n    else:\n        raise ValueError('invalid input shape')\n    ori_shape = (3, h, w)\n    divisor = args.size_divisor\n    if divisor > 0:\n        h = int(np.ceil(h / divisor)) * divisor\n        w = int(np.ceil(w / divisor)) * divisor\n\n    input_shape = (3, h, w)\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    model = build_detector(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n    if torch.cuda.is_available():\n        model.cuda()\n    model.eval()\n\n    if hasattr(model, 'forward_dummy'):\n        model.forward = model.forward_dummy\n    else:\n        raise NotImplementedError(\n            'FLOPs counter is currently not currently supported with {}'.\n            format(model.__class__.__name__))\n\n    flops, params = get_model_complexity_info(model, input_shape)\n    split_line = '=' * 30\n\n    if divisor > 0 and \\\n            input_shape != ori_shape:\n        print(f'{split_line}\\nUse size divisor set input shape '\n              f'from {ori_shape} to {input_shape}\\n')\n    print(f'{split_line}\\nInput shape: {input_shape}\\n'\n          f'Flops: {flops}\\nParams: {params}\\n{split_line}')\n    print('!!!Please be cautious if you use the results in papers. '\n          'You may need to check if all ops are supported and verify that the '\n          'flops computation is correct.')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/optimize_anchors.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"Optimize anchor settings on a specific dataset.\n\nThis script provides two method to optimize YOLO anchors including k-means\nanchor cluster and differential evolution. You can use ``--algorithm k-means``\nand ``--algorithm differential_evolution`` to switch two method.\n\nExample:\n    Use k-means anchor cluster::\n\n        python tools/analysis_tools/optimize_anchors.py ${CONFIG} \\\n        --algorithm k-means --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \\\n        --output-dir ${OUTPUT_DIR}\n    Use differential evolution to optimize anchors::\n\n        python tools/analysis_tools/optimize_anchors.py ${CONFIG} \\\n        --algorithm differential_evolution \\\n        --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \\\n        --output-dir ${OUTPUT_DIR}\n\"\"\"\nimport argparse\nimport os.path as osp\n\nimport mmcv\nimport numpy as np\nimport torch\nfrom mmcv import Config\nfrom scipy.optimize import differential_evolution\n\nfrom mmdet.core import bbox_cxcywh_to_xyxy, bbox_overlaps, bbox_xyxy_to_cxcywh\nfrom mmdet.datasets import build_dataset\nfrom mmdet.utils import get_root_logger, replace_cfg_vals, update_data_root\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Optimize anchor parameters.')\n    parser.add_argument('config', help='Train config file path.')\n    parser.add_argument(\n        '--device', default='cuda:0', help='Device used for calculating.')\n    parser.add_argument(\n        '--input-shape',\n        type=int,\n        nargs='+',\n        default=[608, 608],\n        help='input image size')\n    parser.add_argument(\n        '--algorithm',\n        default='differential_evolution',\n        help='Algorithm used for anchor optimizing.'\n        'Support k-means and differential_evolution for YOLO.')\n    parser.add_argument(\n        '--iters',\n        default=1000,\n        type=int,\n        help='Maximum iterations for optimizer.')\n    parser.add_argument(\n        '--output-dir',\n        default=None,\n        type=str,\n        help='Path to save anchor optimize result.')\n\n    args = parser.parse_args()\n    return args\n\n\nclass BaseAnchorOptimizer:\n    \"\"\"Base class for anchor optimizer.\n\n    Args:\n        dataset (obj:`Dataset`): Dataset object.\n        input_shape (list[int]): Input image shape of the model.\n            Format in [width, height].\n        logger (obj:`logging.Logger`): The logger for logging.\n        device (str, optional): Device used for calculating.\n            Default: 'cuda:0'\n        out_dir (str, optional): Path to save anchor optimize result.\n            Default: None\n    \"\"\"\n\n    def __init__(self,\n                 dataset,\n                 input_shape,\n                 logger,\n                 device='cuda:0',\n                 out_dir=None):\n        self.dataset = dataset\n        self.input_shape = input_shape\n        self.logger = logger\n        self.device = device\n        self.out_dir = out_dir\n        bbox_whs, img_shapes = self.get_whs_and_shapes()\n        ratios = img_shapes.max(1, keepdims=True) / np.array([input_shape])\n\n        # resize to input shape\n        self.bbox_whs = bbox_whs / ratios\n\n    def get_whs_and_shapes(self):\n        \"\"\"Get widths and heights of bboxes and shapes of images.\n\n        Returns:\n            tuple[np.ndarray]: Array of bbox shapes and array of image\n            shapes with shape (num_bboxes, 2) in [width, height] format.\n        \"\"\"\n        
self.logger.info('Collecting bboxes from annotation...')\n        bbox_whs = []\n        img_shapes = []\n        prog_bar = mmcv.ProgressBar(len(self.dataset))\n        for idx in range(len(self.dataset)):\n            ann = self.dataset.get_ann_info(idx)\n            data_info = self.dataset.data_infos[idx]\n            img_shape = np.array([data_info['width'], data_info['height']])\n            gt_bboxes = ann['bboxes']\n            for bbox in gt_bboxes:\n                wh = bbox[2:4] - bbox[0:2]\n                img_shapes.append(img_shape)\n                bbox_whs.append(wh)\n            prog_bar.update()\n        print('\\n')\n        bbox_whs = np.array(bbox_whs)\n        img_shapes = np.array(img_shapes)\n        self.logger.info(f'Collected {bbox_whs.shape[0]} bboxes.')\n        return bbox_whs, img_shapes\n\n    def get_zero_center_bbox_tensor(self):\n        \"\"\"Get a tensor of bboxes centered at (0, 0).\n\n        Returns:\n            Tensor: Tensor of bboxes with shape (num_bboxes, 4)\n            in [xmin, ymin, xmax, ymax] format.\n        \"\"\"\n        whs = torch.from_numpy(self.bbox_whs).to(\n            self.device, dtype=torch.float32)\n        bboxes = bbox_cxcywh_to_xyxy(\n            torch.cat([torch.zeros_like(whs), whs], dim=1))\n        return bboxes\n\n    def optimize(self):\n        raise NotImplementedError\n\n    def save_result(self, anchors, path=None):\n        anchor_results = []\n        for w, h in anchors:\n            anchor_results.append([round(w), round(h)])\n        self.logger.info(f'Anchor optimize result:{anchor_results}')\n        if path:\n            json_path = osp.join(path, 'anchor_optimize_result.json')\n            mmcv.dump(anchor_results, json_path)\n            self.logger.info(f'Result saved in {json_path}')\n\n\nclass YOLOKMeansAnchorOptimizer(BaseAnchorOptimizer):\n    r\"\"\"YOLO anchor optimizer using k-means. 
Code refer to `AlexeyAB/darknet.\n    <https://github.com/AlexeyAB/darknet/blob/master/src/detector.c>`_.\n\n    Args:\n        num_anchors (int) : Number of anchors.\n        iters (int): Maximum iterations for k-means.\n    \"\"\"\n\n    def __init__(self, num_anchors, iters, **kwargs):\n\n        super(YOLOKMeansAnchorOptimizer, self).__init__(**kwargs)\n        self.num_anchors = num_anchors\n        self.iters = iters\n\n    def optimize(self):\n        anchors = self.kmeans_anchors()\n        self.save_result(anchors, self.out_dir)\n\n    def kmeans_anchors(self):\n        self.logger.info(\n            f'Start cluster {self.num_anchors} YOLO anchors with K-means...')\n        bboxes = self.get_zero_center_bbox_tensor()\n        cluster_center_idx = torch.randint(\n            0, bboxes.shape[0], (self.num_anchors, )).to(self.device)\n\n        assignments = torch.zeros((bboxes.shape[0], )).to(self.device)\n        cluster_centers = bboxes[cluster_center_idx]\n        if self.num_anchors == 1:\n            cluster_centers = self.kmeans_maximization(bboxes, assignments,\n                                                       cluster_centers)\n            anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy()\n            anchors = sorted(anchors, key=lambda x: x[0] * x[1])\n            return anchors\n\n        prog_bar = mmcv.ProgressBar(self.iters)\n        for i in range(self.iters):\n            converged, assignments = self.kmeans_expectation(\n                bboxes, assignments, cluster_centers)\n            if converged:\n                self.logger.info(f'K-means process has converged at iter {i}.')\n                break\n            cluster_centers = self.kmeans_maximization(bboxes, assignments,\n                                                       cluster_centers)\n            prog_bar.update()\n        print('\\n')\n        avg_iou = bbox_overlaps(bboxes,\n                                cluster_centers).max(1)[0].mean().item()\n\n        anchors = bbox_xyxy_to_cxcywh(cluster_centers)[:, 2:].cpu().numpy()\n        anchors = sorted(anchors, key=lambda x: x[0] * x[1])\n        self.logger.info(f'Anchor cluster finish. 
Average IOU: {avg_iou}')\n\n        return anchors\n\n    def kmeans_maximization(self, bboxes, assignments, centers):\n        \"\"\"Maximization part of EM algorithm(Expectation-Maximization)\"\"\"\n        new_centers = torch.zeros_like(centers)\n        for i in range(centers.shape[0]):\n            mask = (assignments == i)\n            if mask.sum():\n                new_centers[i, :] = bboxes[mask].mean(0)\n        return new_centers\n\n    def kmeans_expectation(self, bboxes, assignments, centers):\n        \"\"\"Expectation part of EM algorithm(Expectation-Maximization)\"\"\"\n        ious = bbox_overlaps(bboxes, centers)\n        closest = ious.argmax(1)\n        converged = (closest == assignments).all()\n        return converged, closest\n\n\nclass YOLODEAnchorOptimizer(BaseAnchorOptimizer):\n    \"\"\"YOLO anchor optimizer using differential evolution algorithm.\n\n    Args:\n        num_anchors (int) : Number of anchors.\n        iters (int): Maximum iterations for k-means.\n        strategy (str): The differential evolution strategy to use.\n            Should be one of:\n\n                - 'best1bin'\n                - 'best1exp'\n                - 'rand1exp'\n                - 'randtobest1exp'\n                - 'currenttobest1exp'\n                - 'best2exp'\n                - 'rand2exp'\n                - 'randtobest1bin'\n                - 'currenttobest1bin'\n                - 'best2bin'\n                - 'rand2bin'\n                - 'rand1bin'\n\n            Default: 'best1bin'.\n        population_size (int): Total population size of evolution algorithm.\n            Default: 15.\n        convergence_thr (float): Tolerance for convergence, the\n            optimizing stops when ``np.std(pop) <= abs(convergence_thr)\n            + convergence_thr * np.abs(np.mean(population_energies))``,\n            respectively. Default: 0.0001.\n        mutation (tuple[float]): Range of dithering randomly changes the\n            mutation constant. 
Default: (0.5, 1).\n        recombination (float): Recombination constant of crossover probability.\n            Default: 0.7.\n    \"\"\"\n\n    def __init__(self,\n                 num_anchors,\n                 iters,\n                 strategy='best1bin',\n                 population_size=15,\n                 convergence_thr=0.0001,\n                 mutation=(0.5, 1),\n                 recombination=0.7,\n                 **kwargs):\n\n        super(YOLODEAnchorOptimizer, self).__init__(**kwargs)\n\n        self.num_anchors = num_anchors\n        self.iters = iters\n        self.strategy = strategy\n        self.population_size = population_size\n        self.convergence_thr = convergence_thr\n        self.mutation = mutation\n        self.recombination = recombination\n\n    def optimize(self):\n        anchors = self.differential_evolution()\n        self.save_result(anchors, self.out_dir)\n\n    def differential_evolution(self):\n        bboxes = self.get_zero_center_bbox_tensor()\n\n        bounds = []\n        for i in range(self.num_anchors):\n            bounds.extend([(0, self.input_shape[0]), (0, self.input_shape[1])])\n\n        result = differential_evolution(\n            func=self.avg_iou_cost,\n            bounds=bounds,\n            args=(bboxes, ),\n            strategy=self.strategy,\n            maxiter=self.iters,\n            popsize=self.population_size,\n            tol=self.convergence_thr,\n            mutation=self.mutation,\n            recombination=self.recombination,\n            updating='immediate',\n            disp=True)\n        self.logger.info(\n            f'Anchor evolution finish. Average IOU: {1 - result.fun}')\n        anchors = [(w, h) for w, h in zip(result.x[::2], result.x[1::2])]\n        anchors = sorted(anchors, key=lambda x: x[0] * x[1])\n        return anchors\n\n    @staticmethod\n    def avg_iou_cost(anchor_params, bboxes):\n        assert len(anchor_params) % 2 == 0\n        anchor_whs = torch.tensor(\n            [[w, h]\n             for w, h in zip(anchor_params[::2], anchor_params[1::2])]).to(\n                 bboxes.device, dtype=bboxes.dtype)\n        anchor_boxes = bbox_cxcywh_to_xyxy(\n            torch.cat([torch.zeros_like(anchor_whs), anchor_whs], dim=1))\n        ious = bbox_overlaps(bboxes, anchor_boxes)\n        max_ious, _ = ious.max(1)\n        cost = 1 - max_ious.mean().item()\n        return cost\n\n\ndef main():\n    logger = get_root_logger()\n    args = parse_args()\n    cfg = args.config\n    cfg = Config.fromfile(cfg)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    input_shape = args.input_shape\n    assert len(input_shape) == 2\n\n    anchor_type = cfg.model.bbox_head.anchor_generator.type\n    assert anchor_type == 'YOLOAnchorGenerator', \\\n        f'Only support optimize YOLOAnchor, but get {anchor_type}.'\n\n    base_sizes = cfg.model.bbox_head.anchor_generator.base_sizes\n    num_anchors = sum([len(sizes) for sizes in base_sizes])\n\n    train_data_cfg = cfg.data.train\n    while 'dataset' in train_data_cfg:\n        train_data_cfg = train_data_cfg['dataset']\n    dataset = build_dataset(train_data_cfg)\n\n    if args.algorithm == 'k-means':\n        optimizer = YOLOKMeansAnchorOptimizer(\n            dataset=dataset,\n            input_shape=input_shape,\n            device=args.device,\n            num_anchors=num_anchors,\n            iters=args.iters,\n            
logger=logger,\n            out_dir=args.output_dir)\n    elif args.algorithm == 'differential_evolution':\n        optimizer = YOLODEAnchorOptimizer(\n            dataset=dataset,\n            input_shape=input_shape,\n            device=args.device,\n            num_anchors=num_anchors,\n            iters=args.iters,\n            logger=logger,\n            out_dir=args.output_dir)\n    else:\n        raise NotImplementedError(\n            f'Only support k-means and differential_evolution, '\n            f'but get {args.algorithm}')\n\n    optimizer.optimize()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/robustness_eval.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport os.path as osp\nfrom argparse import ArgumentParser\n\nimport mmcv\nimport numpy as np\n\n\ndef print_coco_results(results):\n\n    def _print(result, ap=1, iouThr=None, areaRng='all', maxDets=100):\n        titleStr = 'Average Precision' if ap == 1 else 'Average Recall'\n        typeStr = '(AP)' if ap == 1 else '(AR)'\n        iouStr = '0.50:0.95' \\\n            if iouThr is None else f'{iouThr:0.2f}'\n        iStr = f' {titleStr:<18} {typeStr} @[ IoU={iouStr:<9} | '\n        iStr += f'area={areaRng:>6s} | maxDets={maxDets:>3d} ] = {result:0.3f}'\n        print(iStr)\n\n    stats = np.zeros((12, ))\n    stats[0] = _print(results[0], 1)\n    stats[1] = _print(results[1], 1, iouThr=.5)\n    stats[2] = _print(results[2], 1, iouThr=.75)\n    stats[3] = _print(results[3], 1, areaRng='small')\n    stats[4] = _print(results[4], 1, areaRng='medium')\n    stats[5] = _print(results[5], 1, areaRng='large')\n    stats[6] = _print(results[6], 0, maxDets=1)\n    stats[7] = _print(results[7], 0, maxDets=10)\n    stats[8] = _print(results[8], 0)\n    stats[9] = _print(results[9], 0, areaRng='small')\n    stats[10] = _print(results[10], 0, areaRng='medium')\n    stats[11] = _print(results[11], 0, areaRng='large')\n\n\ndef get_coco_style_results(filename,\n                           task='bbox',\n                           metric=None,\n                           prints='mPC',\n                           aggregate='benchmark'):\n\n    assert aggregate in ['benchmark', 'all']\n\n    if prints == 'all':\n        prints = ['P', 'mPC', 'rPC']\n    elif isinstance(prints, str):\n        prints = [prints]\n    for p in prints:\n        assert p in ['P', 'mPC', 'rPC']\n\n    if metric is None:\n        metrics = [\n            'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100',\n            'ARs', 'ARm', 'ARl'\n        ]\n    elif isinstance(metric, list):\n        metrics = metric\n    else:\n        metrics = [metric]\n\n    for metric_name in metrics:\n        assert metric_name in [\n            'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10', 'AR100',\n            'ARs', 'ARm', 'ARl'\n        ]\n\n    eval_output = mmcv.load(filename)\n\n    num_distortions = len(list(eval_output.keys()))\n    results = np.zeros((num_distortions, 6, len(metrics)), dtype='float32')\n\n    for corr_i, distortion in enumerate(eval_output):\n        for severity in eval_output[distortion]:\n            for metric_j, metric_name in enumerate(metrics):\n                mAP = eval_output[distortion][severity][task][metric_name]\n                results[corr_i, severity, metric_j] = mAP\n\n    P = results[0, 0, :]\n    if aggregate == 'benchmark':\n        mPC = np.mean(results[:15, 1:, :], axis=(0, 1))\n    else:\n        mPC = np.mean(results[:, 1:, :], axis=(0, 1))\n    rPC = mPC / P\n\n    print(f'\\nmodel: {osp.basename(filename)}')\n    if metric is None:\n        if 'P' in prints:\n            print(f'Performance on Clean Data [P] ({task})')\n            print_coco_results(P)\n        if 'mPC' in prints:\n            print(f'Mean Performance under Corruption [mPC] ({task})')\n            print_coco_results(mPC)\n        if 'rPC' in prints:\n            print(f'Relative Performance under Corruption [rPC] ({task})')\n            print_coco_results(rPC)\n    else:\n        if 'P' in prints:\n            print(f'Performance on Clean Data [P] ({task})')\n            for metric_i, metric_name in enumerate(metrics):\n                
print(f'{metric_name:5} =  {P[metric_i]:0.3f}')\n        if 'mPC' in prints:\n            print(f'Mean Performance under Corruption [mPC] ({task})')\n            for metric_i, metric_name in enumerate(metrics):\n                print(f'{metric_name:5} =  {mPC[metric_i]:0.3f}')\n        if 'rPC' in prints:\n            print(f'Relative Performance under Corruption [rPC] ({task})')\n            for metric_i, metric_name in enumerate(metrics):\n                print(f'{metric_name:5} => {rPC[metric_i] * 100:0.1f} %')\n\n    return results\n\n\ndef get_voc_style_results(filename, prints='mPC', aggregate='benchmark'):\n\n    assert aggregate in ['benchmark', 'all']\n\n    if prints == 'all':\n        prints = ['P', 'mPC', 'rPC']\n    elif isinstance(prints, str):\n        prints = [prints]\n    for p in prints:\n        assert p in ['P', 'mPC', 'rPC']\n\n    eval_output = mmcv.load(filename)\n\n    num_distortions = len(list(eval_output.keys()))\n    results = np.zeros((num_distortions, 6, 20), dtype='float32')\n\n    for i, distortion in enumerate(eval_output):\n        for severity in eval_output[distortion]:\n            mAP = [\n                eval_output[distortion][severity][j]['ap']\n                for j in range(len(eval_output[distortion][severity]))\n            ]\n            results[i, severity, :] = mAP\n\n    P = results[0, 0, :]\n    if aggregate == 'benchmark':\n        mPC = np.mean(results[:15, 1:, :], axis=(0, 1))\n    else:\n        mPC = np.mean(results[:, 1:, :], axis=(0, 1))\n    rPC = mPC / P\n\n    print(f'\\nmodel: {osp.basename(filename)}')\n    if 'P' in prints:\n        print(f'Performance on Clean Data [P] in AP50 = {np.mean(P):0.3f}')\n    if 'mPC' in prints:\n        print('Mean Performance under Corruption [mPC] in AP50 = '\n              f'{np.mean(mPC):0.3f}')\n    if 'rPC' in prints:\n        print('Relative Performance under Corruption [rPC] in % = '\n              f'{np.mean(rPC) * 100:0.1f}')\n\n    return np.mean(results, axis=2, keepdims=True)\n\n\ndef get_results(filename,\n                dataset='coco',\n                task='bbox',\n                metric=None,\n                prints='mPC',\n                aggregate='benchmark'):\n    assert dataset in ['coco', 'voc', 'cityscapes']\n\n    if dataset in ['coco', 'cityscapes']:\n        results = get_coco_style_results(\n            filename,\n            task=task,\n            metric=metric,\n            prints=prints,\n            aggregate=aggregate)\n    elif dataset == 'voc':\n        if task != 'bbox':\n            print('Only bbox analysis is supported for Pascal VOC')\n            print('Will report bbox results\\n')\n        if metric not in [None, ['AP'], ['AP50']]:\n            print('Only the AP50 metric is supported for Pascal VOC')\n            print('Will report AP50 metric\\n')\n        results = get_voc_style_results(\n            filename, prints=prints, aggregate=aggregate)\n\n    return results\n\n\ndef get_distortions_from_file(filename):\n\n    eval_output = mmcv.load(filename)\n\n    return get_distortions_from_results(eval_output)\n\n\ndef get_distortions_from_results(eval_output):\n    distortions = []\n    for i, distortion in enumerate(eval_output):\n        distortions.append(distortion.replace('_', ' '))\n    return distortions\n\n\ndef main():\n    parser = ArgumentParser(description='Corruption Result Analysis')\n    parser.add_argument('filename', help='result file path')\n    parser.add_argument(\n        '--dataset',\n        type=str,\n        choices=['coco', 
'voc', 'cityscapes'],\n        default='coco',\n        help='dataset type')\n    parser.add_argument(\n        '--task',\n        type=str,\n        nargs='+',\n        choices=['bbox', 'segm'],\n        default=['bbox'],\n        help='task to report')\n    parser.add_argument(\n        '--metric',\n        nargs='+',\n        choices=[\n            None, 'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10',\n            'AR100', 'ARs', 'ARm', 'ARl'\n        ],\n        default=None,\n        help='metric to report')\n    parser.add_argument(\n        '--prints',\n        type=str,\n        nargs='+',\n        choices=['P', 'mPC', 'rPC'],\n        default='mPC',\n        help='corruption benchmark metric to print')\n    parser.add_argument(\n        '--aggregate',\n        type=str,\n        choices=['all', 'benchmark'],\n        default='benchmark',\n        help='aggregate all results or only those \\\n        for benchmark corruptions')\n\n    args = parser.parse_args()\n\n    for task in args.task:\n        get_results(\n            args.filename,\n            dataset=args.dataset,\n            task=task,\n            metric=args.metric,\n            prints=args.prints,\n            aggregate=args.aggregate)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/analysis_tools/test_robustness.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport copy\nimport os\nimport os.path as osp\n\nimport mmcv\nimport torch\nfrom mmcv import DictAction\nfrom mmcv.parallel import MMDataParallel, MMDistributedDataParallel\nfrom mmcv.runner import (get_dist_info, init_dist, load_checkpoint,\n                         wrap_fp16_model)\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\n\nfrom mmdet import datasets\nfrom mmdet.apis import multi_gpu_test, set_random_seed, single_gpu_test\nfrom mmdet.core import eval_map\nfrom mmdet.datasets import build_dataloader, build_dataset\nfrom mmdet.models import build_detector\nfrom tools.analysis_tools.robustness_eval import get_results\n\n\ndef coco_eval_with_return(result_files,\n                          result_types,\n                          coco,\n                          max_dets=(100, 300, 1000)):\n    for res_type in result_types:\n        assert res_type in ['proposal', 'bbox', 'segm', 'keypoints']\n\n    if mmcv.is_str(coco):\n        coco = COCO(coco)\n    assert isinstance(coco, COCO)\n\n    eval_results = {}\n    for res_type in result_types:\n        result_file = result_files[res_type]\n        assert result_file.endswith('.json')\n\n        coco_dets = coco.loadRes(result_file)\n        img_ids = coco.getImgIds()\n        iou_type = 'bbox' if res_type == 'proposal' else res_type\n        cocoEval = COCOeval(coco, coco_dets, iou_type)\n        cocoEval.params.imgIds = img_ids\n        if res_type == 'proposal':\n            cocoEval.params.useCats = 0\n            cocoEval.params.maxDets = list(max_dets)\n        cocoEval.evaluate()\n        cocoEval.accumulate()\n        cocoEval.summarize()\n        if res_type == 'segm' or res_type == 'bbox':\n            metric_names = [\n                'AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'AR1', 'AR10',\n                'AR100', 'ARs', 'ARm', 'ARl'\n            ]\n            eval_results[res_type] = {\n                metric_names[i]: cocoEval.stats[i]\n                for i in range(len(metric_names))\n            }\n        else:\n            eval_results[res_type] = cocoEval.stats\n\n    return eval_results\n\n\ndef voc_eval_with_return(result_file,\n                         dataset,\n                         iou_thr=0.5,\n                         logger='print',\n                         only_ap=True):\n    det_results = mmcv.load(result_file)\n    annotations = [dataset.get_ann_info(i) for i in range(len(dataset))]\n    if hasattr(dataset, 'year') and dataset.year == 2007:\n        dataset_name = 'voc07'\n    else:\n        dataset_name = dataset.CLASSES\n    mean_ap, eval_results = eval_map(\n        det_results,\n        annotations,\n        scale_ranges=None,\n        iou_thr=iou_thr,\n        dataset=dataset_name,\n        logger=logger)\n\n    if only_ap:\n        eval_results = [{\n            'ap': eval_results[i]['ap']\n        } for i in range(len(eval_results))]\n\n    return mean_ap, eval_results\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='MMDet test detector')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--out', help='output result file')\n    parser.add_argument(\n        '--corruptions',\n        type=str,\n        nargs='+',\n        default='benchmark',\n        choices=[\n            'all', 'benchmark', 'noise', 'blur', 'weather', 'digital',\n            'holdout', 
'None', 'gaussian_noise', 'shot_noise', 'impulse_noise',\n            'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur', 'snow',\n            'frost', 'fog', 'brightness', 'contrast', 'elastic_transform',\n            'pixelate', 'jpeg_compression', 'speckle_noise', 'gaussian_blur',\n            'spatter', 'saturate'\n        ],\n        help='corruptions')\n    parser.add_argument(\n        '--severities',\n        type=int,\n        nargs='+',\n        default=[0, 1, 2, 3, 4, 5],\n        help='corruption severity levels')\n    parser.add_argument(\n        '--eval',\n        type=str,\n        nargs='+',\n        choices=['proposal', 'proposal_fast', 'bbox', 'segm', 'keypoints'],\n        help='eval types')\n    parser.add_argument(\n        '--iou-thr',\n        type=float,\n        default=0.5,\n        help='IoU threshold for pascal voc evaluation')\n    parser.add_argument(\n        '--summaries',\n        type=bool,\n        default=False,\n        help='Print summaries for every corruption and severity')\n    parser.add_argument(\n        '--workers', type=int, default=32, help='workers per gpu')\n    parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument(\n        '--show-dir', help='directory where painted images will be saved')\n    parser.add_argument(\n        '--show-score-thr',\n        type=float,\n        default=0.3,\n        help='score threshold (default: 0.3)')\n    parser.add_argument('--tmpdir', help='tmp dir for writing some results')\n    parser.add_argument('--seed', type=int, default=None, help='random seed')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    parser.add_argument(\n        '--final-prints',\n        type=str,\n        nargs='+',\n        choices=['P', 'mPC', 'rPC'],\n        default='mPC',\n        help='corruption benchmark metric to print at the end')\n    parser.add_argument(\n        '--final-prints-aggregate',\n        type=str,\n        choices=['all', 'benchmark'],\n        default='benchmark',\n        help='aggregate all results or only those for benchmark corruptions')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    assert args.out or args.show or args.show_dir, \\\n        ('Please specify at least one operation (save or show the results) '\n         'with the argument \"--out\", \"--show\" or \"show-dir\"')\n\n    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):\n        raise ValueError('The output file must be a pkl file.')\n\n    cfg = mmcv.Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n    cfg.model.pretrained = None\n    cfg.data.test.test_mode = True\n    if args.workers == 0:\n        args.workers = cfg.data.workers_per_gpu\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n\n    # set random seeds\n    if args.seed is not None:\n        set_random_seed(args.seed)\n\n    if 'all' in args.corruptions:\n        corruptions = [\n            'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur',\n            'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog',\n            'brightness', 'contrast', 'elastic_transform', 'pixelate',\n            'jpeg_compression', 'speckle_noise', 'gaussian_blur', 'spatter',\n            'saturate'\n        ]\n    elif 'benchmark' in args.corruptions:\n        corruptions = [\n            'gaussian_noise', 'shot_noise', 'impulse_noise', 'defocus_blur',\n            'glass_blur', 'motion_blur', 'zoom_blur', 'snow', 'frost', 'fog',\n            'brightness', 'contrast', 'elastic_transform', 'pixelate',\n            'jpeg_compression'\n        ]\n    elif 'noise' in args.corruptions:\n        corruptions = ['gaussian_noise', 'shot_noise', 'impulse_noise']\n    elif 'blur' in args.corruptions:\n        corruptions = [\n            'defocus_blur', 'glass_blur', 'motion_blur', 'zoom_blur'\n        ]\n    elif 'weather' in args.corruptions:\n        corruptions = ['snow', 'frost', 'fog', 'brightness']\n    elif 'digital' in args.corruptions:\n        corruptions = [\n            'contrast', 'elastic_transform', 'pixelate', 'jpeg_compression'\n        ]\n    elif 'holdout' in args.corruptions:\n        corruptions = ['speckle_noise', 'gaussian_blur', 'spatter', 'saturate']\n    elif 'None' in args.corruptions:\n        corruptions = ['None']\n        args.severities = [0]\n    else:\n        corruptions = args.corruptions\n\n    rank, _ = get_dist_info()\n    aggregated_results = {}\n    for corr_i, corruption in enumerate(corruptions):\n        aggregated_results[corruption] = {}\n        for sev_i, corruption_severity in enumerate(args.severities):\n            # evaluate severity 0 (= no corruption) only once\n            if corr_i > 0 and corruption_severity == 0:\n                aggregated_results[corruption][0] = \\\n                    aggregated_results[corruptions[0]][0]\n                continue\n\n            test_data_cfg = copy.deepcopy(cfg.data.test)\n            # assign corruption and severity\n            if corruption_severity > 0:\n 
               corruption_trans = dict(\n                    type='Corrupt',\n                    corruption=corruption,\n                    severity=corruption_severity)\n                # TODO: hard coded \"1\", we assume that the first step is\n                # loading images, which needs to be fixed in the future\n                test_data_cfg['pipeline'].insert(1, corruption_trans)\n\n            # print info\n            print(f'\\nTesting {corruption} at severity {corruption_severity}')\n\n            # build the dataloader\n            # TODO: support multiple images per gpu\n            #       (only minor changes are needed)\n            dataset = build_dataset(test_data_cfg)\n            data_loader = build_dataloader(\n                dataset,\n                samples_per_gpu=1,\n                workers_per_gpu=args.workers,\n                dist=distributed,\n                shuffle=False)\n\n            # build the model and load checkpoint\n            cfg.model.train_cfg = None\n            model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n            fp16_cfg = cfg.get('fp16', None)\n            if fp16_cfg is not None:\n                wrap_fp16_model(model)\n            checkpoint = load_checkpoint(\n                model, args.checkpoint, map_location='cpu')\n            # old versions did not save class info in checkpoints,\n            # this workaround is for backward compatibility\n            if 'CLASSES' in checkpoint.get('meta', {}):\n                model.CLASSES = checkpoint['meta']['CLASSES']\n            else:\n                model.CLASSES = dataset.CLASSES\n\n            if not distributed:\n                model = MMDataParallel(model, device_ids=[0])\n                show_dir = args.show_dir\n                if show_dir is not None:\n                    show_dir = osp.join(show_dir, corruption)\n                    show_dir = osp.join(show_dir, str(corruption_severity))\n                    if not osp.exists(show_dir):\n                        os.makedirs(show_dir)\n                outputs = single_gpu_test(model, data_loader, args.show,\n                                          show_dir, args.show_score_thr)\n            else:\n                model = MMDistributedDataParallel(\n                    model.cuda(),\n                    device_ids=[torch.cuda.current_device()],\n                    broadcast_buffers=False)\n                outputs = multi_gpu_test(model, data_loader, args.tmpdir)\n\n            if args.out and rank == 0:\n                eval_results_filename = (\n                    osp.splitext(args.out)[0] + '_results' +\n                    osp.splitext(args.out)[1])\n                mmcv.dump(outputs, args.out)\n                eval_types = args.eval\n                if cfg.dataset_type == 'VOCDataset':\n                    if eval_types:\n                        for eval_type in eval_types:\n                            if eval_type == 'bbox':\n                                test_dataset = mmcv.runner.obj_from_dict(\n                                    cfg.data.test, datasets)\n                                logger = 'print' if args.summaries else None\n                                mean_ap, eval_results = \\\n                                    voc_eval_with_return(\n                                        args.out, test_dataset,\n                                        args.iou_thr, logger)\n                                aggregated_results[corruption][\n                                    corruption_severity] 
= eval_results\n                            else:\n                                print('\\nOnly \"bbox\" evaluation \\\n                                is supported for pascal voc')\n                else:\n                    if eval_types:\n                        print(f'Starting to evaluate {\" and \".join(eval_types)}')\n                        if eval_types == ['proposal_fast']:\n                            result_file = args.out\n                        else:\n                            if not isinstance(outputs[0], dict):\n                                result_files = dataset.results2json(\n                                    outputs, args.out)\n                            else:\n                                for name in outputs[0]:\n                                    print(f'\\nEvaluating {name}')\n                                    outputs_ = [out[name] for out in outputs]\n                                    result_file = args.out + f'.{name}'\n                                    result_files = dataset.results2json(\n                                        outputs_, result_file)\n                        eval_results = coco_eval_with_return(\n                            result_files, eval_types, dataset.coco)\n                        aggregated_results[corruption][\n                            corruption_severity] = eval_results\n                    else:\n                        print('\\nNo task was selected for evaluation;'\n                              '\\nUse --eval to select a task')\n\n                # save results after each evaluation\n                mmcv.dump(aggregated_results, eval_results_filename)\n\n    if rank == 0:\n        # print final results\n        print('\\nAggregated results:')\n        prints = args.final_prints\n        aggregate = args.final_prints_aggregate\n\n        if cfg.dataset_type == 'VOCDataset':\n            get_results(\n                eval_results_filename,\n                dataset='voc',\n                prints=prints,\n                aggregate=aggregate)\n        else:\n            get_results(\n                eval_results_filename,\n                dataset='coco',\n                prints=prints,\n                aggregate=aggregate)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/dataset_converters/cityscapes.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport glob\nimport os.path as osp\n\nimport cityscapesscripts.helpers.labels as CSLabels\nimport mmcv\nimport numpy as np\nimport pycocotools.mask as maskUtils\n\n\ndef collect_files(img_dir, gt_dir):\n    suffix = 'leftImg8bit.png'\n    files = []\n    for img_file in glob.glob(osp.join(img_dir, '**/*.png')):\n        assert img_file.endswith(suffix), img_file\n        inst_file = gt_dir + img_file[\n            len(img_dir):-len(suffix)] + 'gtFine_instanceIds.png'\n        # Note that labelIds are not converted to trainId for seg map\n        segm_file = gt_dir + img_file[\n            len(img_dir):-len(suffix)] + 'gtFine_labelIds.png'\n        files.append((img_file, inst_file, segm_file))\n    assert len(files), f'No images found in {img_dir}'\n    print(f'Loaded {len(files)} images from {img_dir}')\n\n    return files\n\n\ndef collect_annotations(files, nproc=1):\n    print('Loading annotation images')\n    if nproc > 1:\n        images = mmcv.track_parallel_progress(\n            load_img_info, files, nproc=nproc)\n    else:\n        images = mmcv.track_progress(load_img_info, files)\n\n    return images\n\n\ndef load_img_info(files):\n    img_file, inst_file, segm_file = files\n    inst_img = mmcv.imread(inst_file, 'unchanged')\n    # ids < 24 are stuff labels (filtering them first is about 5% faster)\n    unique_inst_ids = np.unique(inst_img[inst_img >= 24])\n    anno_info = []\n    for inst_id in unique_inst_ids:\n        # For non-crowd annotations, inst_id // 1000 is the label_id\n        # Crowd annotations have <1000 instance ids\n        label_id = inst_id // 1000 if inst_id >= 1000 else inst_id\n        label = CSLabels.id2label[label_id]\n        if not label.hasInstances or label.ignoreInEval:\n            continue\n\n        category_id = label.id\n        iscrowd = int(inst_id < 1000)\n        mask = np.asarray(inst_img == inst_id, dtype=np.uint8, order='F')\n        mask_rle = maskUtils.encode(mask[:, :, None])[0]\n\n        area = maskUtils.area(mask_rle)\n        # convert to COCO style XYWH format\n        bbox = maskUtils.toBbox(mask_rle)\n\n        # for json encoding\n        mask_rle['counts'] = mask_rle['counts'].decode()\n\n        anno = dict(\n            iscrowd=iscrowd,\n            category_id=category_id,\n            bbox=bbox.tolist(),\n            area=area.tolist(),\n            segmentation=mask_rle)\n        anno_info.append(anno)\n    video_name = osp.basename(osp.dirname(img_file))\n    img_info = dict(\n        # remove img_prefix for filename\n        file_name=osp.join(video_name, osp.basename(img_file)),\n        height=inst_img.shape[0],\n        width=inst_img.shape[1],\n        anno_info=anno_info,\n        segm_file=osp.join(video_name, osp.basename(segm_file)))\n\n    return img_info\n\n\ndef cvt_annotations(image_infos, out_json_name):\n    out_json = dict()\n    img_id = 0\n    ann_id = 0\n    out_json['images'] = []\n    out_json['categories'] = []\n    out_json['annotations'] = []\n    for image_info in image_infos:\n        image_info['id'] = img_id\n        anno_infos = image_info.pop('anno_info')\n        out_json['images'].append(image_info)\n        for anno_info in anno_infos:\n            anno_info['image_id'] = img_id\n            anno_info['id'] = ann_id\n            out_json['annotations'].append(anno_info)\n            ann_id += 1\n        img_id += 1\n    for label in CSLabels.labels:\n        if label.hasInstances and not 
label.ignoreInEval:\n            cat = dict(id=label.id, name=label.name)\n            out_json['categories'].append(cat)\n\n    if len(out_json['annotations']) == 0:\n        out_json.pop('annotations')\n\n    mmcv.dump(out_json, out_json_name)\n    return out_json\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert Cityscapes annotations to COCO format')\n    parser.add_argument('cityscapes_path', help='cityscapes data path')\n    parser.add_argument('--img-dir', default='leftImg8bit', type=str)\n    parser.add_argument('--gt-dir', default='gtFine', type=str)\n    parser.add_argument('-o', '--out-dir', help='output path')\n    parser.add_argument(\n        '--nproc', default=1, type=int, help='number of process')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n    cityscapes_path = args.cityscapes_path\n    out_dir = args.out_dir if args.out_dir else cityscapes_path\n    mmcv.mkdir_or_exist(out_dir)\n\n    img_dir = osp.join(cityscapes_path, args.img_dir)\n    gt_dir = osp.join(cityscapes_path, args.gt_dir)\n\n    set_name = dict(\n        train='instancesonly_filtered_gtFine_train.json',\n        val='instancesonly_filtered_gtFine_val.json',\n        test='instancesonly_filtered_gtFine_test.json')\n\n    for split, json_name in set_name.items():\n        print(f'Converting {split} into {json_name}')\n        with mmcv.Timer(\n                print_tmpl='It took {}s to convert Cityscapes annotation'):\n            files = collect_files(\n                osp.join(img_dir, split), osp.join(gt_dir, split))\n            image_infos = collect_annotations(files, nproc=args.nproc)\n            cvt_annotations(image_infos, osp.join(out_dir, json_name))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/dataset_converters/images2coco.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\n\nimport mmcv\nfrom PIL import Image\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert images to coco format without annotations')\n    parser.add_argument('img_path', help='The root path of images')\n    parser.add_argument(\n        'classes', type=str, help='The text file name of storage class list')\n    parser.add_argument(\n        'out',\n        type=str,\n        help='The output annotation json file name, The save dir is in the '\n        'same directory as img_path')\n    parser.add_argument(\n        '-e',\n        '--exclude-extensions',\n        type=str,\n        nargs='+',\n        help='The suffix of images to be excluded, such as \"png\" and \"bmp\"')\n    args = parser.parse_args()\n    return args\n\n\ndef collect_image_infos(path, exclude_extensions=None):\n    img_infos = []\n\n    images_generator = mmcv.scandir(path, recursive=True)\n    for image_path in mmcv.track_iter_progress(list(images_generator)):\n        if exclude_extensions is None or (\n                exclude_extensions is not None\n                and not image_path.lower().endswith(exclude_extensions)):\n            image_path = os.path.join(path, image_path)\n            img_pillow = Image.open(image_path)\n            img_info = {\n                'filename': image_path,\n                'width': img_pillow.width,\n                'height': img_pillow.height,\n            }\n            img_infos.append(img_info)\n    return img_infos\n\n\ndef cvt_to_coco_json(img_infos, classes):\n    image_id = 0\n    coco = dict()\n    coco['images'] = []\n    coco['type'] = 'instance'\n    coco['categories'] = []\n    coco['annotations'] = []\n    image_set = set()\n\n    for category_id, name in enumerate(classes):\n        category_item = dict()\n        category_item['supercategory'] = str('none')\n        category_item['id'] = int(category_id)\n        category_item['name'] = str(name)\n        coco['categories'].append(category_item)\n\n    for img_dict in img_infos:\n        file_name = img_dict['filename']\n        assert file_name not in image_set\n        image_item = dict()\n        image_item['id'] = int(image_id)\n        image_item['file_name'] = str(file_name)\n        image_item['height'] = int(img_dict['height'])\n        image_item['width'] = int(img_dict['width'])\n        coco['images'].append(image_item)\n        image_set.add(file_name)\n\n        image_id += 1\n    return coco\n\n\ndef main():\n    args = parse_args()\n    assert args.out.endswith(\n        'json'), 'The output file name must be json suffix'\n\n    # 1 load image list info\n    img_infos = collect_image_infos(args.img_path, args.exclude_extensions)\n\n    # 2 convert to coco format data\n    classes = mmcv.list_from_file(args.classes)\n    coco_info = cvt_to_coco_json(img_infos, classes)\n\n    # 3 dump\n    save_dir = os.path.join(args.img_path, '..', 'annotations')\n    mmcv.mkdir_or_exist(save_dir)\n    save_path = os.path.join(save_dir, args.out)\n    mmcv.dump(coco_info, save_path)\n    print(f'save json file: {save_path}')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/dataset_converters/pascal_voc.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os.path as osp\nimport xml.etree.ElementTree as ET\n\nimport mmcv\nimport numpy as np\n\nfrom mmdet.core import voc_classes\n\nlabel_ids = {name: i for i, name in enumerate(voc_classes())}\n\n\ndef parse_xml(args):\n    xml_path, img_path = args\n    tree = ET.parse(xml_path)\n    root = tree.getroot()\n    size = root.find('size')\n    w = int(size.find('width').text)\n    h = int(size.find('height').text)\n    bboxes = []\n    labels = []\n    bboxes_ignore = []\n    labels_ignore = []\n    for obj in root.findall('object'):\n        name = obj.find('name').text\n        label = label_ids[name]\n        difficult = int(obj.find('difficult').text)\n        bnd_box = obj.find('bndbox')\n        bbox = [\n            int(bnd_box.find('xmin').text),\n            int(bnd_box.find('ymin').text),\n            int(bnd_box.find('xmax').text),\n            int(bnd_box.find('ymax').text)\n        ]\n        if difficult:\n            bboxes_ignore.append(bbox)\n            labels_ignore.append(label)\n        else:\n            bboxes.append(bbox)\n            labels.append(label)\n    if not bboxes:\n        bboxes = np.zeros((0, 4))\n        labels = np.zeros((0, ))\n    else:\n        bboxes = np.array(bboxes, ndmin=2) - 1\n        labels = np.array(labels)\n    if not bboxes_ignore:\n        bboxes_ignore = np.zeros((0, 4))\n        labels_ignore = np.zeros((0, ))\n    else:\n        bboxes_ignore = np.array(bboxes_ignore, ndmin=2) - 1\n        labels_ignore = np.array(labels_ignore)\n    annotation = {\n        'filename': img_path,\n        'width': w,\n        'height': h,\n        'ann': {\n            'bboxes': bboxes.astype(np.float32),\n            'labels': labels.astype(np.int64),\n            'bboxes_ignore': bboxes_ignore.astype(np.float32),\n            'labels_ignore': labels_ignore.astype(np.int64)\n        }\n    }\n    return annotation\n\n\ndef cvt_annotations(devkit_path, years, split, out_file):\n    if not isinstance(years, list):\n        years = [years]\n    annotations = []\n    for year in years:\n        filelist = osp.join(devkit_path,\n                            f'VOC{year}/ImageSets/Main/{split}.txt')\n        if not osp.isfile(filelist):\n            print(f'filelist does not exist: {filelist}, '\n                  f'skip voc{year} {split}')\n            return\n        img_names = mmcv.list_from_file(filelist)\n        xml_paths = [\n            osp.join(devkit_path, f'VOC{year}/Annotations/{img_name}.xml')\n            for img_name in img_names\n        ]\n        img_paths = [\n            f'VOC{year}/JPEGImages/{img_name}.jpg' for img_name in img_names\n        ]\n        part_annotations = mmcv.track_progress(parse_xml,\n                                               list(zip(xml_paths, img_paths)))\n        annotations.extend(part_annotations)\n    if out_file.endswith('json'):\n        annotations = cvt_to_coco_json(annotations)\n    mmcv.dump(annotations, out_file)\n    return annotations\n\n\ndef cvt_to_coco_json(annotations):\n    image_id = 0\n    annotation_id = 0\n    coco = dict()\n    coco['images'] = []\n    coco['type'] = 'instance'\n    coco['categories'] = []\n    coco['annotations'] = []\n    image_set = set()\n\n    def addAnnItem(annotation_id, image_id, category_id, bbox, difficult_flag):\n        annotation_item = dict()\n        annotation_item['segmentation'] = []\n\n        seg = []\n        # bbox[] is x1,y1,x2,y2\n        # left_top\n        
seg.append(int(bbox[0]))\n        seg.append(int(bbox[1]))\n        # left_bottom\n        seg.append(int(bbox[0]))\n        seg.append(int(bbox[3]))\n        # right_bottom\n        seg.append(int(bbox[2]))\n        seg.append(int(bbox[3]))\n        # right_top\n        seg.append(int(bbox[2]))\n        seg.append(int(bbox[1]))\n\n        annotation_item['segmentation'].append(seg)\n\n        xywh = np.array(\n            [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]])\n        annotation_item['area'] = int(xywh[2] * xywh[3])\n        if difficult_flag == 1:\n            annotation_item['ignore'] = 0\n            annotation_item['iscrowd'] = 1\n        else:\n            annotation_item['ignore'] = 0\n            annotation_item['iscrowd'] = 0\n        annotation_item['image_id'] = int(image_id)\n        annotation_item['bbox'] = xywh.astype(int).tolist()\n        annotation_item['category_id'] = int(category_id)\n        annotation_item['id'] = int(annotation_id)\n        coco['annotations'].append(annotation_item)\n        return annotation_id + 1\n\n    for category_id, name in enumerate(voc_classes()):\n        category_item = dict()\n        category_item['supercategory'] = str('none')\n        category_item['id'] = int(category_id)\n        category_item['name'] = str(name)\n        coco['categories'].append(category_item)\n\n    for ann_dict in annotations:\n        file_name = ann_dict['filename']\n        ann = ann_dict['ann']\n        assert file_name not in image_set\n        image_item = dict()\n        image_item['id'] = int(image_id)\n        image_item['file_name'] = str(file_name)\n        image_item['height'] = int(ann_dict['height'])\n        image_item['width'] = int(ann_dict['width'])\n        coco['images'].append(image_item)\n        image_set.add(file_name)\n\n        bboxes = ann['bboxes'][:, :4]\n        labels = ann['labels']\n        for bbox_id in range(len(bboxes)):\n            bbox = bboxes[bbox_id]\n            label = labels[bbox_id]\n            annotation_id = addAnnItem(\n                annotation_id, image_id, label, bbox, difficult_flag=0)\n\n        bboxes_ignore = ann['bboxes_ignore'][:, :4]\n        labels_ignore = ann['labels_ignore']\n        for bbox_id in range(len(bboxes_ignore)):\n            bbox = bboxes_ignore[bbox_id]\n            label = labels_ignore[bbox_id]\n            annotation_id = addAnnItem(\n                annotation_id, image_id, label, bbox, difficult_flag=1)\n\n        image_id += 1\n\n    return coco\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert PASCAL VOC annotations to mmdetection format')\n    parser.add_argument('devkit_path', help='pascal voc devkit path')\n    parser.add_argument('-o', '--out-dir', help='output path')\n    parser.add_argument(\n        '--out-format',\n        default='pkl',\n        choices=('pkl', 'coco'),\n        help='output format, \"coco\" indicates coco annotation format')\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n    devkit_path = args.devkit_path\n    out_dir = args.out_dir if args.out_dir else devkit_path\n    mmcv.mkdir_or_exist(out_dir)\n\n    years = []\n    if osp.isdir(osp.join(devkit_path, 'VOC2007')):\n        years.append('2007')\n    if osp.isdir(osp.join(devkit_path, 'VOC2012')):\n        years.append('2012')\n    if '2007' in years and '2012' in years:\n        years.append(['2007', '2012'])\n    if not years:\n        raise IOError(f'The devkit path {devkit_path} contains 
neither '\n                      '\"VOC2007\" nor \"VOC2012\" subfolder')\n    out_fmt = f'.{args.out_format}'\n    if args.out_format == 'coco':\n        out_fmt = '.json'\n    for year in years:\n        if year == '2007':\n            prefix = 'voc07'\n        elif year == '2012':\n            prefix = 'voc12'\n        elif year == ['2007', '2012']:\n            prefix = 'voc0712'\n        for split in ['train', 'val', 'trainval']:\n            dataset_name = prefix + '_' + split\n            print(f'processing {dataset_name} ...')\n            cvt_annotations(devkit_path, year, split,\n                            osp.join(out_dir, dataset_name + out_fmt))\n        if not isinstance(year, list):\n            dataset_name = prefix + '_test'\n            print(f'processing {dataset_name} ...')\n            cvt_annotations(devkit_path, year, 'test',\n                            osp.join(out_dir, dataset_name + out_fmt))\n    print('Done!')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/deployment/mmdet2torchserve.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nfrom argparse import ArgumentParser, Namespace\nfrom pathlib import Path\nfrom tempfile import TemporaryDirectory\n\nimport mmcv\n\ntry:\n    from model_archiver.model_packaging import package_model\n    from model_archiver.model_packaging_utils import ModelExportUtils\nexcept ImportError:\n    package_model = None\n\n\ndef mmdet2torchserve(\n    config_file: str,\n    checkpoint_file: str,\n    output_folder: str,\n    model_name: str,\n    model_version: str = '1.0',\n    force: bool = False,\n):\n    \"\"\"Converts MMDetection model (config + checkpoint) to TorchServe `.mar`.\n\n    Args:\n        config_file:\n            In MMDetection config format.\n            The contents vary for each task repository.\n        checkpoint_file:\n            In MMDetection checkpoint format.\n            The contents vary for each task repository.\n        output_folder:\n            Folder where `{model_name}.mar` will be created.\n            The file created will be in TorchServe archive format.\n        model_name:\n            If not None, used for naming the `{model_name}.mar` file\n            that will be created under `output_folder`.\n            If None, `{Path(checkpoint_file).stem}` will be used.\n        model_version:\n            Model's version.\n        force:\n            If True, if there is an existing `{model_name}.mar`\n            file under `output_folder` it will be overwritten.\n    \"\"\"\n    mmcv.mkdir_or_exist(output_folder)\n\n    config = mmcv.Config.fromfile(config_file)\n\n    with TemporaryDirectory() as tmpdir:\n        config.dump(f'{tmpdir}/config.py')\n\n        args = Namespace(\n            **{\n                'model_file': f'{tmpdir}/config.py',\n                'serialized_file': checkpoint_file,\n                'handler': f'{Path(__file__).parent}/mmdet_handler.py',\n                'model_name': model_name or Path(checkpoint_file).stem,\n                'version': model_version,\n                'export_path': output_folder,\n                'force': force,\n                'requirements_file': None,\n                'extra_files': None,\n                'runtime': 'python',\n                'archive_format': 'default'\n            })\n        manifest = ModelExportUtils.generate_manifest_json(args)\n        package_model(args, manifest)\n\n\ndef parse_args():\n    parser = ArgumentParser(\n        description='Convert MMDetection models to TorchServe `.mar` format.')\n    parser.add_argument('config', type=str, help='config file path')\n    parser.add_argument('checkpoint', type=str, help='checkpoint file path')\n    parser.add_argument(\n        '--output-folder',\n        type=str,\n        required=True,\n        help='Folder where `{model_name}.mar` will be created.')\n    parser.add_argument(\n        '--model-name',\n        type=str,\n        default=None,\n        help='If not None, used for naming the `{model_name}.mar`'\n        'file that will be created under `output_folder`.'\n        'If None, `{Path(checkpoint_file).stem}` will be used.')\n    parser.add_argument(\n        '--model-version',\n        type=str,\n        default='1.0',\n        help='Number used for versioning.')\n    parser.add_argument(\n        '-f',\n        '--force',\n        action='store_true',\n        help='overwrite the existing `{model_name}.mar`')\n    args = parser.parse_args()\n\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n\n    if package_model is None:\n        
raise ImportError('`torch-model-archiver` is required.'\n                          ' Try: pip install torch-model-archiver')\n\n    mmdet2torchserve(args.config, args.checkpoint, args.output_folder,\n                     args.model_name, args.model_version, args.force)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/deployment/mmdet_handler.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport base64\nimport os\n\nimport mmcv\nimport torch\nfrom ts.torch_handler.base_handler import BaseHandler\n\nfrom mmdet.apis import inference_detector, init_detector\n\n\nclass MMdetHandler(BaseHandler):\n    threshold = 0.5\n\n    def initialize(self, context):\n        properties = context.system_properties\n        self.map_location = 'cuda' if torch.cuda.is_available() else 'cpu'\n        self.device = torch.device(self.map_location + ':' +\n                                   str(properties.get('gpu_id')) if torch.cuda.\n                                   is_available() else self.map_location)\n        self.manifest = context.manifest\n\n        model_dir = properties.get('model_dir')\n        serialized_file = self.manifest['model']['serializedFile']\n        checkpoint = os.path.join(model_dir, serialized_file)\n        self.config_file = os.path.join(model_dir, 'config.py')\n\n        self.model = init_detector(self.config_file, checkpoint, self.device)\n        self.initialized = True\n\n    def preprocess(self, data):\n        images = []\n\n        for row in data:\n            image = row.get('data') or row.get('body')\n            if isinstance(image, str):\n                image = base64.b64decode(image)\n            image = mmcv.imfrombytes(image)\n            images.append(image)\n\n        return images\n\n    def inference(self, data, *args, **kwargs):\n        results = inference_detector(self.model, data)\n        return results\n\n    def postprocess(self, data):\n        # Format output following the example ObjectDetectionHandler format\n        output = []\n        for image_index, image_result in enumerate(data):\n            output.append([])\n            if isinstance(image_result, tuple):\n                bbox_result, segm_result = image_result\n                if isinstance(segm_result, tuple):\n                    segm_result = segm_result[0]  # ms rcnn\n            else:\n                bbox_result, segm_result = image_result, None\n\n            for class_index, class_result in enumerate(bbox_result):\n                class_name = self.model.CLASSES[class_index]\n                for bbox in class_result:\n                    bbox_coords = bbox[:-1].tolist()\n                    score = float(bbox[-1])\n                    if score >= self.threshold:\n                        output[image_index].append({\n                            'class_name': class_name,\n                            'bbox': bbox_coords,\n                            'score': score\n                        })\n\n        return output\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/deployment/onnx2tensorrt.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\nimport os.path as osp\nimport warnings\n\nimport numpy as np\nimport onnx\nimport torch\nfrom mmcv import Config\nfrom mmcv.tensorrt import is_tensorrt_plugin_loaded, onnx2trt, save_trt_engine\n\nfrom mmdet.core.export import preprocess_example_input\nfrom mmdet.core.export.model_wrappers import (ONNXRuntimeDetector,\n                                              TensorRTDetector)\nfrom mmdet.datasets import DATASETS\n\n\ndef get_GiB(x: int):\n    \"\"\"return x GiB.\"\"\"\n    return x * (1 << 30)\n\n\ndef onnx2tensorrt(onnx_file,\n                  trt_file,\n                  input_config,\n                  verify=False,\n                  show=False,\n                  workspace_size=1,\n                  verbose=False):\n    import tensorrt as trt\n    onnx_model = onnx.load(onnx_file)\n    max_shape = input_config['max_shape']\n    min_shape = input_config['min_shape']\n    opt_shape = input_config['opt_shape']\n    fp16_mode = False\n    # create trt engine and wrapper\n    opt_shape_dict = {'input': [min_shape, opt_shape, max_shape]}\n    max_workspace_size = get_GiB(workspace_size)\n    trt_engine = onnx2trt(\n        onnx_model,\n        opt_shape_dict,\n        log_level=trt.Logger.VERBOSE if verbose else trt.Logger.ERROR,\n        fp16_mode=fp16_mode,\n        max_workspace_size=max_workspace_size)\n    save_dir, _ = osp.split(trt_file)\n    if save_dir:\n        os.makedirs(save_dir, exist_ok=True)\n    save_trt_engine(trt_engine, trt_file)\n    print(f'Successfully created TensorRT engine: {trt_file}')\n\n    if verify:\n        # prepare input\n        one_img, one_meta = preprocess_example_input(input_config)\n        img_list, img_meta_list = [one_img], [[one_meta]]\n        img_list = [_.cuda().contiguous() for _ in img_list]\n\n        # wrap ONNX and TensorRT model\n        onnx_model = ONNXRuntimeDetector(onnx_file, CLASSES, device_id=0)\n        trt_model = TensorRTDetector(trt_file, CLASSES, device_id=0)\n\n        # inference with wrapped model\n        with torch.no_grad():\n            onnx_results = onnx_model(\n                img_list, img_metas=img_meta_list, return_loss=False)[0]\n            trt_results = trt_model(\n                img_list, img_metas=img_meta_list, return_loss=False)[0]\n\n        if show:\n            out_file_ort, out_file_trt = None, None\n        else:\n            out_file_ort, out_file_trt = 'show-ort.png', 'show-trt.png'\n        show_img = one_meta['show_img']\n        score_thr = 0.3\n        onnx_model.show_result(\n            show_img,\n            onnx_results,\n            score_thr=score_thr,\n            show=True,\n            win_name='ONNXRuntime',\n            out_file=out_file_ort)\n        trt_model.show_result(\n            show_img,\n            trt_results,\n            score_thr=score_thr,\n            show=True,\n            win_name='TensorRT',\n            out_file=out_file_trt)\n        with_mask = trt_model.with_masks\n        # compare a part of result\n        if with_mask:\n            compare_pairs = list(zip(onnx_results, trt_results))\n        else:\n            compare_pairs = [(onnx_results, trt_results)]\n        err_msg = 'The numerical values are different between Pytorch' + \\\n                  ' and ONNX, but it does not necessarily mean the' + \\\n                  ' exported ONNX model is problematic.'\n        # check the numerical value\n        for onnx_res, pytorch_res in compare_pairs:\n          
  for o_res, p_res in zip(onnx_res, pytorch_res):\n                np.testing.assert_allclose(\n                    o_res, p_res, rtol=1e-03, atol=1e-05, err_msg=err_msg)\n        print('The numerical values are the same between Pytorch and ONNX')\n\n\ndef parse_normalize_cfg(test_pipeline):\n    transforms = None\n    for pipeline in test_pipeline:\n        if 'transforms' in pipeline:\n            transforms = pipeline['transforms']\n            break\n    assert transforms is not None, 'Failed to find `transforms`'\n    norm_config_li = [_ for _ in transforms if _['type'] == 'Normalize']\n    assert len(norm_config_li) == 1, '`norm_config` should only have one'\n    norm_config = norm_config_li[0]\n    return norm_config\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert MMDetection models from ONNX to TensorRT')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('model', help='Filename of input ONNX model')\n    parser.add_argument(\n        '--trt-file',\n        type=str,\n        default='tmp.trt',\n        help='Filename of output TensorRT engine')\n    parser.add_argument(\n        '--input-img', type=str, default='', help='Image for test')\n    parser.add_argument(\n        '--show', action='store_true', help='Whether to show output results')\n    parser.add_argument(\n        '--dataset',\n        type=str,\n        default='coco',\n        help='Dataset name. This argument is deprecated and will be \\\n        removed in future releases.')\n    parser.add_argument(\n        '--verify',\n        action='store_true',\n        help='Verify the outputs of ONNXRuntime and TensorRT')\n    parser.add_argument(\n        '--verbose',\n        action='store_true',\n        help='Whether to verbose logging messages while creating \\\n                TensorRT engine. Defaults to False.')\n    parser.add_argument(\n        '--to-rgb',\n        action='store_false',\n        help='Feed model with RGB or BGR image. Default is RGB. This \\\n        argument is deprecated and will be removed in future releases.')\n    parser.add_argument(\n        '--shape',\n        type=int,\n        nargs='+',\n        default=[400, 600],\n        help='Input size of the model')\n    parser.add_argument(\n        '--mean',\n        type=float,\n        nargs='+',\n        default=[123.675, 116.28, 103.53],\n        help='Mean value used for preprocess input data. This argument \\\n        is deprecated and will be removed in future releases.')\n    parser.add_argument(\n        '--std',\n        type=float,\n        nargs='+',\n        default=[58.395, 57.12, 57.375],\n        help='Variance value used for preprocess input data. 
\\\n        This argument is deprecated and will be removed in future releases.')\n    parser.add_argument(\n        '--min-shape',\n        type=int,\n        nargs='+',\n        default=None,\n        help='Minimum input size of the model in TensorRT')\n    parser.add_argument(\n        '--max-shape',\n        type=int,\n        nargs='+',\n        default=None,\n        help='Maximum input size of the model in TensorRT')\n    parser.add_argument(\n        '--workspace-size',\n        type=int,\n        default=1,\n        help='Max workspace size in GiB')\n\n    args = parser.parse_args()\n    return args\n\n\nif __name__ == '__main__':\n\n    assert is_tensorrt_plugin_loaded(), 'TensorRT plugin should be compiled.'\n    args = parse_args()\n    warnings.warn(\n        'Arguments like `--to-rgb`, `--mean`, `--std`, `--dataset` would be \\\n        parsed directly from config file and are deprecated and will be \\\n        removed in future releases.')\n    if not args.input_img:\n        args.input_img = osp.join(osp.dirname(__file__), '../../demo/demo.jpg')\n\n    cfg = Config.fromfile(args.config)\n\n    def parse_shape(shape):\n        if len(shape) == 1:\n            shape = (1, 3, shape[0], shape[0])\n        elif len(args.shape) == 2:\n            shape = (1, 3) + tuple(shape)\n        else:\n            raise ValueError('invalid input shape')\n        return shape\n\n    if args.shape:\n        input_shape = parse_shape(args.shape)\n    else:\n        img_scale = cfg.test_pipeline[1]['img_scale']\n        input_shape = (1, 3, img_scale[1], img_scale[0])\n\n    if not args.max_shape:\n        max_shape = input_shape\n    else:\n        max_shape = parse_shape(args.max_shape)\n\n    if not args.min_shape:\n        min_shape = input_shape\n    else:\n        min_shape = parse_shape(args.min_shape)\n\n    dataset = DATASETS.get(cfg.data.test['type'])\n    assert (dataset is not None)\n    CLASSES = dataset.CLASSES\n    normalize_cfg = parse_normalize_cfg(cfg.test_pipeline)\n\n    input_config = {\n        'min_shape': min_shape,\n        'opt_shape': input_shape,\n        'max_shape': max_shape,\n        'input_shape': input_shape,\n        'input_path': args.input_img,\n        'normalize_cfg': normalize_cfg\n    }\n    # Create TensorRT engine\n    onnx2tensorrt(\n        args.model,\n        args.trt_file,\n        input_config,\n        verify=args.verify,\n        show=args.show,\n        workspace_size=args.workspace_size,\n        verbose=args.verbose)\n\n    # Following strings of text style are from colorama package\n    bright_style, reset_style = '\\x1b[1m', '\\x1b[0m'\n    red_text, blue_text = '\\x1b[31m', '\\x1b[34m'\n    white_background = '\\x1b[107m'\n\n    msg = white_background + bright_style + red_text\n    msg += 'DeprecationWarning: This tool will be deprecated in future. '\n    msg += blue_text + 'Welcome to use the unified model deployment toolbox '\n    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'\n    msg += reset_style\n    warnings.warn(msg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/deployment/pytorch2onnx.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os.path as osp\nimport warnings\nfrom functools import partial\n\nimport numpy as np\nimport onnx\nimport torch\nfrom mmcv import Config, DictAction\n\nfrom mmdet.core.export import build_model_from_cfg, preprocess_example_input\nfrom mmdet.core.export.model_wrappers import ONNXRuntimeDetector\n\n\ndef pytorch2onnx(model,\n                 input_img,\n                 input_shape,\n                 normalize_cfg,\n                 opset_version=11,\n                 show=False,\n                 output_file='tmp.onnx',\n                 verify=False,\n                 test_img=None,\n                 do_simplify=False,\n                 dynamic_export=None,\n                 skip_postprocess=False):\n\n    input_config = {\n        'input_shape': input_shape,\n        'input_path': input_img,\n        'normalize_cfg': normalize_cfg\n    }\n    # prepare input\n    one_img, one_meta = preprocess_example_input(input_config)\n    img_list, img_meta_list = [one_img], [[one_meta]]\n\n    if skip_postprocess:\n        warnings.warn('Not all models support export onnx without post '\n                      'process, especially two stage detectors!')\n        model.forward = model.forward_dummy\n        torch.onnx.export(\n            model,\n            one_img,\n            output_file,\n            input_names=['input'],\n            export_params=True,\n            keep_initializers_as_inputs=True,\n            do_constant_folding=True,\n            verbose=show,\n            opset_version=opset_version)\n\n        print(f'Successfully exported ONNX model without '\n              f'post process: {output_file}')\n        return\n\n    # replace original forward function\n    origin_forward = model.forward\n    model.forward = partial(\n        model.forward,\n        img_metas=img_meta_list,\n        return_loss=False,\n        rescale=False)\n\n    output_names = ['dets', 'labels']\n    if model.with_mask:\n        output_names.append('masks')\n    input_name = 'input'\n    dynamic_axes = None\n    if dynamic_export:\n        dynamic_axes = {\n            input_name: {\n                0: 'batch',\n                2: 'height',\n                3: 'width'\n            },\n            'dets': {\n                0: 'batch',\n                1: 'num_dets',\n            },\n            'labels': {\n                0: 'batch',\n                1: 'num_dets',\n            },\n        }\n        if model.with_mask:\n            dynamic_axes['masks'] = {0: 'batch', 1: 'num_dets'}\n\n    torch.onnx.export(\n        model,\n        img_list,\n        output_file,\n        input_names=[input_name],\n        output_names=output_names,\n        export_params=True,\n        keep_initializers_as_inputs=True,\n        do_constant_folding=True,\n        verbose=show,\n        opset_version=opset_version,\n        dynamic_axes=dynamic_axes)\n\n    model.forward = origin_forward\n\n    if do_simplify:\n        import onnxsim\n\n        from mmdet import digit_version\n\n        min_required_version = '0.4.0'\n        assert digit_version(onnxsim.__version__) >= digit_version(\n            min_required_version\n        ), f'Requires to install onnxsim>={min_required_version}'\n\n        model_opt, check_ok = onnxsim.simplify(output_file)\n        if check_ok:\n            onnx.save(model_opt, output_file)\n            print(f'Successfully simplified ONNX model: {output_file}')\n        else:\n            warnings.warn('Failed 
to simplify ONNX model.')\n    print(f'Successfully exported ONNX model: {output_file}')\n\n    if verify:\n        # check by onnx\n        onnx_model = onnx.load(output_file)\n        onnx.checker.check_model(onnx_model)\n\n        # wrap onnx model\n        onnx_model = ONNXRuntimeDetector(output_file, model.CLASSES, 0)\n        if dynamic_export:\n            # scale up to test dynamic shape\n            h, w = [int((_ * 1.5) // 32 * 32) for _ in input_shape[2:]]\n            h, w = min(1344, h), min(1344, w)\n            input_config['input_shape'] = (1, 3, h, w)\n\n        if test_img is None:\n            input_config['input_path'] = input_img\n\n        # prepare input once again\n        one_img, one_meta = preprocess_example_input(input_config)\n        img_list, img_meta_list = [one_img], [[one_meta]]\n\n        # get pytorch output\n        with torch.no_grad():\n            pytorch_results = model(\n                img_list,\n                img_metas=img_meta_list,\n                return_loss=False,\n                rescale=True)[0]\n\n        img_list = [_.cuda().contiguous() for _ in img_list]\n        if dynamic_export:\n            img_list = img_list + [_.flip(-1).contiguous() for _ in img_list]\n            img_meta_list = img_meta_list * 2\n        # get onnx output\n        onnx_results = onnx_model(\n            img_list, img_metas=img_meta_list, return_loss=False)[0]\n        # visualize predictions\n        score_thr = 0.3\n        if show:\n            out_file_ort, out_file_pt = None, None\n        else:\n            out_file_ort, out_file_pt = 'show-ort.png', 'show-pt.png'\n\n        show_img = one_meta['show_img']\n        model.show_result(\n            show_img,\n            pytorch_results,\n            score_thr=score_thr,\n            show=True,\n            win_name='PyTorch',\n            out_file=out_file_pt)\n        onnx_model.show_result(\n            show_img,\n            onnx_results,\n            score_thr=score_thr,\n            show=True,\n            win_name='ONNXRuntime',\n            out_file=out_file_ort)\n\n        # compare a part of result\n        if model.with_mask:\n            compare_pairs = list(zip(onnx_results, pytorch_results))\n        else:\n            compare_pairs = [(onnx_results, pytorch_results)]\n        err_msg = 'The numerical values are different between Pytorch' + \\\n                  ' and ONNX, but it does not necessarily mean the' + \\\n                  ' exported ONNX model is problematic.'\n        # check the numerical value\n        for onnx_res, pytorch_res in compare_pairs:\n            for o_res, p_res in zip(onnx_res, pytorch_res):\n                np.testing.assert_allclose(\n                    o_res, p_res, rtol=1e-03, atol=1e-05, err_msg=err_msg)\n        print('The numerical values are the same between Pytorch and ONNX')\n\n\ndef parse_normalize_cfg(test_pipeline):\n    transforms = None\n    for pipeline in test_pipeline:\n        if 'transforms' in pipeline:\n            transforms = pipeline['transforms']\n            break\n    assert transforms is not None, 'Failed to find `transforms`'\n    norm_config_li = [_ for _ in transforms if _['type'] == 'Normalize']\n    assert len(norm_config_li) == 1, '`norm_config` should only have one'\n    norm_config = norm_config_li[0]\n    return norm_config\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Convert MMDetection models to ONNX')\n    parser.add_argument('config', help='test config file path')\n    
parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument('--input-img', type=str, help='Images for input')\n    parser.add_argument(\n        '--show',\n        action='store_true',\n        help='Show onnx graph and detection outputs')\n    parser.add_argument('--output-file', type=str, default='tmp.onnx')\n    parser.add_argument('--opset-version', type=int, default=11)\n    parser.add_argument(\n        '--test-img', type=str, default=None, help='Images for test')\n    parser.add_argument(\n        '--dataset',\n        type=str,\n        default='coco',\n        help='Dataset name. This argument is deprecated and will be removed \\\n        in future releases.')\n    parser.add_argument(\n        '--verify',\n        action='store_true',\n        help='verify the onnx model output against pytorch output')\n    parser.add_argument(\n        '--simplify',\n        action='store_true',\n        help='Whether to simplify onnx model.')\n    parser.add_argument(\n        '--shape',\n        type=int,\n        nargs='+',\n        default=[800, 1216],\n        help='input image size')\n    parser.add_argument(\n        '--mean',\n        type=float,\n        nargs='+',\n        default=[123.675, 116.28, 103.53],\n        help='mean value used for preprocess input data.This argument \\\n        is deprecated and will be removed in future releases.')\n    parser.add_argument(\n        '--std',\n        type=float,\n        nargs='+',\n        default=[58.395, 57.12, 57.375],\n        help='variance value used for preprocess input data. '\n        'This argument is deprecated and will be removed in future releases.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='Override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--dynamic-export',\n        action='store_true',\n        help='Whether to export onnx with dynamic axis.')\n    parser.add_argument(\n        '--skip-postprocess',\n        action='store_true',\n        help='Whether to export model without post process. Experimental '\n        'option. 
We do not guarantee the correctness of the exported '\n        'model.')\n    args = parser.parse_args()\n    return args\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    warnings.warn('Arguments like `--mean`, `--std`, `--dataset` would be \\\n        parsed directly from config file and are deprecated and \\\n        will be removed in future releases.')\n\n    assert args.opset_version == 11, 'MMDet only support opset 11 now'\n\n    try:\n        from mmcv.onnx.symbolic import register_extra_symbolics\n    except ModuleNotFoundError:\n        raise NotImplementedError('please update mmcv to version>=v1.0.4')\n    register_extra_symbolics(args.opset_version)\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    if args.shape is None:\n        img_scale = cfg.test_pipeline[1]['img_scale']\n        input_shape = (1, 3, img_scale[1], img_scale[0])\n    elif len(args.shape) == 1:\n        input_shape = (1, 3, args.shape[0], args.shape[0])\n    elif len(args.shape) == 2:\n        input_shape = (1, 3) + tuple(args.shape)\n    else:\n        raise ValueError('invalid input shape')\n\n    # build the model and load checkpoint\n    model = build_model_from_cfg(args.config, args.checkpoint,\n                                 args.cfg_options)\n\n    if not args.input_img:\n        args.input_img = osp.join(osp.dirname(__file__), '../../demo/demo.jpg')\n\n    normalize_cfg = parse_normalize_cfg(cfg.test_pipeline)\n\n    # convert model to onnx file\n    pytorch2onnx(\n        model,\n        args.input_img,\n        input_shape,\n        normalize_cfg,\n        opset_version=args.opset_version,\n        show=args.show,\n        output_file=args.output_file,\n        verify=args.verify,\n        test_img=args.test_img,\n        do_simplify=args.simplify,\n        dynamic_export=args.dynamic_export,\n        skip_postprocess=args.skip_postprocess)\n\n    # Following strings of text style are from colorama package\n    bright_style, reset_style = '\\x1b[1m', '\\x1b[0m'\n    red_text, blue_text = '\\x1b[31m', '\\x1b[34m'\n    white_background = '\\x1b[107m'\n\n    msg = white_background + bright_style + red_text\n    msg += 'DeprecationWarning: This tool will be deprecated in future. '\n    msg += blue_text + 'Welcome to use the unified model deployment toolbox '\n    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'\n    msg += reset_style\n    warnings.warn(msg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/deployment/test.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport warnings\n\nimport mmcv\nfrom mmcv import Config, DictAction\nfrom mmcv.parallel import MMDataParallel\n\nfrom mmdet.apis import single_gpu_test\nfrom mmdet.datasets import (build_dataloader, build_dataset,\n                            replace_ImageToTensor)\nfrom mmdet.utils import compat_cfg\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet test (and eval) an ONNX model using ONNXRuntime')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('model', help='Input model file')\n    parser.add_argument('--out', help='output result file in pickle format')\n    parser.add_argument(\n        '--format-only',\n        action='store_true',\n        help='Format the output results without perform evaluation. It is'\n        'useful when you want to format the result to a specific format and '\n        'submit it to the test server')\n    parser.add_argument(\n        '--backend',\n        required=True,\n        choices=['onnxruntime', 'tensorrt'],\n        help='Backend for input model to run. ')\n    parser.add_argument(\n        '--eval',\n        type=str,\n        nargs='+',\n        help='evaluation metrics, which depends on the dataset, e.g., \"bbox\",'\n        ' \"segm\", \"proposal\" for COCO, and \"mAP\", \"recall\" for PASCAL VOC')\n    parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument(\n        '--show-dir', help='directory where painted images will be saved')\n    parser.add_argument(\n        '--show-score-thr',\n        type=float,\n        default=0.3,\n        help='score threshold (default: 0.3)')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--eval-options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function')\n\n    args = parser.parse_args()\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    assert args.out or args.eval or args.format_only or args.show \\\n        or args.show_dir, \\\n        ('Please specify at least one operation (save/eval/format/show the '\n         'results / save the results) with the argument \"--out\", \"--eval\"'\n         ', \"--format-only\", \"--show\" or \"--show-dir\"')\n\n    if args.eval and args.format_only:\n        raise ValueError('--eval and --format_only cannot be both specified')\n\n    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):\n        raise ValueError('The output file must be a pkl file.')\n\n    cfg = Config.fromfile(args.config)\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    cfg = compat_cfg(cfg)\n    # in case the test dataset is concatenated\n    samples_per_gpu = 1\n    if isinstance(cfg.data.test, dict):\n        cfg.data.test.test_mode = True\n        samples_per_gpu = cfg.data.test.pop('samples_per_gpu', 1)\n        if samples_per_gpu > 1:\n            # Replace 'ImageToTensor' to 'DefaultFormatBundle'\n            cfg.data.test.pipeline = replace_ImageToTensor(\n                cfg.data.test.pipeline)\n    elif isinstance(cfg.data.test, list):\n        for ds_cfg in cfg.data.test:\n            ds_cfg.test_mode = True\n        samples_per_gpu = max(\n            [ds_cfg.pop('samples_per_gpu', 1) for ds_cfg in cfg.data.test])\n        if samples_per_gpu > 1:\n            for ds_cfg in cfg.data.test:\n                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)\n\n    # build the dataloader\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(\n        dataset,\n        samples_per_gpu=samples_per_gpu,\n        workers_per_gpu=cfg.data.workers_per_gpu,\n        dist=False,\n        shuffle=False)\n\n    if args.backend == 'onnxruntime':\n        from mmdet.core.export.model_wrappers import ONNXRuntimeDetector\n        model = ONNXRuntimeDetector(\n            args.model, class_names=dataset.CLASSES, device_id=0)\n    elif args.backend == 'tensorrt':\n        from mmdet.core.export.model_wrappers import TensorRTDetector\n        model = TensorRTDetector(\n            args.model, class_names=dataset.CLASSES, device_id=0)\n\n    model = MMDataParallel(model, device_ids=[0])\n    outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,\n                              args.show_score_thr)\n\n    if args.out:\n        print(f'\\nwriting results to {args.out}')\n        mmcv.dump(outputs, args.out)\n    kwargs = {} if args.eval_options is None else args.eval_options\n    if args.format_only:\n        dataset.format_results(outputs, **kwargs)\n    if args.eval:\n        eval_kwargs = cfg.get('evaluation', {}).copy()\n        # hard-code way to remove EvalHook args\n        for key in [\n                'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',\n                'rule'\n        ]:\n            eval_kwargs.pop(key, None)\n        eval_kwargs.update(dict(metric=args.eval, **kwargs))\n        print(dataset.evaluate(outputs, 
**eval_kwargs))\n\n\nif __name__ == '__main__':\n    main()\n\n    # The following text style strings are from the colorama package\n    bright_style, reset_style = '\\x1b[1m', '\\x1b[0m'\n    red_text, blue_text = '\\x1b[31m', '\\x1b[34m'\n    white_background = '\\x1b[107m'\n\n    msg = white_background + bright_style + red_text\n    msg += 'DeprecationWarning: This tool will be deprecated in the future. '\n    msg += blue_text + 'Please use the unified model deployment toolbox '\n    msg += 'MMDeploy: https://github.com/open-mmlab/mmdeploy'\n    msg += reset_style\n    warnings.warn(msg)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/deployment/test_torchserver.py",
    "content": "from argparse import ArgumentParser\n\nimport numpy as np\nimport requests\n\nfrom mmdet.apis import inference_detector, init_detector, show_result_pyplot\nfrom mmdet.core import bbox2result\n\n\ndef parse_args():\n    parser = ArgumentParser()\n    parser.add_argument('img', help='Image file')\n    parser.add_argument('config', help='Config file')\n    parser.add_argument('checkpoint', help='Checkpoint file')\n    parser.add_argument('model_name', help='The model name in the server')\n    parser.add_argument(\n        '--inference-addr',\n        default='127.0.0.1:8080',\n        help='Address and port of the inference server')\n    parser.add_argument(\n        '--device', default='cuda:0', help='Device used for inference')\n    parser.add_argument(\n        '--score-thr', type=float, default=0.5, help='bbox score threshold')\n    args = parser.parse_args()\n    return args\n\n\ndef parse_result(input, model_class):\n    bbox = []\n    label = []\n    score = []\n    for anchor in input:\n        bbox.append(anchor['bbox'])\n        label.append(model_class.index(anchor['class_name']))\n        score.append([anchor['score']])\n    bboxes = np.append(bbox, score, axis=1)\n    labels = np.array(label)\n    result = bbox2result(bboxes, labels, len(model_class))\n    return result\n\n\ndef main(args):\n    # build the model from a config file and a checkpoint file\n    model = init_detector(args.config, args.checkpoint, device=args.device)\n    # test a single image\n    model_result = inference_detector(model, args.img)\n    for i, anchor_set in enumerate(model_result):\n        anchor_set = anchor_set[anchor_set[:, 4] >= 0.5]\n        model_result[i] = anchor_set\n    # show the results\n    show_result_pyplot(\n        model,\n        args.img,\n        model_result,\n        score_thr=args.score_thr,\n        title='pytorch_result')\n    url = 'http://' + args.inference_addr + '/predictions/' + args.model_name\n    with open(args.img, 'rb') as image:\n        response = requests.post(url, image)\n    server_result = parse_result(response.json(), model.CLASSES)\n    show_result_pyplot(\n        model,\n        args.img,\n        server_result,\n        score_thr=args.score_thr,\n        title='server_result')\n\n    for i in range(len(model.CLASSES)):\n        assert np.allclose(model_result[i], server_result[i])\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    main(args)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/dist_test.sh",
    "content": "#!/usr/bin/env bash\n\nCONFIG=$1\nCHECKPOINT=$2\nGPUS=$3\nNNODES=${NNODES:-1}\nNODE_RANK=${NODE_RANK:-0}\nPORT=${PORT:-29500}\nMASTER_ADDR=${MASTER_ADDR:-\"127.0.0.1\"}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch.distributed.launch \\\n    --nnodes=$NNODES \\\n    --node_rank=$NODE_RANK \\\n    --master_addr=$MASTER_ADDR \\\n    --nproc_per_node=$GPUS \\\n    --master_port=$PORT \\\n    $(dirname \"$0\")/test.py \\\n    $CONFIG \\\n    $CHECKPOINT \\\n    --launcher pytorch \\\n    ${@:4}\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/dist_train.sh",
    "content": "#!/usr/bin/env bash\n\nCONFIG=$1\nGPUS=$2\nNNODES=${NNODES:-1}\nNODE_RANK=${NODE_RANK:-0}\nPORT=${PORT:-29500}\nMASTER_ADDR=${MASTER_ADDR:-\"127.0.0.1\"}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\npython -m torch.distributed.launch \\\n    --nnodes=$NNODES \\\n    --node_rank=$NODE_RANK \\\n    --master_addr=$MASTER_ADDR \\\n    --nproc_per_node=$GPUS \\\n    --master_port=$PORT \\\n    $(dirname \"$0\")/train.py \\\n    $CONFIG \\\n    --seed 0 \\\n    --launcher pytorch ${@:3}\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/misc/browse_dataset.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\nfrom collections import Sequence\nfrom pathlib import Path\n\nimport mmcv\nimport numpy as np\nfrom mmcv import Config, DictAction\n\nfrom mmdet.core.utils import mask2ndarray\nfrom mmdet.core.visualization import imshow_det_bboxes\nfrom mmdet.datasets.builder import build_dataset\nfrom mmdet.utils import replace_cfg_vals, update_data_root\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Browse a dataset')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument(\n        '--skip-type',\n        type=str,\n        nargs='+',\n        default=['DefaultFormatBundle', 'Normalize', 'Collect'],\n        help='skip some useless pipeline')\n    parser.add_argument(\n        '--output-dir',\n        default=None,\n        type=str,\n        help='If there is no display interface, you can save it')\n    parser.add_argument('--not-show', default=False, action='store_true')\n    parser.add_argument(\n        '--show-interval',\n        type=float,\n        default=2,\n        help='the interval of show (s)')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    args = parser.parse_args()\n    return args\n\n\ndef retrieve_data_cfg(config_path, skip_type, cfg_options):\n\n    def skip_pipeline_steps(config):\n        config['pipeline'] = [\n            x for x in config.pipeline if x['type'] not in skip_type\n        ]\n\n    cfg = Config.fromfile(config_path)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    if cfg_options is not None:\n        cfg.merge_from_dict(cfg_options)\n    train_data_cfg = cfg.data.train\n    while 'dataset' in train_data_cfg and train_data_cfg[\n            'type'] != 'MultiImageMixDataset':\n        train_data_cfg = train_data_cfg['dataset']\n\n    if isinstance(train_data_cfg, Sequence):\n        [skip_pipeline_steps(c) for c in train_data_cfg]\n    else:\n        skip_pipeline_steps(train_data_cfg)\n\n    return cfg\n\n\ndef main():\n    args = parse_args()\n    cfg = retrieve_data_cfg(args.config, args.skip_type, args.cfg_options)\n\n    if 'gt_semantic_seg' in cfg.train_pipeline[-1]['keys']:\n        cfg.data.train.pipeline = [\n            p for p in cfg.data.train.pipeline if p['type'] != 'SegRescale'\n        ]\n    dataset = build_dataset(cfg.data.train)\n\n    progress_bar = mmcv.ProgressBar(len(dataset))\n\n    for item in dataset:\n        filename = os.path.join(args.output_dir,\n                                Path(item['filename']).name\n                                ) if args.output_dir is not None else None\n\n        gt_bboxes = item['gt_bboxes']\n        gt_labels = item['gt_labels']\n        gt_masks = item.get('gt_masks', None)\n        if gt_masks is not None:\n            gt_masks = mask2ndarray(gt_masks)\n\n        gt_seg = item.get('gt_semantic_seg', None)\n        if gt_seg is not None:\n            pad_value 
= 255  # the padding value of gt_seg\n            sem_labels = np.unique(gt_seg)\n            all_labels = np.concatenate((gt_labels, sem_labels), axis=0)\n            all_labels, counts = np.unique(all_labels, return_counts=True)\n            stuff_labels = all_labels[np.logical_and(counts < 2,\n                                                     all_labels != pad_value)]\n            stuff_masks = gt_seg[None] == stuff_labels[:, None, None]\n            gt_labels = np.concatenate((gt_labels, stuff_labels), axis=0)\n            gt_masks = np.concatenate((gt_masks, stuff_masks.astype(np.uint8)),\n                                      axis=0)\n            # If you need to show the bounding boxes,\n            # please comment the following line\n            gt_bboxes = None\n\n        imshow_det_bboxes(\n            item['img'],\n            gt_bboxes,\n            gt_labels,\n            gt_masks,\n            class_names=dataset.CLASSES,\n            show=not args.not_show,\n            wait_time=args.show_interval,\n            out_file=filename,\n            bbox_color=dataset.PALETTE,\n            text_color=(200, 200, 200),\n            mask_color=dataset.PALETTE)\n\n        progress_bar.update()\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/misc/download_dataset.py",
    "content": "import argparse\nfrom itertools import repeat\nfrom multiprocessing.pool import ThreadPool\nfrom pathlib import Path\nfrom tarfile import TarFile\nfrom zipfile import ZipFile\n\nimport torch\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Download datasets for training')\n    parser.add_argument(\n        '--dataset-name', type=str, help='dataset name', default='coco2017')\n    parser.add_argument(\n        '--save-dir',\n        type=str,\n        help='the dir to save dataset',\n        default='data/coco')\n    parser.add_argument(\n        '--unzip',\n        action='store_true',\n        help='whether unzip dataset or not, zipped files will be saved')\n    parser.add_argument(\n        '--delete',\n        action='store_true',\n        help='delete the download zipped files')\n    parser.add_argument(\n        '--threads', type=int, help='number of threading', default=4)\n    args = parser.parse_args()\n    return args\n\n\ndef download(url, dir, unzip=True, delete=False, threads=1):\n\n    def download_one(url, dir):\n        f = dir / Path(url).name\n        if Path(url).is_file():\n            Path(url).rename(f)\n        elif not f.exists():\n            print('Downloading {} to {}'.format(url, f))\n            torch.hub.download_url_to_file(url, f, progress=True)\n        if unzip and f.suffix in ('.zip', '.tar'):\n            print('Unzipping {}'.format(f.name))\n            if f.suffix == '.zip':\n                ZipFile(f).extractall(path=dir)\n            elif f.suffix == '.tar':\n                TarFile(f).extractall(path=dir)\n            if delete:\n                f.unlink()\n                print('Delete {}'.format(f))\n\n    dir = Path(dir)\n    if threads > 1:\n        pool = ThreadPool(threads)\n        pool.imap(lambda x: download_one(*x), zip(url, repeat(dir)))\n        pool.close()\n        pool.join()\n    else:\n        for u in [url] if isinstance(url, (str, Path)) else url:\n            download_one(u, dir)\n\n\ndef main():\n    args = parse_args()\n    path = Path(args.save_dir)\n    if not path.exists():\n        path.mkdir(parents=True, exist_ok=True)\n    data2url = dict(\n        # TODO: Support for downloading Panoptic Segmentation of COCO\n        coco2017=[\n            'http://images.cocodataset.org/zips/train2017.zip',\n            'http://images.cocodataset.org/zips/val2017.zip',\n            'http://images.cocodataset.org/zips/test2017.zip',\n            'http://images.cocodataset.org/annotations/' +\n            'annotations_trainval2017.zip'\n        ],\n        lvis=[\n            'https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip',  # noqa\n            'https://s3-us-west-2.amazonaws.com/dl.fbaipublicfiles.com/LVIS/lvis_v1_train.json.zip',  # noqa\n        ],\n        voc2007=[\n            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar',  # noqa\n            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',  # noqa\n            'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCdevkit_08-Jun-2007.tar',  # noqa\n        ],\n    )\n    url = data2url.get(args.dataset_name, None)\n    if url is None:\n        print('Only support COCO, VOC, and LVIS now!')\n        return\n    download(\n        url,\n        dir=path,\n        unzip=args.unzip,\n        delete=args.delete,\n        threads=args.threads)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/misc/gen_coco_panoptic_test_info.py",
    "content": "import argparse\nimport os.path as osp\n\nimport mmcv\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Generate COCO test image information '\n        'for COCO panoptic segmentation.')\n    parser.add_argument('data_root', help='Path to COCO annotation directory.')\n    args = parser.parse_args()\n\n    return args\n\n\ndef main():\n    args = parse_args()\n    data_root = args.data_root\n    val_info = mmcv.load(osp.join(data_root, 'panoptic_val2017.json'))\n    test_old_info = mmcv.load(\n        osp.join(data_root, 'image_info_test-dev2017.json'))\n\n    # replace categories from image_info_test-dev2017.json\n    # with categories from panoptic_val2017.json which\n    # has attribute `isthing`.\n    test_info = test_old_info\n    test_info.update({'categories': val_info['categories']})\n    mmcv.dump(test_info,\n              osp.join(data_root, 'panoptic_image_info_test-dev2017.json'))\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/misc/get_image_metas.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\n\"\"\"Get test image metas on a specific dataset.\n\nHere is an example to run this script.\n\nExample:\n    python tools/misc/get_image_metas.py ${CONFIG} \\\n    --out ${OUTPUT FILE NAME}\n\"\"\"\nimport argparse\nimport csv\nimport os.path as osp\nfrom multiprocessing import Pool\n\nimport mmcv\nfrom mmcv import Config\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Collect image metas')\n    parser.add_argument('config', help='Config file path')\n    parser.add_argument(\n        '--out',\n        default='validation-image-metas.pkl',\n        help='The output image metas file name. The save dir is in the '\n        'same directory as `dataset.ann_file` path')\n    parser.add_argument(\n        '--nproc',\n        default=4,\n        type=int,\n        help='Processes used for get image metas')\n    args = parser.parse_args()\n    return args\n\n\ndef get_metas_from_csv_style_ann_file(ann_file):\n    data_infos = []\n    cp_filename = None\n    with open(ann_file, 'r') as f:\n        reader = csv.reader(f)\n        for i, line in enumerate(reader):\n            if i == 0:\n                continue\n            img_id = line[0]\n            filename = f'{img_id}.jpg'\n            if filename != cp_filename:\n                data_infos.append(dict(filename=filename))\n                cp_filename = filename\n    return data_infos\n\n\ndef get_metas_from_txt_style_ann_file(ann_file):\n    with open(ann_file) as f:\n        lines = f.readlines()\n    i = 0\n    data_infos = []\n    while i < len(lines):\n        filename = lines[i].rstrip()\n        data_infos.append(dict(filename=filename))\n        skip_lines = int(lines[i + 2]) + 3\n        i += skip_lines\n    return data_infos\n\n\ndef get_image_metas(data_info, img_prefix):\n    file_client = mmcv.FileClient(backend='disk')\n    filename = data_info.get('filename', None)\n    if filename is not None:\n        if img_prefix is not None:\n            filename = osp.join(img_prefix, filename)\n        img_bytes = file_client.get(filename)\n        img = mmcv.imfrombytes(img_bytes, flag='color')\n        meta = dict(filename=filename, ori_shape=img.shape)\n    else:\n        raise NotImplementedError('Missing `filename` in data_info')\n    return meta\n\n\ndef main():\n    args = parse_args()\n    assert args.out.endswith('pkl'), 'The output file name must be pkl suffix'\n\n    # load config files\n    cfg = Config.fromfile(args.config)\n    ann_file = cfg.data.test.ann_file\n    img_prefix = cfg.data.test.img_prefix\n\n    print(f'{\"-\" * 5} Start Processing {\"-\" * 5}')\n    if ann_file.endswith('csv'):\n        data_infos = get_metas_from_csv_style_ann_file(ann_file)\n    elif ann_file.endswith('txt'):\n        data_infos = get_metas_from_txt_style_ann_file(ann_file)\n    else:\n        shuffix = ann_file.split('.')[-1]\n        raise NotImplementedError('File name must be csv or txt suffix but '\n                                  f'get {shuffix}')\n\n    print(f'Successfully load annotation file from {ann_file}')\n    print(f'Processing {len(data_infos)} images...')\n    pool = Pool(args.nproc)\n    # get image metas with multiple processes\n    image_metas = pool.starmap(\n        get_image_metas,\n        zip(data_infos, [img_prefix for _ in range(len(data_infos))]),\n    )\n    pool.close()\n\n    # save image metas\n    root_path = cfg.data.test.ann_file.rsplit('/', 1)[0]\n    save_path = osp.join(root_path, args.out)\n    
mmcv.dump(image_metas, save_path)\n    print(f'Image meta file saved to: {save_path}')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/misc/print_config.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport warnings\n\nfrom mmcv import Config, DictAction\n\nfrom mmdet.utils import replace_cfg_vals, update_data_root\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Print the whole config')\n    parser.add_argument('config', help='config file path')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file (deprecate), '\n        'change to --cfg-options instead.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    args = parser.parse_args()\n\n    if args.options and args.cfg_options:\n        raise ValueError(\n            '--options and --cfg-options cannot be both '\n            'specified, --options is deprecated in favor of --cfg-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --cfg-options')\n        args.cfg_options = args.options\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n    print(f'Config:\\n{cfg.pretty_text}')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/misc/split_coco.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os.path as osp\n\nimport mmcv\nimport numpy as np\n\nprog_description = '''K-Fold coco split.\n\nTo split coco data for semi-supervised object detection:\n    python tools/misc/split_coco.py\n'''\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser()\n    parser.add_argument(\n        '--data-root',\n        type=str,\n        help='The data root of coco dataset.',\n        default='./data/coco/')\n    parser.add_argument(\n        '--out-dir',\n        type=str,\n        help='The output directory of coco semi-supervised annotations.',\n        default='./data/coco_semi_annos/')\n    parser.add_argument(\n        '--labeled-percent',\n        type=float,\n        nargs='+',\n        help='The percentage of labeled data in the training set.',\n        default=[1, 2, 5, 10])\n    parser.add_argument(\n        '--fold',\n        type=int,\n        help='K-fold cross validation for semi-supervised object detection.',\n        default=5)\n    args = parser.parse_args()\n    return args\n\n\ndef split_coco(data_root, out_dir, percent, fold):\n    \"\"\"Split COCO data for Semi-supervised object detection.\n\n    Args:\n        data_root (str): The data root of coco dataset.\n        out_dir (str): The output directory of coco semi-supervised\n            annotations.\n        percent (float): The percentage of labeled data in the training set.\n        fold (int): The fold of dataset and set as random seed for data split.\n    \"\"\"\n\n    def save_anns(name, images, annotations):\n        sub_anns = dict()\n        sub_anns['images'] = images\n        sub_anns['annotations'] = annotations\n        sub_anns['licenses'] = anns['licenses']\n        sub_anns['categories'] = anns['categories']\n        sub_anns['info'] = anns['info']\n\n        mmcv.mkdir_or_exist(out_dir)\n        mmcv.dump(sub_anns, f'{out_dir}/{name}.json')\n\n    # set random seed with the fold\n    np.random.seed(fold)\n    ann_file = osp.join(data_root, 'annotations/instances_train2017.json')\n    anns = mmcv.load(ann_file)\n\n    image_list = anns['images']\n    labeled_total = int(percent / 100. 
* len(image_list))\n    labeled_inds = set(\n        np.random.choice(range(len(image_list)), size=labeled_total))\n    labeled_ids, labeled_images, unlabeled_images = [], [], []\n\n    for i in range(len(image_list)):\n        if i in labeled_inds:\n            labeled_images.append(image_list[i])\n            labeled_ids.append(image_list[i]['id'])\n        else:\n            unlabeled_images.append(image_list[i])\n\n    # get all annotations of labeled images\n    labeled_ids = set(labeled_ids)\n    labeled_annotations, unlabeled_annotations = [], []\n\n    for ann in anns['annotations']:\n        if ann['image_id'] in labeled_ids:\n            labeled_annotations.append(ann)\n        else:\n            unlabeled_annotations.append(ann)\n\n    # save labeled and unlabeled\n    labeled_name = f'instances_train2017.{fold}@{percent}'\n    unlabeled_name = f'instances_train2017.{fold}@{percent}-unlabeled'\n\n    save_anns(labeled_name, labeled_images, labeled_annotations)\n    save_anns(unlabeled_name, unlabeled_images, unlabeled_annotations)\n\n\ndef multi_wrapper(args):\n    return split_coco(*args)\n\n\nif __name__ == '__main__':\n    args = parse_args()\n    arguments_list = [(args.data_root, args.out_dir, p, f)\n                      for f in range(1, args.fold + 1)\n                      for p in args.labeled_percent]\n    mmcv.track_parallel_progress(multi_wrapper, arguments_list, args.fold)\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/model_converters/detectron2pytorch.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nfrom collections import OrderedDict\n\nimport mmcv\nimport torch\n\narch_settings = {50: (3, 4, 6, 3), 101: (3, 4, 23, 3)}\n\n\ndef convert_bn(blobs, state_dict, caffe_name, torch_name, converted_names):\n    # detectron replace bn with affine channel layer\n    state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name +\n                                                              '_b'])\n    state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name +\n                                                                '_s'])\n    bn_size = state_dict[torch_name + '.weight'].size()\n    state_dict[torch_name + '.running_mean'] = torch.zeros(bn_size)\n    state_dict[torch_name + '.running_var'] = torch.ones(bn_size)\n    converted_names.add(caffe_name + '_b')\n    converted_names.add(caffe_name + '_s')\n\n\ndef convert_conv_fc(blobs, state_dict, caffe_name, torch_name,\n                    converted_names):\n    state_dict[torch_name + '.weight'] = torch.from_numpy(blobs[caffe_name +\n                                                                '_w'])\n    converted_names.add(caffe_name + '_w')\n    if caffe_name + '_b' in blobs:\n        state_dict[torch_name + '.bias'] = torch.from_numpy(blobs[caffe_name +\n                                                                  '_b'])\n        converted_names.add(caffe_name + '_b')\n\n\ndef convert(src, dst, depth):\n    \"\"\"Convert keys in detectron pretrained ResNet models to pytorch style.\"\"\"\n    # load arch_settings\n    if depth not in arch_settings:\n        raise ValueError('Only support ResNet-50 and ResNet-101 currently')\n    block_nums = arch_settings[depth]\n    # load caffe model\n    caffe_model = mmcv.load(src, encoding='latin1')\n    blobs = caffe_model['blobs'] if 'blobs' in caffe_model else caffe_model\n    # convert to pytorch style\n    state_dict = OrderedDict()\n    converted_names = set()\n    convert_conv_fc(blobs, state_dict, 'conv1', 'conv1', converted_names)\n    convert_bn(blobs, state_dict, 'res_conv1_bn', 'bn1', converted_names)\n    for i in range(1, len(block_nums) + 1):\n        for j in range(block_nums[i - 1]):\n            if j == 0:\n                convert_conv_fc(blobs, state_dict, f'res{i + 1}_{j}_branch1',\n                                f'layer{i}.{j}.downsample.0', converted_names)\n                convert_bn(blobs, state_dict, f'res{i + 1}_{j}_branch1_bn',\n                           f'layer{i}.{j}.downsample.1', converted_names)\n            for k, letter in enumerate(['a', 'b', 'c']):\n                convert_conv_fc(blobs, state_dict,\n                                f'res{i + 1}_{j}_branch2{letter}',\n                                f'layer{i}.{j}.conv{k+1}', converted_names)\n                convert_bn(blobs, state_dict,\n                           f'res{i + 1}_{j}_branch2{letter}_bn',\n                           f'layer{i}.{j}.bn{k + 1}', converted_names)\n    # check if all layers are converted\n    for key in blobs:\n        if key not in converted_names:\n            print(f'Not Convert: {key}')\n    # save checkpoint\n    checkpoint = dict()\n    checkpoint['state_dict'] = state_dict\n    torch.save(checkpoint, dst)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='Convert model keys')\n    parser.add_argument('src', help='src detectron model path')\n    parser.add_argument('dst', help='save path')\n    parser.add_argument('depth', type=int, help='ResNet model 
depth')\n    args = parser.parse_args()\n    convert(args.src, args.dst, args.depth)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/model_converters/publish_model.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport subprocess\n\nimport torch\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='Process a checkpoint to be published')\n    parser.add_argument('in_file', help='input checkpoint filename')\n    parser.add_argument('out_file', help='output checkpoint filename')\n    args = parser.parse_args()\n    return args\n\n\ndef process_checkpoint(in_file, out_file):\n    checkpoint = torch.load(in_file, map_location='cpu')\n    # remove optimizer for smaller file size\n    if 'optimizer' in checkpoint:\n        del checkpoint['optimizer']\n    # if it is necessary to remove some sensitive data in checkpoint['meta'],\n    # add the code here.\n    if torch.__version__ >= '1.6':\n        torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False)\n    else:\n        torch.save(checkpoint, out_file)\n    sha = subprocess.check_output(['sha256sum', out_file]).decode()\n    if out_file.endswith('.pth'):\n        out_file_name = out_file[:-4]\n    else:\n        out_file_name = out_file\n    final_file = out_file_name + f'-{sha[:8]}.pth'\n    subprocess.Popen(['mv', out_file, final_file])\n\n\ndef main():\n    args = parse_args()\n    process_checkpoint(args.in_file, args.out_file)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/model_converters/regnet2mmdet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nfrom collections import OrderedDict\n\nimport torch\n\n\ndef convert_stem(model_key, model_weight, state_dict, converted_names):\n    new_key = model_key.replace('stem.conv', 'conv1')\n    new_key = new_key.replace('stem.bn', 'bn1')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n    print(f'Convert {model_key} to {new_key}')\n\n\ndef convert_head(model_key, model_weight, state_dict, converted_names):\n    new_key = model_key.replace('head.fc', 'fc')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n    print(f'Convert {model_key} to {new_key}')\n\n\ndef convert_reslayer(model_key, model_weight, state_dict, converted_names):\n    split_keys = model_key.split('.')\n    layer, block, module = split_keys[:3]\n    block_id = int(block[1:])\n    layer_name = f'layer{int(layer[1:])}'\n    block_name = f'{block_id - 1}'\n\n    if block_id == 1 and module == 'bn':\n        new_key = f'{layer_name}.{block_name}.downsample.1.{split_keys[-1]}'\n    elif block_id == 1 and module == 'proj':\n        new_key = f'{layer_name}.{block_name}.downsample.0.{split_keys[-1]}'\n    elif module == 'f':\n        if split_keys[3] == 'a_bn':\n            module_name = 'bn1'\n        elif split_keys[3] == 'b_bn':\n            module_name = 'bn2'\n        elif split_keys[3] == 'c_bn':\n            module_name = 'bn3'\n        elif split_keys[3] == 'a':\n            module_name = 'conv1'\n        elif split_keys[3] == 'b':\n            module_name = 'conv2'\n        elif split_keys[3] == 'c':\n            module_name = 'conv3'\n        new_key = f'{layer_name}.{block_name}.{module_name}.{split_keys[-1]}'\n    else:\n        raise ValueError(f'Unsupported conversion of key {model_key}')\n    print(f'Convert {model_key} to {new_key}')\n    state_dict[new_key] = model_weight\n    converted_names.add(model_key)\n\n\ndef convert(src, dst):\n    \"\"\"Convert keys in pycls pretrained RegNet models to mmdet style.\"\"\"\n    # load caffe model\n    regnet_model = torch.load(src)\n    blobs = regnet_model['model_state']\n    # convert to pytorch style\n    state_dict = OrderedDict()\n    converted_names = set()\n    for key, weight in blobs.items():\n        if 'stem' in key:\n            convert_stem(key, weight, state_dict, converted_names)\n        elif 'head' in key:\n            convert_head(key, weight, state_dict, converted_names)\n        elif key.startswith('s'):\n            convert_reslayer(key, weight, state_dict, converted_names)\n\n    # check if all layers are converted\n    for key in blobs:\n        if key not in converted_names:\n            print(f'not converted: {key}')\n    # save checkpoint\n    checkpoint = dict()\n    checkpoint['state_dict'] = state_dict\n    torch.save(checkpoint, dst)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='Convert model keys')\n    parser.add_argument('src', help='src detectron model path')\n    parser.add_argument('dst', help='save path')\n    args = parser.parse_args()\n    convert(args.src, args.dst)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/model_converters/selfsup2mmdet.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nfrom collections import OrderedDict\n\nimport torch\n\n\ndef moco_convert(src, dst):\n    \"\"\"Convert keys in pycls pretrained moco models to mmdet style.\"\"\"\n    # load caffe model\n    moco_model = torch.load(src)\n    blobs = moco_model['state_dict']\n    # convert to pytorch style\n    state_dict = OrderedDict()\n    for k, v in blobs.items():\n        if not k.startswith('module.encoder_q.'):\n            continue\n        old_k = k\n        k = k.replace('module.encoder_q.', '')\n        state_dict[k] = v\n        print(old_k, '->', k)\n    # save checkpoint\n    checkpoint = dict()\n    checkpoint['state_dict'] = state_dict\n    torch.save(checkpoint, dst)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='Convert model keys')\n    parser.add_argument('src', help='src detectron model path')\n    parser.add_argument('dst', help='save path')\n    parser.add_argument(\n        '--selfsup', type=str, choices=['moco', 'swav'], help='save path')\n    args = parser.parse_args()\n    if args.selfsup == 'moco':\n        moco_convert(args.src, args.dst)\n    elif args.selfsup == 'swav':\n        print('SWAV does not need to convert the keys')\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/model_converters/upgrade_model_version.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport re\nimport tempfile\nfrom collections import OrderedDict\n\nimport torch\nfrom mmcv import Config\n\n\ndef is_head(key):\n    valid_head_list = [\n        'bbox_head', 'mask_head', 'semantic_head', 'grid_head', 'mask_iou_head'\n    ]\n\n    return any(key.startswith(h) for h in valid_head_list)\n\n\ndef parse_config(config_strings):\n    temp_file = tempfile.NamedTemporaryFile()\n    config_path = f'{temp_file.name}.py'\n    with open(config_path, 'w') as f:\n        f.write(config_strings)\n\n    config = Config.fromfile(config_path)\n    is_two_stage = True\n    is_ssd = False\n    is_retina = False\n    reg_cls_agnostic = False\n    if 'rpn_head' not in config.model:\n        is_two_stage = False\n        # check whether it is SSD\n        if config.model.bbox_head.type == 'SSDHead':\n            is_ssd = True\n        elif config.model.bbox_head.type == 'RetinaHead':\n            is_retina = True\n    elif isinstance(config.model['bbox_head'], list):\n        reg_cls_agnostic = True\n    elif 'reg_class_agnostic' in config.model.bbox_head:\n        reg_cls_agnostic = config.model.bbox_head \\\n            .reg_class_agnostic\n    temp_file.close()\n    return is_two_stage, is_ssd, is_retina, reg_cls_agnostic\n\n\ndef reorder_cls_channel(val, num_classes=81):\n    # bias\n    if val.dim() == 1:\n        new_val = torch.cat((val[1:], val[:1]), dim=0)\n    # weight\n    else:\n        out_channels, in_channels = val.shape[:2]\n        # conv_cls for softmax output\n        if out_channels != num_classes and out_channels % num_classes == 0:\n            new_val = val.reshape(-1, num_classes, in_channels, *val.shape[2:])\n            new_val = torch.cat((new_val[:, 1:], new_val[:, :1]), dim=1)\n            new_val = new_val.reshape(val.size())\n        # fc_cls\n        elif out_channels == num_classes:\n            new_val = torch.cat((val[1:], val[:1]), dim=0)\n        # agnostic | retina_cls | rpn_cls\n        else:\n            new_val = val\n\n    return new_val\n\n\ndef truncate_cls_channel(val, num_classes=81):\n\n    # bias\n    if val.dim() == 1:\n        if val.size(0) % num_classes == 0:\n            new_val = val[:num_classes - 1]\n        else:\n            new_val = val\n    # weight\n    else:\n        out_channels, in_channels = val.shape[:2]\n        # conv_logits\n        if out_channels % num_classes == 0:\n            new_val = val.reshape(num_classes, in_channels, *val.shape[2:])[1:]\n            new_val = new_val.reshape(-1, *val.shape[1:])\n        # agnostic\n        else:\n            new_val = val\n\n    return new_val\n\n\ndef truncate_reg_channel(val, num_classes=81):\n    # bias\n    if val.dim() == 1:\n        # fc_reg | rpn_reg\n        if val.size(0) % num_classes == 0:\n            new_val = val.reshape(num_classes, -1)[:num_classes - 1]\n            new_val = new_val.reshape(-1)\n        # agnostic\n        else:\n            new_val = val\n    # weight\n    else:\n        out_channels, in_channels = val.shape[:2]\n        # fc_reg | rpn_reg\n        if out_channels % num_classes == 0:\n            new_val = val.reshape(num_classes, -1, in_channels,\n                                  *val.shape[2:])[1:]\n            new_val = new_val.reshape(-1, *val.shape[1:])\n        # agnostic\n        else:\n            new_val = val\n\n    return new_val\n\n\ndef convert(in_file, out_file, num_classes):\n    \"\"\"Convert keys in checkpoints.\n\n    There can be some breaking 
changes during the development of mmdetection,\n    and this tool is used for upgrading checkpoints trained with old versions\n    to the latest one.\n    \"\"\"\n    checkpoint = torch.load(in_file)\n    in_state_dict = checkpoint.pop('state_dict')\n    out_state_dict = OrderedDict()\n    meta_info = checkpoint['meta']\n    is_two_stage, is_ssd, is_retina, reg_cls_agnostic = parse_config(\n        '#' + meta_info['config'])\n    if meta_info['mmdet_version'] <= '0.5.3' and is_retina:\n        upgrade_retina = True\n    else:\n        upgrade_retina = False\n\n    # MMDetection v2.5.0 unifies the class order in RPN\n    # if the model is trained in version<v2.5.0\n    # The RPN model should be upgraded to be used in version>=2.5.0\n    if meta_info['mmdet_version'] < '2.5.0':\n        upgrade_rpn = True\n    else:\n        upgrade_rpn = False\n\n    for key, val in in_state_dict.items():\n        new_key = key\n        new_val = val\n        if is_two_stage and is_head(key):\n            new_key = 'roi_head.{}'.format(key)\n\n        # classification\n        if upgrade_rpn:\n            m = re.search(\n                r'(conv_cls|retina_cls|rpn_cls|fc_cls|fcos_cls|'\n                r'fovea_cls).(weight|bias)', new_key)\n        else:\n            m = re.search(\n                r'(conv_cls|retina_cls|fc_cls|fcos_cls|'\n                r'fovea_cls).(weight|bias)', new_key)\n        if m is not None:\n            print(f'reorder cls channels of {new_key}')\n            new_val = reorder_cls_channel(val, num_classes)\n\n        # regression\n        if upgrade_rpn:\n            m = re.search(r'(fc_reg).(weight|bias)', new_key)\n        else:\n            m = re.search(r'(fc_reg|rpn_reg).(weight|bias)', new_key)\n        if m is not None and not reg_cls_agnostic:\n            print(f'truncate regression channels of {new_key}')\n            new_val = truncate_reg_channel(val, num_classes)\n\n        # mask head\n        m = re.search(r'(conv_logits).(weight|bias)', new_key)\n        if m is not None:\n            print(f'truncate mask prediction channels of {new_key}')\n            new_val = truncate_cls_channel(val, num_classes)\n\n        m = re.search(r'(cls_convs|reg_convs).\\d.(weight|bias)', key)\n        # Legacy issues in RetinaNet since V1.x\n        # Use ConvModule instead of nn.Conv2d in RetinaNet\n        # cls_convs.0.weight -> cls_convs.0.conv.weight\n        if m is not None and upgrade_retina:\n            param = m.groups()[1]\n            new_key = key.replace(param, f'conv.{param}')\n            out_state_dict[new_key] = val\n            print(f'rename the name of {key} to {new_key}')\n            continue\n\n        m = re.search(r'(cls_convs).\\d.(weight|bias)', key)\n        if m is not None and is_ssd:\n            print(f'reorder cls channels of {new_key}')\n            new_val = reorder_cls_channel(val, num_classes)\n\n        out_state_dict[new_key] = new_val\n    checkpoint['state_dict'] = out_state_dict\n    torch.save(checkpoint, out_file)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='Upgrade model version')\n    parser.add_argument('in_file', help='input checkpoint file')\n    parser.add_argument('out_file', help='output checkpoint file')\n    parser.add_argument(\n        '--num-classes',\n        type=int,\n        default=81,\n        help='number of classes of the original model')\n    args = parser.parse_args()\n    convert(args.in_file, args.out_file, args.num_classes)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/model_converters/upgrade_ssd_version.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport tempfile\nfrom collections import OrderedDict\n\nimport torch\nfrom mmcv import Config\n\n\ndef parse_config(config_strings):\n    temp_file = tempfile.NamedTemporaryFile()\n    config_path = f'{temp_file.name}.py'\n    with open(config_path, 'w') as f:\n        f.write(config_strings)\n\n    config = Config.fromfile(config_path)\n    # check whether it is SSD\n    if config.model.bbox_head.type != 'SSDHead':\n        raise AssertionError('This is not a SSD model.')\n\n\ndef convert(in_file, out_file):\n    checkpoint = torch.load(in_file)\n    in_state_dict = checkpoint.pop('state_dict')\n    out_state_dict = OrderedDict()\n    meta_info = checkpoint['meta']\n    parse_config('#' + meta_info['config'])\n    for key, value in in_state_dict.items():\n        if 'extra' in key:\n            layer_idx = int(key.split('.')[2])\n            new_key = 'neck.extra_layers.{}.{}.conv.'.format(\n                layer_idx // 2, layer_idx % 2) + key.split('.')[-1]\n        elif 'l2_norm' in key:\n            new_key = 'neck.l2_norm.weight'\n        elif 'bbox_head' in key:\n            new_key = key[:21] + '.0' + key[21:]\n        else:\n            new_key = key\n        out_state_dict[new_key] = value\n    checkpoint['state_dict'] = out_state_dict\n\n    if torch.__version__ >= '1.6':\n        torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False)\n    else:\n        torch.save(checkpoint, out_file)\n\n\ndef main():\n    parser = argparse.ArgumentParser(description='Upgrade SSD version')\n    parser.add_argument('in_file', help='input checkpoint file')\n    parser.add_argument('out_file', help='output checkpoint file')\n\n    args = parser.parse_args()\n    convert(args.in_file, args.out_file)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/slurm_test.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nCHECKPOINT=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nPY_ARGS=${@:5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/test.py ${CONFIG} ${CHECKPOINT} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/slurm_train.sh",
    "content": "#!/usr/bin/env bash\n\nset -x\n\nPARTITION=$1\nJOB_NAME=$2\nCONFIG=$3\nWORK_DIR=$4\nGPUS=${GPUS:-8}\nGPUS_PER_NODE=${GPUS_PER_NODE:-8}\nCPUS_PER_TASK=${CPUS_PER_TASK:-5}\nSRUN_ARGS=${SRUN_ARGS:-\"\"}\nPY_ARGS=${@:5}\n\nPYTHONPATH=\"$(dirname $0)/..\":$PYTHONPATH \\\nsrun -p ${PARTITION} \\\n    --job-name=${JOB_NAME} \\\n    --gres=gpu:${GPUS_PER_NODE} \\\n    --ntasks=${GPUS} \\\n    --ntasks-per-node=${GPUS_PER_NODE} \\\n    --cpus-per-task=${CPUS_PER_TASK} \\\n    --kill-on-bad-exit=1 \\\n    ${SRUN_ARGS} \\\n    python -u tools/train.py ${CONFIG} --work-dir=${WORK_DIR} --launcher=\"slurm\" ${PY_ARGS}\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/test.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport os\nimport os.path as osp\nimport time\nimport warnings\n\nimport mmcv\nimport torch\nfrom mmcv import Config, DictAction\nfrom mmcv.cnn import fuse_conv_bn\nfrom mmcv.runner import (get_dist_info, init_dist, load_checkpoint,\n                         wrap_fp16_model)\n\nfrom mmdet.apis import multi_gpu_test, single_gpu_test\nfrom mmdet.datasets import (build_dataloader, build_dataset,\n                            replace_ImageToTensor)\nfrom mmdet.models import build_detector\nfrom mmdet.utils import (build_ddp, build_dp, compat_cfg, get_device,\n                         replace_cfg_vals, setup_multi_processes,\n                         update_data_root)\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(\n        description='MMDet test (and eval) a model')\n    parser.add_argument('config', help='test config file path')\n    parser.add_argument('checkpoint', help='checkpoint file')\n    parser.add_argument(\n        '--work-dir',\n        help='the directory to save the file containing evaluation metrics')\n    parser.add_argument('--out', help='output result file in pickle format')\n    parser.add_argument(\n        '--fuse-conv-bn',\n        action='store_true',\n        help='Whether to fuse conv and bn, this will slightly increase'\n        'the inference speed')\n    parser.add_argument(\n        '--gpu-ids',\n        type=int,\n        nargs='+',\n        help='(Deprecated, please use --gpu-id) ids of gpus to use '\n        '(only applicable to non-distributed training)')\n    parser.add_argument(\n        '--gpu-id',\n        type=int,\n        default=0,\n        help='id of gpu to use '\n        '(only applicable to non-distributed testing)')\n    parser.add_argument(\n        '--format-only',\n        action='store_true',\n        help='Format the output results without perform evaluation. It is'\n        'useful when you want to format the result to a specific format and '\n        'submit it to the test server')\n    parser.add_argument(\n        '--eval',\n        type=str,\n        nargs='+',\n        help='evaluation metrics, which depends on the dataset, e.g., \"bbox\",'\n        ' \"segm\", \"proposal\" for COCO, and \"mAP\", \"recall\" for PASCAL VOC')\n    parser.add_argument('--show', action='store_true', help='show results')\n    parser.add_argument(\n        '--show-dir', help='directory where painted images will be saved')\n    parser.add_argument(\n        '--show-score-thr',\n        type=float,\n        default=0.3,\n        help='score threshold (default: 0.3)')\n    parser.add_argument(\n        '--gpu-collect',\n        action='store_true',\n        help='whether to use gpu to collect results.')\n    parser.add_argument(\n        '--tmpdir',\n        help='tmp directory used for collecting results from multiple '\n        'workers, available when gpu-collect is not specified')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function (deprecate), '\n        'change to --eval-options instead.')\n    parser.add_argument(\n        '--eval-options',\n        nargs='+',\n        action=DictAction,\n        help='custom options for evaluation, the key-value pair in xxx=yyy '\n        'format will be kwargs for dataset.evaluate() function')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n\n    if args.options and args.eval_options:\n        raise ValueError(\n            '--options and --eval-options cannot be both '\n            'specified, --options is deprecated in favor of --eval-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --eval-options')\n        args.eval_options = args.options\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    assert args.out or args.eval or args.format_only or args.show \\\n        or args.show_dir, \\\n        ('Please specify at least one operation (save/eval/format/show the '\n         'results / save the results) with the argument \"--out\", \"--eval\"'\n         ', \"--format-only\", \"--show\" or \"--show-dir\"')\n\n    if args.eval and args.format_only:\n        raise ValueError('--eval and --format_only cannot be both specified')\n\n    if args.out is not None and not args.out.endswith(('.pkl', '.pickle')):\n        raise ValueError('The output file must be a pkl file.')\n\n    cfg = Config.fromfile(args.config)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    cfg = compat_cfg(cfg)\n\n    # set multi-process settings\n    setup_multi_processes(cfg)\n\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n\n    if 'pretrained' in cfg.model:\n        cfg.model.pretrained = None\n    elif 'init_cfg' in cfg.model.backbone:\n        cfg.model.backbone.init_cfg = None\n\n    if cfg.model.get('neck'):\n        if isinstance(cfg.model.neck, list):\n            for neck_cfg in cfg.model.neck:\n                if neck_cfg.get('rfp_backbone'):\n                    if neck_cfg.rfp_backbone.get('pretrained'):\n                        neck_cfg.rfp_backbone.pretrained = None\n        elif cfg.model.neck.get('rfp_backbone'):\n            if cfg.model.neck.rfp_backbone.get('pretrained'):\n                cfg.model.neck.rfp_backbone.pretrained = None\n\n    if args.gpu_ids is not None:\n        cfg.gpu_ids = args.gpu_ids[0:1]\n        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '\n                      'Because we only support single GPU mode in '\n                      'non-distributed testing. 
Use the first GPU '\n                      'in `gpu_ids` now.')\n    else:\n        cfg.gpu_ids = [args.gpu_id]\n    cfg.device = get_device()\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n\n    test_dataloader_default_args = dict(\n        samples_per_gpu=1, workers_per_gpu=2, dist=distributed, shuffle=False)\n\n    # in case the test dataset is concatenated\n    if isinstance(cfg.data.test, dict):\n        cfg.data.test.test_mode = True\n        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:\n            # Replace 'ImageToTensor' to 'DefaultFormatBundle'\n            cfg.data.test.pipeline = replace_ImageToTensor(\n                cfg.data.test.pipeline)\n    elif isinstance(cfg.data.test, list):\n        for ds_cfg in cfg.data.test:\n            ds_cfg.test_mode = True\n        if cfg.data.test_dataloader.get('samples_per_gpu', 1) > 1:\n            for ds_cfg in cfg.data.test:\n                ds_cfg.pipeline = replace_ImageToTensor(ds_cfg.pipeline)\n\n    test_loader_cfg = {\n        **test_dataloader_default_args,\n        **cfg.data.get('test_dataloader', {})\n    }\n\n    rank, _ = get_dist_info()\n    # allows not to create\n    if args.work_dir is not None and rank == 0:\n        mmcv.mkdir_or_exist(osp.abspath(args.work_dir))\n        timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())\n        json_file = osp.join(args.work_dir, f'eval_{timestamp}.json')\n\n    # build the dataloader\n    dataset = build_dataset(cfg.data.test)\n    data_loader = build_dataloader(dataset, **test_loader_cfg)\n\n    # build the model and load checkpoint\n    cfg.model.train_cfg = None\n    model = build_detector(cfg.model, test_cfg=cfg.get('test_cfg'))\n    fp16_cfg = cfg.get('fp16', None)\n    if fp16_cfg is None and cfg.get('device', None) == 'npu':\n        fp16_cfg = dict(loss_scale='dynamic')\n    if fp16_cfg is not None:\n        wrap_fp16_model(model)\n    checkpoint = load_checkpoint(model, args.checkpoint, map_location='cpu')\n    if args.fuse_conv_bn:\n        model = fuse_conv_bn(model)\n    # old versions did not save class info in checkpoints, this walkaround is\n    # for backward compatibility\n    if 'CLASSES' in checkpoint.get('meta', {}):\n        model.CLASSES = checkpoint['meta']['CLASSES']\n    else:\n        model.CLASSES = dataset.CLASSES\n\n    if not distributed:\n        model = build_dp(model, cfg.device, device_ids=cfg.gpu_ids)\n        outputs = single_gpu_test(model, data_loader, args.show, args.show_dir,\n                                  args.show_score_thr)\n    else:\n        model = build_ddp(\n            model,\n            cfg.device,\n            device_ids=[int(os.environ['LOCAL_RANK'])],\n            broadcast_buffers=False)\n\n        # In multi_gpu_test, if tmpdir is None, some tesnors\n        # will init on cuda by default, and no device choice supported.\n        # Init a tmpdir to avoid error on npu here.\n        if cfg.device == 'npu' and args.tmpdir is None:\n            args.tmpdir = './npu_tmpdir'\n\n        outputs = multi_gpu_test(\n            model, data_loader, args.tmpdir, args.gpu_collect\n            or cfg.evaluation.get('gpu_collect', False))\n\n    rank, _ = get_dist_info()\n    if rank == 0:\n        if args.out:\n            print(f'\\nwriting results to {args.out}')\n            mmcv.dump(outputs, args.out)\n        kwargs = {} if 
args.eval_options is None else args.eval_options\n        if args.format_only:\n            dataset.format_results(outputs, **kwargs)\n        if args.eval:\n            eval_kwargs = cfg.get('evaluation', {}).copy()\n            # hard-code way to remove EvalHook args\n            for key in [\n                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',\n                    'rule', 'dynamic_intervals'\n            ]:\n                eval_kwargs.pop(key, None)\n            eval_kwargs.update(dict(metric=args.eval, **kwargs))\n            metric = dataset.evaluate(outputs, **eval_kwargs)\n            print(metric)\n            metric_dict = dict(config=args.config, metric=metric)\n            if args.work_dir is not None and rank == 0:\n                mmcv.dump(metric_dict, json_file)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/mmdetection/tools/train.py",
    "content": "# Copyright (c) OpenMMLab. All rights reserved.\nimport argparse\nimport copy\nimport os\nimport os.path as osp\nimport time\nimport warnings\n\nimport mmcv\nimport torch\nimport torch.distributed as dist\nfrom mmcv import Config, DictAction\nfrom mmcv.runner import get_dist_info, init_dist\nfrom mmcv.utils import get_git_hash\n\nfrom mmdet import __version__\nfrom mmdet.apis import init_random_seed, set_random_seed, train_detector\nfrom mmdet.datasets import build_dataset\nfrom mmdet.models import build_detector\nfrom mmdet.utils import (collect_env, get_device, get_root_logger,\n                         replace_cfg_vals, setup_multi_processes,\n                         update_data_root)\n\n\ndef parse_args():\n    parser = argparse.ArgumentParser(description='Train a detector')\n    parser.add_argument('config', help='train config file path')\n    parser.add_argument('--work-dir', help='the dir to save logs and models')\n    parser.add_argument(\n        '--resume-from', help='the checkpoint file to resume from')\n    parser.add_argument(\n        '--auto-resume',\n        action='store_true',\n        help='resume from the latest checkpoint automatically')\n    parser.add_argument(\n        '--no-validate',\n        action='store_true',\n        help='whether not to evaluate the checkpoint during training')\n    group_gpus = parser.add_mutually_exclusive_group()\n    group_gpus.add_argument(\n        '--gpus',\n        type=int,\n        help='(Deprecated, please use --gpu-id) number of gpus to use '\n        '(only applicable to non-distributed training)')\n    group_gpus.add_argument(\n        '--gpu-ids',\n        type=int,\n        nargs='+',\n        help='(Deprecated, please use --gpu-id) ids of gpus to use '\n        '(only applicable to non-distributed training)')\n    group_gpus.add_argument(\n        '--gpu-id',\n        type=int,\n        default=0,\n        help='id of gpu to use '\n        '(only applicable to non-distributed training)')\n    parser.add_argument('--seed', type=int, default=None, help='random seed')\n    parser.add_argument(\n        '--diff-seed',\n        action='store_true',\n        help='Whether or not set different seeds for different ranks')\n    parser.add_argument(\n        '--deterministic',\n        action='store_true',\n        help='whether to set deterministic options for CUDNN backend.')\n    parser.add_argument(\n        '--options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file (deprecate), '\n        'change to --cfg-options instead.')\n    parser.add_argument(\n        '--cfg-options',\n        nargs='+',\n        action=DictAction,\n        help='override some settings in the used config, the key-value pair '\n        'in xxx=yyy format will be merged into config file. If the value to '\n        'be overwritten is a list, it should be like key=\"[a,b]\" or key=a,b '\n        'It also allows nested list/tuple values, e.g. 
key=\"[(a,b),(c,d)]\" '\n        'Note that the quotation marks are necessary and that no white space '\n        'is allowed.')\n    parser.add_argument(\n        '--launcher',\n        choices=['none', 'pytorch', 'slurm', 'mpi'],\n        default='none',\n        help='job launcher')\n    parser.add_argument('--local_rank', type=int, default=0)\n    parser.add_argument(\n        '--auto-scale-lr',\n        action='store_true',\n        help='enable automatically scaling LR.')\n    args = parser.parse_args()\n    if 'LOCAL_RANK' not in os.environ:\n        os.environ['LOCAL_RANK'] = str(args.local_rank)\n\n    if args.options and args.cfg_options:\n        raise ValueError(\n            '--options and --cfg-options cannot be both '\n            'specified, --options is deprecated in favor of --cfg-options')\n    if args.options:\n        warnings.warn('--options is deprecated in favor of --cfg-options')\n        args.cfg_options = args.options\n\n    return args\n\n\ndef main():\n    args = parse_args()\n\n    cfg = Config.fromfile(args.config)\n\n    # replace the ${key} with the value of cfg.key\n    cfg = replace_cfg_vals(cfg)\n\n    # update data root according to MMDET_DATASETS\n    update_data_root(cfg)\n\n    if args.cfg_options is not None:\n        cfg.merge_from_dict(args.cfg_options)\n\n    if args.auto_scale_lr:\n        if 'auto_scale_lr' in cfg and \\\n                'enable' in cfg.auto_scale_lr and \\\n                'base_batch_size' in cfg.auto_scale_lr:\n            cfg.auto_scale_lr.enable = True\n        else:\n            warnings.warn('Can not find \"auto_scale_lr\" or '\n                          '\"auto_scale_lr.enable\" or '\n                          '\"auto_scale_lr.base_batch_size\" in your'\n                          ' configuration file. Please update all the '\n                          'configuration files to mmdet >= 2.24.1.')\n\n    # set multi-process settings\n    setup_multi_processes(cfg)\n\n    # set cudnn_benchmark\n    if cfg.get('cudnn_benchmark', False):\n        torch.backends.cudnn.benchmark = True\n\n    # work_dir is determined in this priority: CLI > segment in file > filename\n    if args.work_dir is not None:\n        # update configs according to CLI args if args.work_dir is not None\n        cfg.work_dir = args.work_dir\n    elif cfg.get('work_dir', None) is None:\n        # use config filename as default work_dir if cfg.work_dir is None\n        cfg.work_dir = osp.join('./work_dirs',\n                                osp.splitext(osp.basename(args.config))[0])\n\n    if args.resume_from is not None:\n        cfg.resume_from = args.resume_from\n    cfg.auto_resume = args.auto_resume\n    if args.gpus is not None:\n        cfg.gpu_ids = range(1)\n        warnings.warn('`--gpus` is deprecated because we only support '\n                      'single GPU mode in non-distributed training. '\n                      'Use `gpus=1` now.')\n    if args.gpu_ids is not None:\n        cfg.gpu_ids = args.gpu_ids[0:1]\n        warnings.warn('`--gpu-ids` is deprecated, please use `--gpu-id`. '\n                      'Because we only support single GPU mode in '\n                      'non-distributed training. 
Use the first GPU '\n                      'in `gpu_ids` now.')\n    if args.gpus is None and args.gpu_ids is None:\n        cfg.gpu_ids = [args.gpu_id]\n\n    # init distributed env first, since logger depends on the dist info.\n    if args.launcher == 'none':\n        distributed = False\n    else:\n        distributed = True\n        init_dist(args.launcher, **cfg.dist_params)\n        # re-set gpu_ids with distributed training mode\n        _, world_size = get_dist_info()\n        cfg.gpu_ids = range(world_size)\n\n    # create work_dir\n    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))\n    # dump config\n    cfg.dump(osp.join(cfg.work_dir, osp.basename(args.config)))\n    # init the logger before other steps\n    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())\n    log_file = osp.join(cfg.work_dir, f'{timestamp}.log')\n    logger = get_root_logger(log_file=log_file, log_level=cfg.log_level)\n\n    # init the meta dict to record some important information such as\n    # environment info and seed, which will be logged\n    meta = dict()\n    # log env info\n    env_info_dict = collect_env()\n    env_info = '\\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()])\n    dash_line = '-' * 60 + '\\n'\n    logger.info('Environment info:\\n' + dash_line + env_info + '\\n' +\n                dash_line)\n    meta['env_info'] = env_info\n    meta['config'] = cfg.pretty_text\n    # log some basic info\n    logger.info(f'Distributed training: {distributed}')\n    logger.info(f'Config:\\n{cfg.pretty_text}')\n\n    cfg.device = get_device()\n    # set random seeds\n    seed = init_random_seed(args.seed, device=cfg.device)\n    seed = seed + dist.get_rank() if args.diff_seed else seed\n    logger.info(f'Set random seed to {seed}, '\n                f'deterministic: {args.deterministic}')\n    set_random_seed(seed, deterministic=args.deterministic)\n    cfg.seed = seed\n    meta['seed'] = seed\n    meta['exp_name'] = osp.basename(args.config)\n\n    model = build_detector(\n        cfg.model,\n        train_cfg=cfg.get('train_cfg'),\n        test_cfg=cfg.get('test_cfg'))\n    model.init_weights()\n\n    datasets = [build_dataset(cfg.data.train)]\n    if len(cfg.workflow) == 2:\n        assert 'val' in [mode for (mode, _) in cfg.workflow]\n        val_dataset = copy.deepcopy(cfg.data.val)\n        val_dataset.pipeline = cfg.data.train.get(\n            'pipeline', cfg.data.train.dataset.get('pipeline'))\n        datasets.append(build_dataset(val_dataset))\n    if cfg.checkpoint_config is not None:\n        # save mmdet version, config file content and class names in\n        # checkpoints as meta data\n        cfg.checkpoint_config.meta = dict(\n            mmdet_version=__version__ + get_git_hash()[:7],\n            CLASSES=datasets[0].CLASSES)\n    # add an attribute for visualization convenience\n    model.CLASSES = datasets[0].CLASSES\n    train_detector(\n        model,\n        datasets,\n        cfg,\n        distributed=distributed,\n        validate=(not args.no_validate),\n        timestamp=timestamp,\n        meta=meta)\n\n\nif __name__ == '__main__':\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/models_menu/mmscraper.py",
    "content": "import requests\nfrom bs4 import BeautifulSoup\nimport json\nimport requests\nfrom tqdm import tqdm\n\nurl = 'https://raw.githubusercontent.com/open-mmlab/mmdetection/2.x/README.md'\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all ul tags\nul_tags = soup.find_all('ul')\ninstance_seg = ul_tags[1]\n\n# for the all ul tags, make a dict of each li tag contents and href and append to a list\nli_tags = instance_seg.find_all('li')\nli_tags_list = []\nfor li in li_tags:\n    li_tags_list.append(\n        {'name': li.find('a').text, 'link': \"https://github.com/open-mmlab/mmdetection/tree/2.x/\" + li.find('a')['href'], })\nmodel_id = 0\ncol_names = [\"id\", \"Model\", \"Model Name\", \"Backbone\", \"Lr schd\", \"Memory (GB)\",\n             \"Inference Time (fps)\", \"Box AP\", \"Mask AP\", \"Config\", \"Checkpoint_link\"]\n\ntr_tags_list = []\n# =================================================================================================\n\n# Mask R-CNN (ICCV'2017)\nurl = li_tags_list[0]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the first table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntable = soup.find_all('table')[0]\ntr_tags = table.find_all('tr')\n\nfor tr in tr_tags[1:]:\n    td_tags = tr.find_all('td')\n    td_tag_dict = {}\n    td_tag_dict[\"id\"] = model_id\n    model_id += 1\n    td_tag_dict[\"Model\"] = \"Mask R-CNN\"\n    td_tag_dict[\"Model Name\"] = \"Mask R-CNN\"\n    for i in range(len(td_tags)):\n        if i == 0:\n            td_tag_dict[\"Backbone\"] = td_tags[i].text\n        elif i == 1:\n            td_tag_dict[\"Style\"] = td_tags[i].text\n        elif i == 2:\n            td_tag_dict[\"Lr schd\"] = td_tags[i].text\n        elif i == 3:\n            td_tag_dict[\"Memory (GB)\"] = td_tags[i].text\n        elif i == 4:\n            td_tag_dict[\"Inference Time (fps)\"] = td_tags[i].text\n        elif i == 5:\n            td_tag_dict[\"box AP\"] = td_tags[i].text\n        elif i == 6:\n            td_tag_dict[\"mask AP\"] = td_tags[i].text\n        elif i == 7:\n            td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n        elif i == 8:\n            td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n    tr_tags_list.append(td_tag_dict)\n\n# =================================================================================================\n\n# Cascade Mask R-CNN (CVPR'2018)\nurl = li_tags_list[1]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntable = soup.find_all('table')[1]\ntr_tags = table.find_all('tr')\n\nfor tr in tr_tags[1:]:\n    td_tags = tr.find_all('td')\n    td_tag_dict = {}\n    td_tag_dict[\"id\"] = model_id\n    model_id += 1\n    td_tag_dict[\"Model\"] = \"Cascade Mask R-CNN\"\n    td_tag_dict[\"Model Name\"] = \"Cascade Mask R-CNN\"\n    for i in range(len(td_tags)):\n        if i == 0:\n            td_tag_dict[\"Backbone\"] = td_tags[i].text\n        elif i == 1:\n            td_tag_dict[\"Style\"] = td_tags[i].text\n        elif i == 2:\n            td_tag_dict[\"Lr schd\"] = td_tags[i].text\n        elif i == 3:\n            td_tag_dict[\"Memory (GB)\"] = td_tags[i].text\n        elif i == 4:\n            td_tag_dict[\"Inference Time (fps)\"] = 
td_tags[i].text\n        elif i == 5:\n            td_tag_dict[\"box AP\"] = td_tags[i].text\n        elif i == 6:\n            td_tag_dict[\"mask AP\"] = td_tags[i].text\n        elif i == 7:\n            td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n        elif i == 8:\n            td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n    tr_tags_list.append(td_tag_dict)\n# =================================================================================================\n# Mask Scoring R-CNN (CVPR'2019)\nurl = li_tags_list[2]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntable = soup.find_all('table')[0]\ntr_tags = table.find_all('tr')\n\nfor tr in tr_tags[1:]:\n    td_tags = tr.find_all('td')\n    td_tag_dict = {}\n    td_tag_dict[\"id\"] = model_id\n    model_id += 1\n    td_tag_dict[\"Model\"] = \"Mask Scoring R-CNN\"\n    td_tag_dict[\"Model Name\"] = \"Mask Scoring R-CNN\"\n    for i in range(len(td_tags)):\n        if i == 0:\n            td_tag_dict[\"Backbone\"] = td_tags[i].text\n        elif i == 1:\n            td_tag_dict[\"Style\"] = td_tags[i].text\n        elif i == 2:\n            td_tag_dict[\"Lr schd\"] = td_tags[i].text\n        elif i == 3:\n            td_tag_dict[\"Memory (GB)\"] = td_tags[i].text\n        elif i == 4:\n            td_tag_dict[\"Inference Time (fps)\"] = td_tags[i].text\n        elif i == 5:\n            td_tag_dict[\"box AP\"] = td_tags[i].text\n        elif i == 6:\n            td_tag_dict[\"mask AP\"] = td_tags[i].text\n        elif i == 7:\n            td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n        elif i == 8:\n            td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n    tr_tags_list.append(td_tag_dict)\n# =================================================================================================# Hybrid Task Cascade (CVPR'2019)\nurl = li_tags_list[3]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntable = soup.find_all('table')[0]\ntr_tags = table.find_all('tr')\n\nfor tr in tr_tags[1:]:\n    td_tags = tr.find_all('td')\n    td_tag_dict = {}\n    td_tag_dict[\"id\"] = model_id\n    model_id += 1\n    td_tag_dict[\"Model\"] = \"Hybrid Task Cascade\"\n    td_tag_dict[\"Model Name\"] = \"Hybrid Task Cascade\"\n    for i in range(len(td_tags)):\n        if i == 0:\n            td_tag_dict[\"Backbone\"] = td_tags[i].text\n        elif i == 1:\n            td_tag_dict[\"Style\"] = td_tags[i].text\n        elif i == 2:\n            td_tag_dict[\"Lr schd\"] = td_tags[i].text\n        elif i == 3:\n            td_tag_dict[\"Memory (GB)\"] = td_tags[i].text\n        elif i == 4:\n            td_tag_dict[\"Inference Time (fps)\"] = td_tags[i].text\n        elif i == 5:\n            td_tag_dict[\"box AP\"] = td_tags[i].text\n        elif i == 6:\n            td_tag_dict[\"mask AP\"] = td_tags[i].text\n        elif i == 7:\n            td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n        elif i == 8:\n            td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n    tr_tags_list.append(td_tag_dict)\n# # 
=================================================================================================\n# # YOLACT (ICCV'2019)\nurl = li_tags_list[4]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntable = soup.find_all('table')[0]\ntr_tags = table.find_all('tr')\n\nfor tr in tr_tags[1:]:\n    td_tags = tr.find_all('td')\n    td_tag_dict = {}\n    td_tag_dict[\"id\"] = model_id\n    model_id += 1\n    td_tag_dict[\"Model\"] = \"YOLACT\"\n    td_tag_dict[\"Model Name\"] = \"YOLACT\"\n    for i in range(len(td_tags)):\n        if i == 2:\n            td_tag_dict[\"Backbone\"] = td_tags[i].text\n            td_tag_dict[\"Style\"] = \"-\"\n            td_tag_dict[\"Lr schd\"] = \"-\"\n            td_tag_dict[\"Memory (GB)\"] = \"-\"\n        elif i == 3:\n            td_tag_dict[\"Inference Time (fps)\"] = td_tags[i].text\n        elif i == 4:\n            td_tag_dict[\"box AP\"] = \"-\"\n            td_tag_dict[\"mask AP\"] = td_tags[i].text\n        elif i == 6:\n            td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n        elif i == 7:\n            td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n    tr_tags_list.append(td_tag_dict)\n# remove the model at index 1tr_tags_list.pop(1)\n# =================================================================================================\n\n# InstaBoost (ICCV'2019) cancelled ❌ requires custom installation\n\n# =================================================================================================\n\n# SOLO (ECCV'2020)\nurl = li_tags_list[6]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntables = soup.find_all('table')\n\nfor tableno, table in enumerate(tables):\n    tr_tags = table.find_all('tr')\n    for trno, tr in enumerate(tr_tags[1:]):\n        td_tags = tr.find_all('td')\n        td_tag_dict = {}\n        td_tag_dict[\"id\"] = model_id\n        model_id += 1\n        td_tag_dict[\"Model\"] = \"SOLO\"\n        if tableno == 0:\n            td_tag_dict[\"Model Name\"] = \"SOLO\"\n        elif tableno == 1:\n            td_tag_dict[\"Model Name\"] = \"Decoupled SOLO\"\n        elif tableno == 2:\n            td_tag_dict[\"Model Name\"] = \"Decoupled Light SOLO\"\n        for i in range(len(td_tags)):\n            if i == 0:\n                td_tag_dict[\"Backbone\"] = td_tags[i].text\n            elif i == 1:\n                td_tag_dict[\"Style\"] = td_tags[i].text\n            elif i == 3:\n                td_tag_dict[\"Lr schd\"] = td_tags[i].text\n            elif i == 4:\n                td_tag_dict[\"Memory (GB)\"] = td_tags[i].text\n            elif i == 5:\n                td_tag_dict[\"Inference Time (fps)\"] = td_tags[i].text\n            elif i == 6:\n                td_tag_dict[\"box AP\"] = \"-\"\n                td_tag_dict[\"mask AP\"] = td_tags[i].text\n            elif i == 7:\n                td_tag_dict[\"Config\"] = \"https://github.com/open-mmlab/mmdetection/tree/master/\"\n                if trno == 0 and tableno == 0:\n                    td_tag_dict[\"Config\"] += \"configs/solo/solo_r50_fpn_1x_coco.py\"\n                elif trno == 1 and tableno == 0:\n                    td_tag_dict[\"Config\"] += 
\"configs/solo/solo_r50_fpn_3x_coco.py\"\n                elif trno == 0 and tableno == 1:\n                    td_tag_dict[\"Config\"] += \"configs/solo/decoupled_solo_r50_fpn_1x_coco.py\"\n                elif trno == 1 and tableno == 1:\n                    td_tag_dict[\"Config\"] += \"configs/solo/decoupled_solo_r50_fpn_3x_coco.py\"\n                elif trno == 0 and tableno == 2:\n                    td_tag_dict[\"Config\"] += \"configs/solo/decoupled_solo_light_r50_fpn_3x_coco.py\"\n                td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n        tr_tags_list.append(td_tag_dict)\n# =================================================================================================\n# PointRend (CVPR'2020) cancelled ❌ caffe only\n\n# =================================================================================================\n\n# DetectoRS (ArXiv'2020) cancelled ❌ complicated format\n\n# =================================================================================================\n\n# SOLOv2 (NeurIPS'2020)\n\nurl = li_tags_list[9]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntables = soup.find_all('table')\n\n\nfor tableno, table in enumerate(tables):\n    tr_tags = table.find_all('tr')\n    for trno, tr in enumerate(tr_tags[1:]):\n        td_tags = tr.find_all('td')\n        td_tag_dict = {}\n        td_tag_dict[\"id\"] = model_id\n        model_id += 1\n        td_tag_dict[\"Model\"] = \"SOLOv2\"\n        if tableno == 0:\n            td_tag_dict[\"Model Name\"] = \"SOLOv2\"\n        elif tableno == 1:\n            td_tag_dict[\"Model Name\"] = \"Light SOLOv2\"\n        for i in range(len(td_tags)):\n            if i == 0:\n                td_tag_dict[\"Backbone\"] = td_tags[i].text\n            elif i == 1:\n                td_tag_dict[\"Style\"] = td_tags[i].text\n            elif i == 3:\n                td_tag_dict[\"Lr schd\"] = td_tags[i].text\n            elif i == 4:\n                td_tag_dict[\"Memory (GB)\"] = td_tags[i].text\n                td_tag_dict[\"Inference Time (fps)\"] = \"-\"\n                td_tag_dict[\"box AP\"] = \"-\"\n            elif i == 5:\n                td_tag_dict[\"mask AP\"] = td_tags[i].text\n            elif i == 6:\n                td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n            elif i == 7:\n                td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n        tr_tags_list.append(td_tag_dict)\n# =================================================================================================\n\n# SCNet (AAAI'2021)\n\nurl = li_tags_list[10]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntable = soup.find_all('table')[0]\ntr_tags = table.find_all('tr')\n\nfor tr in tr_tags[1:]:\n    td_tags = tr.find_all('td')\n    td_tag_dict = {}\n    td_tag_dict[\"id\"] = model_id\n    model_id += 1\n    td_tag_dict[\"Model\"] = \"SCNet\"\n    td_tag_dict[\"Model Name\"] = \"SCNet\"\n    for i in range(len(td_tags)):\n        if i == 0:\n            td_tag_dict[\"Backbone\"] = td_tags[i].text\n        elif i == 1:\n            td_tag_dict[\"Style\"] = td_tags[i].text\n        elif i == 2:\n            
td_tag_dict[\"Lr schd\"] = td_tags[i].text\n        elif i == 3:\n            td_tag_dict[\"Memory (GB)\"] = td_tags[i].text\n        elif i == 4:\n            td_tag_dict[\"Inference Time (fps)\"] = td_tags[i].text\n        elif i == 5:\n            td_tag_dict[\"box AP\"] = td_tags[i].text\n        elif i == 6:\n            td_tag_dict[\"mask AP\"] = td_tags[i].text\n        elif i == 9:\n            td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n        elif i == 10:\n            td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n    tr_tags_list.append(td_tag_dict)\n# =================================================================================================\n\n# QueryInst (ICCV'2021)\n\nurl = li_tags_list[11]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntable = soup.find_all('table')[0]\ntr_tags = table.find_all('tr')\n\nfor tr in tr_tags[1:]:\n    td_tags = tr.find_all('td')\n    td_tag_dict = {}\n    td_tag_dict[\"id\"] = model_id\n    model_id += 1\n    td_tag_dict[\"Model\"] = \"QueryInst\"\n    td_tag_dict[\"Model Name\"] = \"QueryInst\"\n    for i in range(len(td_tags)):\n        if i == 1:\n            td_tag_dict[\"Backbone\"] = td_tags[i].text\n        elif i == 2:\n            td_tag_dict[\"Style\"] = td_tags[i].text\n        elif i == 3:\n            td_tag_dict[\"Lr schd\"] = td_tags[i].text\n            td_tag_dict[\"Memory (GB)\"] = \"-\"\n            td_tag_dict[\"Inference Time (fps)\"] = \"-\"\n        elif i == 7:\n            td_tag_dict[\"box AP\"] = td_tags[i].text\n        elif i == 8:\n            td_tag_dict[\"mask AP\"] = td_tags[i].text\n        elif i == 9:\n            td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n        elif i == 10:\n            td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n    tr_tags_list.append(td_tag_dict)\n# =================================================================================================\n\n# Mask2Former (ArXiv'2021)\n\nurl = li_tags_list[12]['link']\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all tr tags from the second table in the page, for each tr tag, get all td tags and append to a dictionary, append all dictionaries to a list\ntable = soup.find_all('table')[1]\ntr_tags = table.find_all('tr')\n\nfor tr in tr_tags[1:]:\n    td_tags = tr.find_all('td')\n    td_tag_dict = {}\n    td_tag_dict[\"id\"] = model_id\n    model_id += 1\n    td_tag_dict[\"Model\"] = \"Mask2Former\"\n    td_tag_dict[\"Model Name\"] = \"Mask2Former\"\n    for i in range(len(td_tags)):\n        if i == 0:\n            td_tag_dict[\"Backbone\"] = td_tags[i].text\n        elif i == 1:\n            td_tag_dict[\"Style\"] = td_tags[i].text\n        elif i == 3:\n            td_tag_dict[\"Lr schd\"] = td_tags[i].text\n        elif i == 4:\n            td_tag_dict[\"Memory (GB)\"] = td_tags[i].text\n        elif i == 5:\n            td_tag_dict[\"Inference Time (fps)\"] = td_tags[i].text\n        elif i == 6:\n            td_tag_dict[\"box AP\"] = td_tags[i].text\n        elif i == 7:\n            td_tag_dict[\"mask AP\"] = td_tags[i].text\n        elif i == 8:\n            td_tag_dict[\"Config\"] = td_tags[i].find('a')['href']\n        elif i == 9:\n            td_tag_dict[\"Checkpoint_link\"] = td_tags[i].find('a')['href']\n\n    
tr_tags_list.append(td_tag_dict)\n\n#\n# Save the list of dictionaries as a json file\ntr_tags_list = [x for x in tr_tags_list if x[\"Style\"] != \"caffe\"]\n\n# reid all models\nid_count = 5\ncorrupted_models = []\nfor i in tqdm(range(len(tr_tags_list))):\n    tr_tags_list[i][\"id\"] = id_count\n    id_count += 1\n    # replace /open-mmlab/mmdetection/blob/master in config with /mmdetection/configs\n    tr_tags_list[i][\"Config\"] = tr_tags_list[i][\"Config\"].replace(\n        \"https://github.com/open-mmlab/mmdetection/tree/master\", \"mmdetection\")\n    tr_tags_list[i][\"Config\"] = tr_tags_list[i][\"Config\"].replace(\n        \"https://github.com/open-mmlab/mmdetection/blob/master\", \"mmdetection\")\n    tr_tags_list[i][\"Checkpoint\"] = \"mmdetection/checkpoints/\" + tr_tags_list[i][\"Checkpoint_link\"].split(\n        \"/\")[-1]\n    tr_tags_list[i][\"Checkpoint Size (MB)\"] = round(int(requests.head(\n        tr_tags_list[i][\"Checkpoint_link\"]).headers.get('Content-Length', 0)) / (1024 * 1024), 2)\n\n    if tr_tags_list[i][\"Checkpoint Size (MB)\"] == 0:\n        print(\"Checkpoint size not found for model: \", tr_tags_list[i][\"id\"])\n        corrupted_models.append(i)\n        id_count -= 1\n\n    tr_tags_list[i].pop(\"Style\", None)\n\n# remove corrupted models\nfor i in sorted(corrupted_models, reverse=True):\n    tr_tags_list.pop(i)\n\n\nwith open('models_json.json', 'w') as f:\n    json.dump(tr_tags_list, f, indent=4)\n"
  },
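  {
    "path": "DLTA_AI_app/models_menu/example_checkpoint_download.py",
    "content": "\"\"\"Illustrative sketch, NOT part of the original DLTA_AI repository.\n\nShows one way an entry from models_menu/models_json.json (as produced by\nmmscraper.py) could be consumed: check the remote size via the\nContent-Length header, then stream the checkpoint to the local path in the\n\"Checkpoint\" field. The file name and the download_checkpoint helper are\nhypothetical.\n\"\"\"\nimport json\nimport os\n\nimport requests\n\n\ndef download_checkpoint(entry, chunk_size=1024 * 1024):\n    # entry: one dict from models_json.json with \"Checkpoint_link\" (remote URL)\n    # and \"Checkpoint\" (local target path)\n    url = entry[\"Checkpoint_link\"]\n    target = entry[\"Checkpoint\"]\n    os.makedirs(os.path.dirname(target) or \".\", exist_ok=True)\n\n    # size check similar to the one in mmscraper.py: HEAD request + Content-Length header\n    head = requests.head(url, allow_redirects=True)\n    size_mb = int(head.headers.get(\"Content-Length\", 0)) / (1024 * 1024)\n    print(f\"Downloading {entry['Model Name']} ({size_mb:.2f} MB) to {target}\")\n\n    # stream the file to disk in chunks to avoid loading it fully into memory\n    with requests.get(url, stream=True) as r:\n        r.raise_for_status()\n        with open(target, \"wb\") as f:\n            for chunk in r.iter_content(chunk_size=chunk_size):\n                f.write(chunk)\n    return target\n\n\nif __name__ == \"__main__\":\n    with open(\"models_json.json\") as f:\n        models = json.load(f)\n    # download the first entry (YOLOv8n-seg) as a demonstration\n    download_checkpoint(models[0])\n"
  },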
  {
    "path": "DLTA_AI_app/models_menu/models_json.json",
    "content": "[\r\n    {\r\n        \"id\": 0,\r\n        \"Model\": \"YOLOv8\",\r\n        \"Model Name\": \"YOLOv8n-seg\",\r\n        \"Backbone\": \"-\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"36.7\",\r\n        \"mask AP\": \"30.5\",\r\n        \"Config\": \"-\",\r\n        \"Checkpoint_link\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n-seg.pt\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/yolov8n-seg.pt\",\r\n        \"Checkpoint Size (MB)\": 6.72\r\n    },\r\n    {\r\n        \"id\": 1,\r\n        \"Model\": \"YOLOv8\",\r\n        \"Model Name\": \"YOLOv8s-seg\",\r\n        \"Backbone\": \"-\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"44.6\",\r\n        \"mask AP\": \"36.8\",\r\n        \"Config\": \"-\",\r\n        \"Checkpoint_link\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8s-seg.pt\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/yolov8s-seg.pt\",\r\n        \"Checkpoint Size (MB)\": 22.7\r\n    },\r\n    {\r\n        \"id\": 2,\r\n        \"Model\": \"YOLOv8\",\r\n        \"Model Name\": \"YOLOv8m-seg\",\r\n        \"Backbone\": \"-\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"49.9\",\r\n        \"mask AP\": \"40.8\",\r\n        \"Config\": \"-\",\r\n        \"Checkpoint_link\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8m-seg.pt\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/yolov8m-seg.pt\",\r\n        \"Checkpoint Size (MB)\": 52.3\r\n    },\r\n    {\r\n        \"id\": 3,\r\n        \"Model\": \"YOLOv8\",\r\n        \"Model Name\": \"YOLOv8l-seg\",\r\n        \"Backbone\": \"-\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"52.3\",\r\n        \"mask AP\": \"42.6\",\r\n        \"Config\": \"-\",\r\n        \"Checkpoint_link\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l-seg.pt\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/yolov8l-seg.pt\",\r\n        \"Checkpoint Size (MB)\": 88.1\r\n    },\r\n    {\r\n        \"id\": 4,\r\n        \"Model\": \"YOLOv8\",\r\n        \"Model Name\": \"YOLOv8x-seg\",\r\n        \"Backbone\": \"-\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"53.4\",\r\n        \"mask AP\": \"43.4\",\r\n        \"Config\": \"-\",\r\n        \"Checkpoint_link\": \"https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8x-seg.pt\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/yolov8x-seg.pt\",\r\n        \"Checkpoint Size (MB)\": 137\r\n    },\r\n    {\r\n        \"id\": 5,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"4.4\",\r\n        \"Inference Time (fps)\": \"16.1\",\r\n        \"box AP\": \"38.2\",\r\n        \"mask AP\": \"34.7\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_1x_coco/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth\",\r\n        
\"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_r50_fpn_1x_coco_20200205-d4b0c5d6.pth\",\r\n        \"Checkpoint Size (MB)\": 169.62\r\n    },\r\n    {\r\n        \"id\": 6,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"R-50-FPN (FP16)\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"3.6\",\r\n        \"Inference Time (fps)\": \"24.1\",\r\n        \"box AP\": \"38.1\",\r\n        \"mask AP\": \"34.7\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_fp16_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/fp16/mask_rcnn_r50_fpn_fp16_1x_coco/mask_rcnn_r50_fpn_fp16_1x_coco_20200205-59faf7e4.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_r50_fpn_fp16_1x_coco_20200205-59faf7e4.pth\",\r\n        \"Checkpoint Size (MB)\": 85.05\r\n    },\r\n    {\r\n        \"id\": 7,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"2x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"39.2\",\r\n        \"mask AP\": \"35.4\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_r50_fpn_2x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_fpn_2x_coco/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_r50_fpn_2x_coco_bbox_mAP-0.392__segm_mAP-0.354_20200505_003907-3e542a40.pth\",\r\n        \"Checkpoint Size (MB)\": 169.63\r\n    },\r\n    {\r\n        \"id\": 8,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"R-101-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"6.4\",\r\n        \"Inference Time (fps)\": \"13.5\",\r\n        \"box AP\": \"40.0\",\r\n        \"mask AP\": \"36.1\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_r101_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_1x_coco/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_r101_fpn_1x_coco_20200204-1efe0ed5.pth\",\r\n        \"Checkpoint Size (MB)\": 242.32\r\n    },\r\n    {\r\n        \"id\": 9,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"R-101-FPN\",\r\n        \"Lr schd\": \"2x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"40.8\",\r\n        \"mask AP\": \"36.6\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_r101_fpn_2x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r101_fpn_2x_coco/mask_rcnn_r101_fpn_2x_coco_bbox_mAP-0.408__segm_mAP-0.366_20200505_071027-14b391c7.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_r101_fpn_2x_coco_bbox_mAP-0.408__segm_mAP-0.366_20200505_071027-14b391c7.pth\",\r\n        \"Checkpoint Size (MB)\": 242.32\r\n    },\r\n    {\r\n        \"id\": 10,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"X-101-32x4d-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"7.6\",\r\n        
\"Inference Time (fps)\": \"11.3\",\r\n        \"box AP\": \"41.9\",\r\n        \"mask AP\": \"37.5\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_1x_coco/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_x101_32x4d_fpn_1x_coco_20200205-478d0b67.pth\",\r\n        \"Checkpoint Size (MB)\": 241.03\r\n    },\r\n    {\r\n        \"id\": 11,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"X-101-32x4d-FPN\",\r\n        \"Lr schd\": \"2x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"42.2\",\r\n        \"mask AP\": \"37.8\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x4d_fpn_2x_coco/mask_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.422__segm_mAP-0.378_20200506_004702-faef898c.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_x101_32x4d_fpn_2x_coco_bbox_mAP-0.422__segm_mAP-0.378_20200506_004702-faef898c.pth\",\r\n        \"Checkpoint Size (MB)\": 241.03\r\n    },\r\n    {\r\n        \"id\": 12,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"X-101-64x4d-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"10.7\",\r\n        \"Inference Time (fps)\": \"8.0\",\r\n        \"box AP\": \"42.8\",\r\n        \"mask AP\": \"38.4\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_1x_coco/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201-9352eb0d.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_x101_64x4d_fpn_1x_coco_20200201-9352eb0d.pth\",\r\n        \"Checkpoint Size (MB)\": 391.11\r\n    },\r\n    {\r\n        \"id\": 13,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"X-101-64x4d-FPN\",\r\n        \"Lr schd\": \"2x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"42.7\",\r\n        \"mask AP\": \"38.1\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_64x4d_fpn_2x_coco/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208-39d6f70c.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_x101_64x4d_fpn_2x_coco_20200509_224208-39d6f70c.pth\",\r\n        \"Checkpoint Size (MB)\": 391.11\r\n    },\r\n    {\r\n        \"id\": 14,\r\n        \"Model\": \"Mask R-CNN\",\r\n        \"Model Name\": \"Mask R-CNN\",\r\n        \"Backbone\": \"X-101-32x8d-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"10.6\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"42.8\",\r\n        \"mask AP\": \"38.3\",\r\n        \"Config\": \"mmdetection/configs/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": 
\"https://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_x101_32x8d_fpn_1x_coco/mask_rcnn_x101_32x8d_fpn_1x_coco_20220630_173841-0aaf329e.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask_rcnn_x101_32x8d_fpn_1x_coco_20220630_173841-0aaf329e.pth\",\r\n        \"Checkpoint Size (MB)\": 411.48\r\n    },\r\n    {\r\n        \"id\": 15,\r\n        \"Model\": \"Cascade Mask R-CNN\",\r\n        \"Model Name\": \"Cascade Mask R-CNN\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"6.0\",\r\n        \"Inference Time (fps)\": \"11.2\",\r\n        \"box AP\": \"41.2\",\r\n        \"mask AP\": \"35.9\",\r\n        \"Config\": \"mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/cascade_mask_rcnn_r50_fpn_1x_coco_20200203-9d4dcb24.pth\",\r\n        \"Checkpoint Size (MB)\": 295.24\r\n    },\r\n    {\r\n        \"id\": 16,\r\n        \"Model\": \"Cascade Mask R-CNN\",\r\n        \"Model Name\": \"Cascade Mask R-CNN\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"41.9\",\r\n        \"mask AP\": \"36.5\",\r\n        \"Config\": \"mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r50_fpn_20e_coco/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/cascade_mask_rcnn_r50_fpn_20e_coco_bbox_mAP-0.419__segm_mAP-0.365_20200504_174711-4af8e66e.pth\",\r\n        \"Checkpoint Size (MB)\": 295.25\r\n    },\r\n    {\r\n        \"id\": 17,\r\n        \"Model\": \"Cascade Mask R-CNN\",\r\n        \"Model Name\": \"Cascade Mask R-CNN\",\r\n        \"Backbone\": \"R-101-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"7.9\",\r\n        \"Inference Time (fps)\": \"9.8\",\r\n        \"box AP\": \"42.9\",\r\n        \"mask AP\": \"37.3\",\r\n        \"Config\": \"mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_1x_coco/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/cascade_mask_rcnn_r101_fpn_1x_coco_20200203-befdf6ee.pth\",\r\n        \"Checkpoint Size (MB)\": 367.94\r\n    },\r\n    {\r\n        \"id\": 18,\r\n        \"Model\": \"Cascade Mask R-CNN\",\r\n        \"Model Name\": \"Cascade Mask R-CNN\",\r\n        \"Backbone\": \"R-101-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"43.4\",\r\n        \"mask AP\": \"37.8\",\r\n        \"Config\": \"mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_r101_fpn_20e_coco/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth\",\r\n        \"Checkpoint\": 
\"mmdetection/checkpoints/cascade_mask_rcnn_r101_fpn_20e_coco_bbox_mAP-0.434__segm_mAP-0.378_20200504_174836-005947da.pth\",\r\n        \"Checkpoint Size (MB)\": 367.95\r\n    },\r\n    {\r\n        \"id\": 19,\r\n        \"Model\": \"Cascade Mask R-CNN\",\r\n        \"Model Name\": \"Cascade Mask R-CNN\",\r\n        \"Backbone\": \"X-101-32x4d-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"9.2\",\r\n        \"Inference Time (fps)\": \"8.6\",\r\n        \"box AP\": \"44.3\",\r\n        \"mask AP\": \"38.3\",\r\n        \"Config\": \"mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/cascade_mask_rcnn_x101_32x4d_fpn_1x_coco_20200201-0f411b1f.pth\",\r\n        \"Checkpoint Size (MB)\": 366.65\r\n    },\r\n    {\r\n        \"id\": 20,\r\n        \"Model\": \"Cascade Mask R-CNN\",\r\n        \"Model Name\": \"Cascade Mask R-CNN\",\r\n        \"Backbone\": \"X-101-32x4d-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"9.2\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"45.0\",\r\n        \"mask AP\": \"39.0\",\r\n        \"Config\": \"mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/cascade_mask_rcnn_x101_32x4d_fpn_20e_coco_20200528_083917-ed1f4751.pth\",\r\n        \"Checkpoint Size (MB)\": 366.65\r\n    },\r\n    {\r\n        \"id\": 21,\r\n        \"Model\": \"Cascade Mask R-CNN\",\r\n        \"Model Name\": \"Cascade Mask R-CNN\",\r\n        \"Backbone\": \"X-101-64x4d-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"12.2\",\r\n        \"Inference Time (fps)\": \"6.7\",\r\n        \"box AP\": \"45.3\",\r\n        \"mask AP\": \"39.2\",\r\n        \"Config\": \"mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/cascade_mask_rcnn_x101_64x4d_fpn_1x_coco_20200203-9a2db89d.pth\",\r\n        \"Checkpoint Size (MB)\": 516.73\r\n    },\r\n    {\r\n        \"id\": 22,\r\n        \"Model\": \"Cascade Mask R-CNN\",\r\n        \"Model Name\": \"Cascade Mask R-CNN\",\r\n        \"Backbone\": \"X-101-64x4d-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"12.2\",\r\n        \"Inference Time (fps)\": \"\",\r\n        \"box AP\": \"45.6\",\r\n        \"mask AP\": \"39.5\",\r\n        \"Config\": \"mmdetection/configs/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/cascade_rcnn/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/cascade_mask_rcnn_x101_64x4d_fpn_20e_coco_20200512_161033-bdb5126a.pth\",\r\n        \"Checkpoint Size (MB)\": 
516.73\r\n    },\r\n    {\r\n        \"id\": 23,\r\n        \"Model\": \"Mask Scoring R-CNN\",\r\n        \"Model Name\": \"Mask Scoring R-CNN\",\r\n        \"Backbone\": \"R-X101-32x4d\",\r\n        \"Lr schd\": \"2x\",\r\n        \"Memory (GB)\": \"7.9\",\r\n        \"Inference Time (fps)\": \"11.0\",\r\n        \"box AP\": \"41.8\",\r\n        \"mask AP\": \"38.7\",\r\n        \"Config\": \"mmdetection/configs/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_32x4d_fpn_1x_coco/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206-81fd1740.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/ms_rcnn_x101_32x4d_fpn_1x_coco_20200206-81fd1740.pth\",\r\n        \"Checkpoint Size (MB)\": 303.36\r\n    },\r\n    {\r\n        \"id\": 24,\r\n        \"Model\": \"Mask Scoring R-CNN\",\r\n        \"Model Name\": \"Mask Scoring R-CNN\",\r\n        \"Backbone\": \"R-X101-64x4d\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"11.0\",\r\n        \"Inference Time (fps)\": \"8.0\",\r\n        \"box AP\": \"43.0\",\r\n        \"mask AP\": \"39.5\",\r\n        \"Config\": \"mmdetection/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_1x_coco/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206-86ba88d2.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/ms_rcnn_x101_64x4d_fpn_1x_coco_20200206-86ba88d2.pth\",\r\n        \"Checkpoint Size (MB)\": 453.44\r\n    },\r\n    {\r\n        \"id\": 25,\r\n        \"Model\": \"Mask Scoring R-CNN\",\r\n        \"Model Name\": \"Mask Scoring R-CNN\",\r\n        \"Backbone\": \"R-X101-64x4d\",\r\n        \"Lr schd\": \"2x\",\r\n        \"Memory (GB)\": \"11.0\",\r\n        \"Inference Time (fps)\": \"8.0\",\r\n        \"box AP\": \"42.6\",\r\n        \"mask AP\": \"39.5\",\r\n        \"Config\": \"mmdetection/configs/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/ms_rcnn/ms_rcnn_x101_64x4d_fpn_2x_coco/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308-02a445e2.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/ms_rcnn_x101_64x4d_fpn_2x_coco_20200308-02a445e2.pth\",\r\n        \"Checkpoint Size (MB)\": 453.44\r\n    },\r\n    {\r\n        \"id\": 26,\r\n        \"Model\": \"Hybrid Task Cascade\",\r\n        \"Model Name\": \"Hybrid Task Cascade\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"8.2\",\r\n        \"Inference Time (fps)\": \"5.8\",\r\n        \"box AP\": \"42.3\",\r\n        \"mask AP\": \"37.4\",\r\n        \"Config\": \"mmdetection/configs/htc/htc_r50_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_1x_coco/htc_r50_fpn_1x_coco_20200317-7332cf16.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/htc_r50_fpn_1x_coco_20200317-7332cf16.pth\",\r\n        \"Checkpoint Size (MB)\": 306.44\r\n    },\r\n    {\r\n        \"id\": 27,\r\n        \"Model\": \"Hybrid Task Cascade\",\r\n        \"Model Name\": \"Hybrid Task Cascade\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"8.2\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"43.3\",\r\n        \"mask AP\": \"38.3\",\r\n        \"Config\": \"mmdetection/configs/htc/htc_r50_fpn_20e_coco.py\",\r\n        
\"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r50_fpn_20e_coco/htc_r50_fpn_20e_coco_20200319-fe28c577.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/htc_r50_fpn_20e_coco_20200319-fe28c577.pth\",\r\n        \"Checkpoint Size (MB)\": 306.44\r\n    },\r\n    {\r\n        \"id\": 28,\r\n        \"Model\": \"Hybrid Task Cascade\",\r\n        \"Model Name\": \"Hybrid Task Cascade\",\r\n        \"Backbone\": \"R-101-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"10.2\",\r\n        \"Inference Time (fps)\": \"5.5\",\r\n        \"box AP\": \"44.8\",\r\n        \"mask AP\": \"39.6\",\r\n        \"Config\": \"mmdetection/configs/htc/htc_r101_fpn_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/htc/htc_r101_fpn_20e_coco/htc_r101_fpn_20e_coco_20200317-9b41b48f.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/htc_r101_fpn_20e_coco_20200317-9b41b48f.pth\",\r\n        \"Checkpoint Size (MB)\": 379.14\r\n    },\r\n    {\r\n        \"id\": 29,\r\n        \"Model\": \"Hybrid Task Cascade\",\r\n        \"Model Name\": \"Hybrid Task Cascade\",\r\n        \"Backbone\": \"X-101-32x4d-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"11.4\",\r\n        \"Inference Time (fps)\": \"5.0\",\r\n        \"box AP\": \"46.1\",\r\n        \"mask AP\": \"40.5\",\r\n        \"Config\": \"mmdetection/configs/htc/htc_x101_32x4d_fpn_16x1_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_32x4d_fpn_16x1_20e_coco/htc_x101_32x4d_fpn_16x1_20e_coco_20200318-de97ae01.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/htc_x101_32x4d_fpn_16x1_20e_coco_20200318-de97ae01.pth\",\r\n        \"Checkpoint Size (MB)\": 377.84\r\n    },\r\n    {\r\n        \"id\": 30,\r\n        \"Model\": \"Hybrid Task Cascade\",\r\n        \"Model Name\": \"Hybrid Task Cascade\",\r\n        \"Backbone\": \"X-101-64x4d-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"14.5\",\r\n        \"Inference Time (fps)\": \"4.4\",\r\n        \"box AP\": \"47.0\",\r\n        \"mask AP\": \"41.4\",\r\n        \"Config\": \"mmdetection/configs/htc/htc_x101_64x4d_fpn_16x1_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/htc/htc_x101_64x4d_fpn_16x1_20e_coco/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/htc_x101_64x4d_fpn_16x1_20e_coco_20200318-b181fd7a.pth\",\r\n        \"Checkpoint Size (MB)\": 527.92\r\n    },\r\n    {\r\n        \"id\": 31,\r\n        \"Model\": \"YOLACT\",\r\n        \"Model Name\": \"YOLACT\",\r\n        \"Backbone\": \"Resnet50-FPN\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"42.5\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"29.0\",\r\n        \"Config\": \"mmdetection/configs/yolact/yolact_r50_1x8_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r50_1x8_coco/yolact_r50_1x8_coco_20200908-f38d58df.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/yolact_r50_1x8_coco_20200908-f38d58df.pth\",\r\n        \"Checkpoint Size (MB)\": 134.96\r\n    },\r\n    {\r\n        \"id\": 32,\r\n        \"Model\": \"YOLACT\",\r\n        \"Model Name\": \"YOLACT\",\r\n        \"Backbone\": \"Resnet50-FPN\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        
\"Inference Time (fps)\": \"42.5\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"28.4\",\r\n        \"Config\": \"mmdetection/configs/yolact/yolact_r50_8x8_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r50_8x8_coco/yolact_r50_8x8_coco_20200908-ca34f5db.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/yolact_r50_8x8_coco_20200908-ca34f5db.pth\",\r\n        \"Checkpoint Size (MB)\": 134.96\r\n    },\r\n    {\r\n        \"id\": 33,\r\n        \"Model\": \"YOLACT\",\r\n        \"Model Name\": \"YOLACT\",\r\n        \"Backbone\": \"Resnet101-FPN\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"33.5\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"30.4\",\r\n        \"Config\": \"mmdetection/configs/yolact/yolact_r101_1x8_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/yolact/yolact_r101_1x8_coco/yolact_r101_1x8_coco_20200908-4cbe9101.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/yolact_r101_1x8_coco_20200908-4cbe9101.pth\",\r\n        \"Checkpoint Size (MB)\": 207.7\r\n    },\r\n    {\r\n        \"id\": 34,\r\n        \"Model\": \"SOLO\",\r\n        \"Model Name\": \"SOLO\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"8.0\",\r\n        \"Inference Time (fps)\": \"14.0\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"33.1\",\r\n        \"Config\": \"mmdetection/configs/solo/solo_r50_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_1x_coco/solo_r50_fpn_1x_coco_20210821_035055-2290a6b8.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solo_r50_fpn_1x_coco_20210821_035055-2290a6b8.pth\",\r\n        \"Checkpoint Size (MB)\": 138.75\r\n    },\r\n    {\r\n        \"id\": 35,\r\n        \"Model\": \"SOLO\",\r\n        \"Model Name\": \"SOLO\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"7.4\",\r\n        \"Inference Time (fps)\": \"14.0\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"35.9\",\r\n        \"Config\": \"mmdetection/configs/solo/solo_r50_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solo/solo_r50_fpn_3x_coco/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solo_r50_fpn_3x_coco_20210901_012353-11d224d7.pth\",\r\n        \"Checkpoint Size (MB)\": 138.75\r\n    },\r\n    {\r\n        \"id\": 36,\r\n        \"Model\": \"SOLO\",\r\n        \"Model Name\": \"Decoupled SOLO\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"7.8\",\r\n        \"Inference Time (fps)\": \"12.5\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"33.9\",\r\n        \"Config\": \"mmdetection/configs/solo/decoupled_solo_r50_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_1x_coco/decoupled_solo_r50_fpn_1x_coco_20210820_233348-6337c589.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/decoupled_solo_r50_fpn_1x_coco_20210820_233348-6337c589.pth\",\r\n        \"Checkpoint Size (MB)\": 152.97\r\n    },\r\n    {\r\n        \"id\": 37,\r\n        \"Model\": \"SOLO\",\r\n        \"Model Name\": \"Decoupled SOLO\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr 
schd\": \"3x\",\r\n        \"Memory (GB)\": \"7.9\",\r\n        \"Inference Time (fps)\": \"12.5\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"36.7\",\r\n        \"Config\": \"mmdetection/configs/solo/decoupled_solo_r50_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_r50_fpn_3x_coco/decoupled_solo_r50_fpn_3x_coco_20210821_042504-7b3301ec.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/decoupled_solo_r50_fpn_3x_coco_20210821_042504-7b3301ec.pth\",\r\n        \"Checkpoint Size (MB)\": 152.97\r\n    },\r\n    {\r\n        \"id\": 38,\r\n        \"Model\": \"SOLO\",\r\n        \"Model Name\": \"Decoupled Light SOLO\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"2.2\",\r\n        \"Inference Time (fps)\": \"31.2\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"32.9\",\r\n        \"Config\": \"mmdetection/configs/solo/decoupled_solo_light_r50_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solo/decoupled_solo_light_r50_fpn_3x_coco/decoupled_solo_light_r50_fpn_3x_coco_20210906_142703-e70e226f.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/decoupled_solo_light_r50_fpn_3x_coco_20210906_142703-e70e226f.pth\",\r\n        \"Checkpoint Size (MB)\": 123.69\r\n    },\r\n    {\r\n        \"id\": 39,\r\n        \"Model\": \"SOLOv2\",\r\n        \"Model Name\": \"SOLOv2\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"5.1\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"34.8\",\r\n        \"Config\": \"mmdetection/configs/solov2/solov2_r50_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_1x_coco/solov2_r50_fpn_1x_coco_20220512_125858-a357fa23.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solov2_r50_fpn_1x_coco_20220512_125858-a357fa23.pth\",\r\n        \"Checkpoint Size (MB)\": 178.02\r\n    },\r\n    {\r\n        \"id\": 40,\r\n        \"Model\": \"SOLOv2\",\r\n        \"Model Name\": \"SOLOv2\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"5.1\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"37.5\",\r\n        \"Config\": \"mmdetection/configs/solov2/solov2_r50_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r50_fpn_3x_coco/solov2_r50_fpn_3x_coco_20220512_125856-fed092d4.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solov2_r50_fpn_3x_coco_20220512_125856-fed092d4.pth\",\r\n        \"Checkpoint Size (MB)\": 178.02\r\n    },\r\n    {\r\n        \"id\": 41,\r\n        \"Model\": \"SOLOv2\",\r\n        \"Model Name\": \"SOLOv2\",\r\n        \"Backbone\": \"R-101\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"6.9\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"39.1\",\r\n        \"Config\": \"mmdetection/configs/solov2/solov2_r101_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_fpn_3x_coco/solov2_r101_fpn_3x_coco_20220511_095119-c559a076.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solov2_r101_fpn_3x_coco_20220511_095119-c559a076.pth\",\r\n        \"Checkpoint 
Size (MB)\": 250.72\r\n    },\r\n    {\r\n        \"id\": 42,\r\n        \"Model\": \"SOLOv2\",\r\n        \"Model Name\": \"SOLOv2\",\r\n        \"Backbone\": \"R-101(DCN)\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"7.1\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"41.2\",\r\n        \"Config\": \"mmdetection/configs/solov2/solov2_r101_dcn_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_r101_dcn_fpn_3x_coco/solov2_r101_dcn_fpn_3x_coco_20220513_214734-16c966cb.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solov2_r101_dcn_fpn_3x_coco_20220513_214734-16c966cb.pth\",\r\n        \"Checkpoint Size (MB)\": 262.74\r\n    },\r\n    {\r\n        \"id\": 43,\r\n        \"Model\": \"SOLOv2\",\r\n        \"Model Name\": \"SOLOv2\",\r\n        \"Backbone\": \"X-101(DCN)\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"11.3\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"42.4\",\r\n        \"Config\": \"mmdetection/configs/solov2/solov2_x101_dcn_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_x101_dcn_fpn_3x_coco/solov2_x101_dcn_fpn_3x_coco_20220513_214337-aef41095.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solov2_x101_dcn_fpn_3x_coco_20220513_214337-aef41095.pth\",\r\n        \"Checkpoint Size (MB)\": 433.59\r\n    },\r\n    {\r\n        \"id\": 44,\r\n        \"Model\": \"SOLOv2\",\r\n        \"Model Name\": \"Light SOLOv2\",\r\n        \"Backbone\": \"R-18\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"9.1\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"29.7\",\r\n        \"Config\": \"mmdetection/configs/solov2/solov2_light_r18_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r18_fpn_3x_coco/solov2_light_r18_fpn_3x_coco_20220511_083717-75fa355b.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solov2_light_r18_fpn_3x_coco_20220511_083717-75fa355b.pth\",\r\n        \"Checkpoint Size (MB)\": 69.78\r\n    },\r\n    {\r\n        \"id\": 45,\r\n        \"Model\": \"SOLOv2\",\r\n        \"Model Name\": \"Light SOLOv2\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"9.9\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"33.7\",\r\n        \"Config\": \"mmdetection/configs/solov2/solov2_light_r50_fpn_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/solov2/solov2_light_r50_fpn_3x_coco/solov2_light_r50_fpn_3x_coco_20220512_165256-c93a6074.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/solov2_light_r50_fpn_3x_coco_20220512_165256-c93a6074.pth\",\r\n        \"Checkpoint Size (MB)\": 119.84\r\n    },\r\n    {\r\n        \"id\": 46,\r\n        \"Model\": \"SCNet\",\r\n        \"Model Name\": \"SCNet\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"7.0\",\r\n        \"Inference Time (fps)\": \"6.2\",\r\n        \"box AP\": \"43.5\",\r\n        \"mask AP\": \"39.2\",\r\n        \"Config\": \"mmdetection/configs/scnet/scnet_r50_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": 
\"https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_1x_coco/scnet_r50_fpn_1x_coco-c3f09857.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/scnet_r50_fpn_1x_coco-c3f09857.pth\",\r\n        \"Checkpoint Size (MB)\": 361.98\r\n    },\r\n    {\r\n        \"id\": 47,\r\n        \"Model\": \"SCNet\",\r\n        \"Model Name\": \"SCNet\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"7.0\",\r\n        \"Inference Time (fps)\": \"6.2\",\r\n        \"box AP\": \"44.5\",\r\n        \"mask AP\": \"40.0\",\r\n        \"Config\": \"mmdetection/configs/scnet/scnet_r50_fpn_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r50_fpn_20e_coco/scnet_r50_fpn_20e_coco-a569f645.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/scnet_r50_fpn_20e_coco-a569f645.pth\",\r\n        \"Checkpoint Size (MB)\": 361.98\r\n    },\r\n    {\r\n        \"id\": 48,\r\n        \"Model\": \"SCNet\",\r\n        \"Model Name\": \"SCNet\",\r\n        \"Backbone\": \"R-101-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"8.9\",\r\n        \"Inference Time (fps)\": \"5.8\",\r\n        \"box AP\": \"45.8\",\r\n        \"mask AP\": \"40.9\",\r\n        \"Config\": \"mmdetection/configs/scnet/scnet_r101_fpn_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_r101_fpn_20e_coco/scnet_r101_fpn_20e_coco-294e312c.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/scnet_r101_fpn_20e_coco-294e312c.pth\",\r\n        \"Checkpoint Size (MB)\": 434.73\r\n    },\r\n    {\r\n        \"id\": 49,\r\n        \"Model\": \"SCNet\",\r\n        \"Model Name\": \"SCNet\",\r\n        \"Backbone\": \"X-101-64x4d-FPN\",\r\n        \"Lr schd\": \"20e\",\r\n        \"Memory (GB)\": \"13.2\",\r\n        \"Inference Time (fps)\": \"4.9\",\r\n        \"box AP\": \"47.5\",\r\n        \"mask AP\": \"42.3\",\r\n        \"Config\": \"mmdetection/configs/scnet/scnet_x101_64x4d_fpn_20e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/scnet/scnet_x101_64x4d_fpn_20e_coco/scnet_x101_64x4d_fpn_20e_coco-fb09dec9.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/scnet_x101_64x4d_fpn_20e_coco-fb09dec9.pth\",\r\n        \"Checkpoint Size (MB)\": 583.52\r\n    },\r\n    {\r\n        \"id\": 50,\r\n        \"Model\": \"QueryInst\",\r\n        \"Model Name\": \"QueryInst\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"1x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"42.0\",\r\n        \"mask AP\": \"37.5\",\r\n        \"Config\": \"mmdetection/configs/queryinst/queryinst_r50_fpn_1x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_1x_coco/queryinst_r50_fpn_1x_coco_20210907_084916-5a8f1998.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/queryinst_r50_fpn_1x_coco_20210907_084916-5a8f1998.pth\",\r\n        \"Checkpoint Size (MB)\": 659.15\r\n    },\r\n    {\r\n        \"id\": 51,\r\n        \"Model\": \"QueryInst\",\r\n        \"Model Name\": \"QueryInst\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"44.8\",\r\n        \"mask AP\": \"39.8\",\r\n        \"Config\": 
\"mmdetection/configs/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_mstrain_480-800_3x_coco/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643-7837af86.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/queryinst_r50_fpn_mstrain_480-800_3x_coco_20210901_103643-7837af86.pth\",\r\n        \"Checkpoint Size (MB)\": 659.16\r\n    },\r\n    {\r\n        \"id\": 52,\r\n        \"Model\": \"QueryInst\",\r\n        \"Model Name\": \"QueryInst\",\r\n        \"Backbone\": \"R-50-FPN\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"47.5\",\r\n        \"mask AP\": \"41.7\",\r\n        \"Config\": \"mmdetection/configs/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802-85cffbd8.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/queryinst_r50_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_101802-85cffbd8.pth\",\r\n        \"Checkpoint Size (MB)\": 659.36\r\n    },\r\n    {\r\n        \"id\": 53,\r\n        \"Model\": \"QueryInst\",\r\n        \"Model Name\": \"QueryInst\",\r\n        \"Backbone\": \"R-101-FPN\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"46.4\",\r\n        \"mask AP\": \"41.0\",\r\n        \"Config\": \"mmdetection/configs/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_mstrain_480-800_3x_coco/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048-91f9995b.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/queryinst_r101_fpn_mstrain_480-800_3x_coco_20210904_104048-91f9995b.pth\",\r\n        \"Checkpoint Size (MB)\": 731.85\r\n    },\r\n    {\r\n        \"id\": 54,\r\n        \"Model\": \"QueryInst\",\r\n        \"Model Name\": \"QueryInst\",\r\n        \"Backbone\": \"R-101-FPN\",\r\n        \"Lr schd\": \"3x\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"49.0\",\r\n        \"mask AP\": \"42.9\",\r\n        \"Config\": \"mmdetection/configs/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/queryinst/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621-76cce59f.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/queryinst_r101_fpn_300_proposals_crop_mstrain_480-800_3x_coco_20210904_153621-76cce59f.pth\",\r\n        \"Checkpoint Size (MB)\": 732.05\r\n    },\r\n    {\r\n        \"id\": 55,\r\n        \"Model\": \"Mask2Former\",\r\n        \"Model Name\": \"Mask2Former\",\r\n        \"Backbone\": \"R-50\",\r\n        \"Lr schd\": \"50e\",\r\n        \"Memory (GB)\": \"13.7\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"45.7\",\r\n        \"mask AP\": \"42.9\",\r\n        \"Config\": \"mmdetection/configs/mask2former/mask2former_r50_lsj_8x2_50e_coco.py\",\r\n        \"Checkpoint_link\": 
\"https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r50_lsj_8x2_50e_coco/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask2former_r50_lsj_8x2_50e_coco_20220506_191028-8e96e88b.pth\",\r\n        \"Checkpoint Size (MB)\": 168.3\r\n    },\r\n    {\r\n        \"id\": 56,\r\n        \"Model\": \"Mask2Former\",\r\n        \"Model Name\": \"Mask2Former\",\r\n        \"Backbone\": \"R-101\",\r\n        \"Lr schd\": \"50e\",\r\n        \"Memory (GB)\": \"15.5\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"46.7\",\r\n        \"mask AP\": \"44.0\",\r\n        \"Config\": \"mmdetection/configs/mask2former/mask2former_r101_lsj_8x2_50e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_r101_lsj_8x2_50e_coco/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask2former_r101_lsj_8x2_50e_coco_20220426_100250-c50b6fa6.pth\",\r\n        \"Checkpoint Size (MB)\": 241.0\r\n    },\r\n    {\r\n        \"id\": 57,\r\n        \"Model\": \"Mask2Former\",\r\n        \"Model Name\": \"Mask2Former\",\r\n        \"Backbone\": \"Swin-T\",\r\n        \"Lr schd\": \"50e\",\r\n        \"Memory (GB)\": \"15.3\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"47.7\",\r\n        \"mask AP\": \"44.7\",\r\n        \"Config\": \"mmdetection/configs/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask2former_swin-t-p4-w7-224_lsj_8x2_50e_coco_20220508_091649-4a943037.pth\",\r\n        \"Checkpoint Size (MB)\": 181.28\r\n    },\r\n    {\r\n        \"id\": 58,\r\n        \"Model\": \"Mask2Former\",\r\n        \"Model Name\": \"Mask2Former\",\r\n        \"Backbone\": \"Swin-S\",\r\n        \"Lr schd\": \"50e\",\r\n        \"Memory (GB)\": \"18.8\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"49.3\",\r\n        \"mask AP\": \"46.1\",\r\n        \"Config\": \"mmdetection/configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py\",\r\n        \"Checkpoint_link\": \"https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth\",\r\n        \"Checkpoint Size (MB)\": 262.86\r\n    },\r\n    {\r\n        \"id\": 59,\r\n        \"Model\": \"SAM\",\r\n        \"Model Name\": \"ViT-H SAM model\",\r\n        \"Backbone\": \"ViT-H\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"-\",\r\n        \"Config\": \"-\",\r\n        \"Checkpoint_link\": \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/sam_vit_h_4b8939.pth\",\r\n        \"Checkpoint Size (MB)\": 2445.75\r\n    },\r\n    {\r\n        \"id\": 60,\r\n        \"Model\": \"SAM\",\r\n        \"Model Name\": \"ViT-L SAM model\",\r\n        \"Backbone\": 
\"ViT-L\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"-\",\r\n        \"Config\": \"-\",\r\n        \"Checkpoint_link\": \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/sam_vit_l_0b3195.pth\",\r\n        \"Checkpoint Size (MB)\": 1191.64\r\n    },\r\n    {\r\n        \"id\": 61,\r\n        \"Model\": \"SAM\",\r\n        \"Model Name\": \"ViT-B SAM model\",\r\n        \"Backbone\": \"ViT-B\",\r\n        \"Lr schd\": \"-\",\r\n        \"Memory (GB)\": \"-\",\r\n        \"Inference Time (fps)\": \"-\",\r\n        \"box AP\": \"-\",\r\n        \"mask AP\": \"-\",\r\n        \"Config\": \"-\",\r\n        \"Checkpoint_link\": \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth\",\r\n        \"Checkpoint\": \"mmdetection/checkpoints/sam_vit_b_01ec64.pth\",\r\n        \"Checkpoint Size (MB)\": 357.67\r\n    }\r\n]"
  },
  {
    "path": "DLTA_AI_app/models_menu/samScraper.py",
    "content": "import requests\nfrom bs4 import BeautifulSoup\nimport json\nimport requests\n\nurl = 'https://github.com/facebookresearch/segment-anything/blob/main/README.md'\npage = requests.get(url)\nsoup = BeautifulSoup(page.content, 'html.parser')\n\n# get all ul inside article tag\nul = soup.find('article').find_all('ul')\n\nmodels_json = []\n# get all li inside ul\nli = ul[0].find_all('li')\nfor i in li:\n    model = {}\n    #print(i.find('a').text.split(\" \")[0]) # get text inside a tag (model name)\n    name =  i.find('a').text.split(\" \")[0]\n    name = name.replace(\"-\", \"_\").lower()\n    model['name'] = name\n    #print(i.find('a')['href']) # get href inside a tag)\n    model['url'] = i.find('a')['href']\n    checkpoint = \"mmdetection/checkpoints/\" + i.find('a')['href'].split(\"/\")[-1]\n    model['checkpoint'] = checkpoint\n    models_json.append(model)\n\nwith open (\"sam_models.json\", \"w\") as f:\n            json.dump(models_json, f, indent=4)"
  },
  {
    "path": "DLTA_AI_app/models_menu/sam_models.json",
    "content": "[\n    {\n        \"name\": \"vit_h\",\n        \"url\": \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth\",\n        \"checkpoint\": \"mmdetection/checkpoints/sam_vit_h_4b8939.pth\"\n    },\n    {\n        \"name\": \"vit_l\",\n        \"url\": \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_l_0b3195.pth\",\n        \"checkpoint\": \"mmdetection/checkpoints/sam_vit_l_0b3195.pth\"\n    },\n    {\n        \"name\": \"vit_b\",\n        \"url\": \"https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth\",\n        \"checkpoint\": \"mmdetection/checkpoints/sam_vit_b_01ec64.pth\"\n    }\n]"
  },
  {
    "path": "DLTA_AI_app/setup.py",
    "content": "from __future__ import print_function\n\nimport distutils.spawn\nimport os\nimport re\nimport shlex\nimport subprocess\nimport sys\n\nfrom setuptools import find_packages\nfrom setuptools import setup\n\n\n# def get_version():\n#     filename = \"labelme/__init__.py\"\n#     with open(filename) as f:\n#         match = re.search(\n#             r\"\"\"^__version__ = ['\"]([^'\"]*)['\"]\"\"\", f.read(), re.M\n#         )\n#     if not match:\n#         raise RuntimeError(\"{} doesn't contain __version__\".format(filename))\n#     version = match.groups()[0]\n#     return version\n\n\n# def get_install_requires():\n#     PY3 = sys.version_info[0] == 3\n#     PY2 = sys.version_info[0] == 2\n#     assert PY3 or PY2\n\n#     install_requires = [\n#         \"imgviz>=0.11,<1.3\",\n#         \"matplotlib<3.3\",  # for PyInstaller\n#         \"numpy\",\n#         \"Pillow>=2.8\",\n#         \"PyYAML\",\n#         \"PyQt6\",\n#         \"termcolor\",\n#     ]\n\n#     # Find python binding for qt with priority:\n#     # PyQt6 -> PySide2 -> PyQt4,\n#     # and PyQt6 is automatically installed on Python3.\n#     QT_BINDING = None\n\n#     try:\n#         import PyQt6  # NOQA\n\n#         QT_BINDING = \"pyqt5\"\n#     except ImportError:\n#         pass\n\n#     if QT_BINDING is None:\n#         try:\n#             import PySide2  # NOQA\n\n#             QT_BINDING = \"pyside2\"\n#         except ImportError:\n#             pass\n\n#     if QT_BINDING is None:\n#         try:\n#             import PyQt4  # NOQA\n\n#             QT_BINDING = \"pyqt4\"\n#         except ImportError:\n#             if PY2:\n#                 print(\n#                     \"Please install PyQt6, PySide2 or PyQt4 for Python2.\\n\"\n#                     \"Note that PyQt6 can be installed via pip for Python3.\",\n#                     file=sys.stderr,\n#                 )\n#                 sys.exit(1)\n#             assert PY3\n#             # PyQt6 can be installed via pip for Python3\n#             # 5.15.3, 5.15.4 won't work with PyInstaller\n#             install_requires.append(\"PyQt6!=5.15.3,!=5.15.4\")\n#             QT_BINDING = \"pyqt5\"\n#     del QT_BINDING\n\n#     if os.name == \"nt\":  # Windows\n#         install_requires.append(\"colorama\")\n\n#     return install_requires\n\n\ndef get_long_description():\n    with open(\"README.md\") as f:\n        long_description = f.read()\n    try:\n        import github2pypi\n\n        return github2pypi.replace_url(\n            slug=\"wkentaro/labelme\", content=long_description\n        )\n    except Exception:\n        return long_description\n\n\ndef main():\n    version = get_version()\n\n    if sys.argv[1] == \"release\":\n        if not distutils.spawn.find_executable(\"twine\"):\n            print(\n                \"Please install twine:\\n\\n\\tpip install twine\\n\",\n                file=sys.stderr,\n            )\n            sys.exit(1)\n\n        commands = [\n            \"python tests/docs_tests/man_tests/test_labelme_1.py\",\n            \"git tag v{:s}\".format(version),\n            \"git push origin master --tag\",\n            \"python setup.py sdist\",\n            \"twine upload dist/labelme-{:s}.tar.gz\".format(version),\n        ]\n        for cmd in commands:\n            print(\"+ {:s}\".format(cmd))\n            subprocess.check_call(shlex.split(cmd))\n        sys.exit(0)\n\n    setup(\n        name=\"labelme\",\n        version=version,\n        packages=find_packages(exclude=[\"github2pypi\"]),\n        
description=\"Image Polygonal Annotation with Python\",\n        long_description=get_long_description(),\n        long_description_content_type=\"text/markdown\",\n        author=\"Kentaro Wada\",\n        author_email=\"www.kentaro.wada@gmail.com\",\n        url=\"https://github.com/wkentaro/labelme\",\n        install_requires=get_install_requires(),\n        license=\"GPLv3\",\n        keywords=\"Image Annotation, Machine Learning\",\n        classifiers=[\n            \"Development Status :: 5 - Production/Stable\",\n            \"Intended Audience :: Developers\",\n            \"Natural Language :: English\",\n            \"Programming Language :: Python\",\n            \"Programming Language :: Python :: 2.7\",\n            \"Programming Language :: Python :: 3.5\",\n            \"Programming Language :: Python :: 3.6\",\n            \"Programming Language :: Python :: 3.7\",\n            \"Programming Language :: Python :: Implementation :: CPython\",\n            \"Programming Language :: Python :: Implementation :: PyPy\",\n        ],\n        package_data={\"labelme\": [\"icons/*\", \"config/*.yaml\"]},\n        entry_points={\n            \"console_scripts\": [\n                \"labelme=labelme.__main__:main\",\n                \"labelme_draw_json=labelme.cli.draw_json:main\",\n                \"labelme_draw_label_png=labelme.cli.draw_label_png:main\",\n                \"labelme_json_to_dataset=labelme.cli.json_to_dataset:main\",\n                \"labelme_on_docker=labelme.cli.on_docker:main\",\n            ],\n        },\n        data_files=[(\"share/man/man1\", [\"docs/man/labelme.1\"])],\n    )\n\n\nif __name__ == \"__main__\":\n    main()\n"
  },
  {
    "path": "DLTA_AI_app/tempCodeRunnerFile.py",
    "content": "test_vid_1"
  },
  {
    "path": "DLTA_AI_app/trackers/__init__.py",
    "content": ""
  },
  {
    "path": "DLTA_AI_app/trackers/botsort/basetrack.py",
    "content": "import numpy as np\nfrom collections import OrderedDict\n\n\nclass TrackState(object):\n    New = 0\n    Tracked = 1\n    Lost = 2\n    LongLost = 3\n    Removed = 4\n\n\nclass BaseTrack(object):\n    _count = 0\n\n    track_id = 0\n    is_activated = False\n    state = TrackState.New\n\n    history = OrderedDict()\n    features = []\n    curr_feature = None\n    score = 0\n    start_frame = 0\n    frame_id = 0\n    time_since_update = 0\n\n    # multi-camera\n    location = (np.inf, np.inf)\n\n    @property\n    def end_frame(self):\n        return self.frame_id\n\n    @staticmethod\n    def next_id():\n        BaseTrack._count += 1\n        return BaseTrack._count\n\n    def activate(self, *args):\n        raise NotImplementedError\n\n    def predict(self):\n        raise NotImplementedError\n\n    def update(self, *args, **kwargs):\n        raise NotImplementedError\n\n    def mark_lost(self):\n        self.state = TrackState.Lost\n\n    def mark_long_lost(self):\n        self.state = TrackState.LongLost\n\n    def mark_removed(self):\n        self.state = TrackState.Removed\n\n    @staticmethod\n    def clear_count():\n        BaseTrack._count = 0\n"
  },
  {
    "path": "DLTA_AI_app/trackers/botsort/bot_sort.py",
    "content": "import cv2\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom collections import deque\n\nfrom trackers.botsort import  matching\nfrom trackers.botsort.gmc import GMC\nfrom trackers.botsort.basetrack import BaseTrack, TrackState\nfrom trackers.botsort.kalman_filter import KalmanFilter\n\n# from fast_reid.fast_reid_interfece import FastReIDInterface\n\nfrom .reid_multibackend import ReIDDetectMultiBackend\nfrom ultralytics.yolo.utils.ops import xyxy2xywh, xywh2xyxy\n\n\nclass STrack(BaseTrack):\n    shared_kalman = KalmanFilter()\n\n    def __init__(self, tlwh, score, cls, feat=None, feat_history=50):\n\n        # wait activate\n        self._tlwh = np.asarray(tlwh, dtype=np.float32)\n        self.kalman_filter = None\n        self.mean, self.covariance = None, None\n        self.is_activated = False\n\n        self.cls = -1\n        self.cls_hist = []  # (cls id, freq)\n        self.update_cls(cls, score)\n\n        self.score = score\n        self.tracklet_len = 0\n\n        self.smooth_feat = None\n        self.curr_feat = None\n        if feat is not None:\n            self.update_features(feat)\n        self.features = deque([], maxlen=feat_history)\n        self.alpha = 0.9\n\n    def update_features(self, feat):\n        feat /= np.linalg.norm(feat)\n        self.curr_feat = feat\n        if self.smooth_feat is None:\n            self.smooth_feat = feat\n        else:\n            self.smooth_feat = self.alpha * self.smooth_feat + (1 - self.alpha) * feat\n        self.features.append(feat)\n        self.smooth_feat /= np.linalg.norm(self.smooth_feat)\n\n    def update_cls(self, cls, score):\n        if len(self.cls_hist) > 0:\n            max_freq = 0\n            found = False\n            for c in self.cls_hist:\n                if cls == c[0]:\n                    c[1] += score\n                    found = True\n\n                if c[1] > max_freq:\n                    max_freq = c[1]\n                    self.cls = c[0]\n            if not found:\n                self.cls_hist.append([cls, score])\n                self.cls = cls\n        else:\n            self.cls_hist.append([cls, score])\n            self.cls = cls\n\n    def predict(self):\n        mean_state = self.mean.copy()\n        if self.state != TrackState.Tracked:\n            mean_state[6] = 0\n            mean_state[7] = 0\n\n        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)\n\n    @staticmethod\n    def multi_predict(stracks):\n        if len(stracks) > 0:\n            multi_mean = np.asarray([st.mean.copy() for st in stracks])\n            multi_covariance = np.asarray([st.covariance for st in stracks])\n            for i, st in enumerate(stracks):\n                if st.state != TrackState.Tracked:\n                    multi_mean[i][6] = 0\n                    multi_mean[i][7] = 0\n            multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)\n            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):\n                stracks[i].mean = mean\n                stracks[i].covariance = cov\n\n    @staticmethod\n    def multi_gmc(stracks, H=np.eye(2, 3)):\n        if len(stracks) > 0:\n            multi_mean = np.asarray([st.mean.copy() for st in stracks])\n            multi_covariance = np.asarray([st.covariance for st in stracks])\n\n            R = H[:2, :2]\n            R8x8 = np.kron(np.eye(4, dtype=float), R)\n            t = H[:2, 2]\n\n            for i, (mean, cov) in 
enumerate(zip(multi_mean, multi_covariance)):\n                mean = R8x8.dot(mean)\n                mean[:2] += t\n                cov = R8x8.dot(cov).dot(R8x8.transpose())\n\n                stracks[i].mean = mean\n                stracks[i].covariance = cov\n\n    def activate(self, kalman_filter, frame_id):\n        \"\"\"Start a new tracklet\"\"\"\n        self.kalman_filter = kalman_filter\n        self.track_id = self.next_id()\n\n        self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xywh(self._tlwh))\n\n        self.tracklet_len = 0\n        self.state = TrackState.Tracked\n        if frame_id == 1:\n            self.is_activated = True\n        self.frame_id = frame_id\n        self.start_frame = frame_id\n\n    def re_activate(self, new_track, frame_id, new_id=False):\n\n        self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, self.tlwh_to_xywh(new_track.tlwh))\n        if new_track.curr_feat is not None:\n            self.update_features(new_track.curr_feat)\n        self.tracklet_len = 0\n        self.state = TrackState.Tracked\n        self.is_activated = True\n        self.frame_id = frame_id\n        if new_id:\n            self.track_id = self.next_id()\n        self.score = new_track.score\n\n        self.update_cls(new_track.cls, new_track.score)\n\n    def update(self, new_track, frame_id):\n        \"\"\"\n        Update a matched track\n        :type new_track: STrack\n        :type frame_id: int\n        :type update_feature: bool\n        :return:\n        \"\"\"\n        self.frame_id = frame_id\n        self.tracklet_len += 1\n\n        new_tlwh = new_track.tlwh\n\n        self.mean, self.covariance = self.kalman_filter.update(self.mean, self.covariance, self.tlwh_to_xywh(new_tlwh))\n\n        if new_track.curr_feat is not None:\n            self.update_features(new_track.curr_feat)\n\n        self.state = TrackState.Tracked\n        self.is_activated = True\n\n        self.score = new_track.score\n        self.update_cls(new_track.cls, new_track.score)\n\n    @property\n    def tlwh(self):\n        \"\"\"Get current position in bounding box format `(top left x, top left y,\n                width, height)`.\n        \"\"\"\n        if self.mean is None:\n            return self._tlwh.copy()\n        ret = self.mean[:4].copy()\n        ret[:2] -= ret[2:] / 2\n        return ret\n\n    @property\n    def tlbr(self):\n        \"\"\"Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,\n        `(top left, bottom right)`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[2:] += ret[:2]\n        return ret\n\n    @property\n    def xywh(self):\n        \"\"\"Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,\n        `(top left, bottom right)`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[:2] += ret[2:] / 2.0\n        return ret\n\n    @staticmethod\n    def tlwh_to_xyah(tlwh):\n        \"\"\"Convert bounding box to format `(center x, center y, aspect ratio,\n        height)`, where the aspect ratio is `width / height`.\n        \"\"\"\n        ret = np.asarray(tlwh).copy()\n        ret[:2] += ret[2:] / 2\n        ret[2] /= ret[3]\n        return ret\n\n    @staticmethod\n    def tlwh_to_xywh(tlwh):\n        \"\"\"Convert bounding box to format `(center x, center y, width,\n        height)`.\n        \"\"\"\n        ret = np.asarray(tlwh).copy()\n        ret[:2] += ret[2:] / 2\n        return ret\n\n    def to_xywh(self):\n        return 
self.tlwh_to_xywh(self.tlwh)\n\n    @staticmethod\n    def tlbr_to_tlwh(tlbr):\n        ret = np.asarray(tlbr).copy()\n        ret[2:] -= ret[:2]\n        return ret\n\n    @staticmethod\n    def tlwh_to_tlbr(tlwh):\n        ret = np.asarray(tlwh).copy()\n        ret[2:] += ret[:2]\n        return ret\n\n    def __repr__(self):\n        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)\n\n\nclass BoTSORT(object):\n    def __init__(self, \n                model_weights,\n                device,\n                fp16,\n                track_high_thresh:float = 0.45,\n                new_track_thresh:float = 0.6,\n                track_buffer:int = 30,\n                match_thresh:float = 0.8,\n                proximity_thresh:float = 0.5,\n                appearance_thresh:float = 0.25,\n                cmc_method:str = 'sparseOptFlow',\n                frame_rate=30,\n                lambda_=0.985\n                ):\n\n        self.tracked_stracks = []  # type: list[STrack]\n        self.lost_stracks = []  # type: list[STrack]\n        self.removed_stracks = []  # type: list[STrack]\n        BaseTrack.clear_count()\n\n        self.frame_id = 0\n\n        self.lambda_ = lambda_\n        self.track_high_thresh = track_high_thresh\n        self.new_track_thresh = new_track_thresh\n\n        self.buffer_size = int(frame_rate / 30.0 * track_buffer)\n        self.max_time_lost = self.buffer_size\n        self.kalman_filter = KalmanFilter()\n\n        # ReID module\n        self.proximity_thresh = proximity_thresh\n        self.appearance_thresh = appearance_thresh\n        self.match_thresh = match_thresh\n\n        self.model = ReIDDetectMultiBackend(weights=model_weights, device=device, fp16=fp16)\n\n        self.gmc = GMC(method=cmc_method, verbose=[None,False])\n\n    def update(self, output_results, img):\n        self.frame_id += 1\n        activated_starcks = []\n        refind_stracks = []\n        lost_stracks = []\n        removed_stracks = []\n        \n        xyxys = output_results[:, 0:4]\n        xywh = xyxy2xywh(xyxys.numpy())\n        confs = output_results[:, 4]\n        clss = output_results[:, 5]\n        \n        classes = clss.numpy()\n        xyxys = xyxys.numpy()\n        confs = confs.numpy()\n\n        remain_inds = confs > self.track_high_thresh\n        inds_low = confs > 0.1\n        inds_high = confs < self.track_high_thresh\n\n        inds_second = np.logical_and(inds_low, inds_high)\n        \n        dets_second = xywh[inds_second]\n        dets = xywh[remain_inds]\n        \n        scores_keep = confs[remain_inds]\n        scores_second = confs[inds_second]\n        \n        classes_keep = classes[remain_inds]\n        clss_second = classes[inds_second]\n\n        self.height, self.width = img.shape[:2]\n\n        '''Extract embeddings '''\n        features_keep = self._get_features(dets, img)\n\n        if len(dets) > 0:\n            '''Detections'''\n            \n            detections = [STrack(xyxy, s, c, f.cpu().numpy()) for\n                              (xyxy, s, c, f) in zip(dets, scores_keep, classes_keep, features_keep)]\n        else:\n            detections = []\n\n        ''' Add newly detected tracklets to tracked_stracks'''\n        unconfirmed = []\n        tracked_stracks = []  # type: list[STrack]\n        for track in self.tracked_stracks:\n            if not track.is_activated:\n                unconfirmed.append(track)\n            else:\n                tracked_stracks.append(track)\n\n        ''' Step 2: 
First association, with high score detection boxes'''\n        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)\n\n        # Predict the current location with KF\n        STrack.multi_predict(strack_pool)\n\n        # Fix camera motion\n        warp = self.gmc.apply(img, dets)\n        STrack.multi_gmc(strack_pool, warp)\n        STrack.multi_gmc(unconfirmed, warp)\n\n        # Associate with high score detection boxes\n        raw_emb_dists = matching.embedding_distance(strack_pool, detections)\n        dists = matching.fuse_motion(self.kalman_filter, raw_emb_dists, strack_pool, detections, only_position=False, lambda_=self.lambda_)\n\n        # ious_dists = matching.iou_distance(strack_pool, detections)\n        # ious_dists_mask = (ious_dists > self.proximity_thresh)\n\n        # ious_dists = matching.fuse_score(ious_dists, detections)\n\n        # emb_dists = matching.embedding_distance(strack_pool, detections) / 2.0\n        # raw_emb_dists = emb_dists.copy()\n        # emb_dists[emb_dists > self.appearance_thresh] = 1.0\n        # emb_dists[ious_dists_mask] = 1.0\n        # dists = np.minimum(ious_dists, emb_dists)\n\n            # Popular ReID method (JDE / FairMOT)\n            # raw_emb_dists = matching.embedding_distance(strack_pool, detections)\n            # dists = matching.fuse_motion(self.kalman_filter, raw_emb_dists, strack_pool, detections)\n            # emb_dists = dists\n\n            # IoU making ReID\n            # dists = matching.embedding_distance(strack_pool, detections)\n            # dists[ious_dists_mask] = 1.0\n    \n        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.match_thresh)\n\n        for itracked, idet in matches:\n            track = strack_pool[itracked]\n            det = detections[idet]\n            if track.state == TrackState.Tracked:\n                track.update(detections[idet], self.frame_id)\n                activated_starcks.append(track)\n            else:\n                track.re_activate(det, self.frame_id, new_id=False)\n                refind_stracks.append(track)\n\n        ''' Step 3: Second association, with low score detection boxes'''\n        # if len(scores):\n        #     inds_high = scores < self.track_high_thresh\n        #     inds_low = scores > self.track_low_thresh\n        #     inds_second = np.logical_and(inds_low, inds_high)\n        #     dets_second = bboxes[inds_second]\n        #     scores_second = scores[inds_second]\n        #     classes_second = classes[inds_second]\n        # else:\n        #     dets_second = []\n        #     scores_second = []\n        #     classes_second = []\n\n        # association the untrack to the low score detections\n        if len(dets_second) > 0:\n            '''Detections'''\n            detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s, c) for\n                (tlbr, s, c) in zip(dets_second, scores_second, clss_second)]\n        else:\n            detections_second = []\n\n        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]\n        dists = matching.iou_distance(r_tracked_stracks, detections_second)\n        matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)\n        for itracked, idet in matches:\n            track = r_tracked_stracks[itracked]\n            det = detections_second[idet]\n            if track.state == TrackState.Tracked:\n                track.update(det, self.frame_id)\n                
activated_starcks.append(track)\n            else:\n                track.re_activate(det, self.frame_id, new_id=False)\n                refind_stracks.append(track)\n\n        for it in u_track:\n            track = r_tracked_stracks[it]\n            if not track.state == TrackState.Lost:\n                track.mark_lost()\n                lost_stracks.append(track)\n\n        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''\n        detections = [detections[i] for i in u_detection]\n        ious_dists = matching.iou_distance(unconfirmed, detections)\n        ious_dists_mask = (ious_dists > self.proximity_thresh)\n        \n        ious_dists = matching.fuse_score(ious_dists, detections)\n    \n        emb_dists = matching.embedding_distance(unconfirmed, detections) / 2.0\n        raw_emb_dists = emb_dists.copy()\n        emb_dists[emb_dists > self.appearance_thresh] = 1.0\n        emb_dists[ious_dists_mask] = 1.0\n        dists = np.minimum(ious_dists, emb_dists)\n    \n        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)\n        for itracked, idet in matches:\n            unconfirmed[itracked].update(detections[idet], self.frame_id)\n            activated_starcks.append(unconfirmed[itracked])\n        for it in u_unconfirmed:\n            track = unconfirmed[it]\n            track.mark_removed()\n            removed_stracks.append(track)\n\n        \"\"\" Step 4: Init new stracks\"\"\"\n        for inew in u_detection:\n            track = detections[inew]\n            if track.score < self.new_track_thresh:\n                continue\n\n            track.activate(self.kalman_filter, self.frame_id)\n            activated_starcks.append(track)\n\n        \"\"\" Step 5: Update state\"\"\"\n        for track in self.lost_stracks:\n            if self.frame_id - track.end_frame > self.max_time_lost:\n                track.mark_removed()\n                removed_stracks.append(track)\n\n        \"\"\" Merge \"\"\"\n        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]\n        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)\n        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)\n        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)\n        self.lost_stracks.extend(lost_stracks)\n        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)\n        self.removed_stracks.extend(removed_stracks)\n        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)\n\n        # output_stracks = [track for track in self.tracked_stracks if track.is_activated]\n        output_stracks = [track for track in self.tracked_stracks if track.is_activated]\n        outputs = []\n        for t in output_stracks:\n            output= []\n            tlwh = t.tlwh\n            tid = t.track_id\n            tlwh = np.expand_dims(tlwh, axis=0)\n            xyxy = xywh2xyxy(tlwh)\n            xyxy = np.squeeze(xyxy, axis=0)\n            output.extend(xyxy)\n            output.append(tid)\n            output.append(t.cls)\n            output.append(t.score)\n            outputs.append(output)\n\n        return outputs\n\n    def _xywh_to_xyxy(self, bbox_xywh):\n        x, y, w, h = bbox_xywh\n        x1 = max(int(x - w / 2), 0)\n        x2 = min(int(x + w / 2), self.width - 1)\n        y1 = max(int(y - h / 2), 0)\n        y2 = min(int(y + h / 2), 
self.height - 1)\n        return x1, y1, x2, y2\n\n    def _get_features(self, bbox_xywh, ori_img):\n        im_crops = []\n        for box in bbox_xywh:\n            x1, y1, x2, y2 = self._xywh_to_xyxy(box)\n            im = ori_img[y1:y2, x1:x2]\n            im_crops.append(im)\n        if im_crops:\n            features = self.model(im_crops)\n        else:\n            features = np.array([])\n        return features\n\ndef joint_stracks(tlista, tlistb):\n    exists = {}\n    res = []\n    for t in tlista:\n        exists[t.track_id] = 1\n        res.append(t)\n    for t in tlistb:\n        tid = t.track_id\n        if not exists.get(tid, 0):\n            exists[tid] = 1\n            res.append(t)\n    return res\n\n\ndef sub_stracks(tlista, tlistb):\n    stracks = {}\n    for t in tlista:\n        stracks[t.track_id] = t\n    for t in tlistb:\n        tid = t.track_id\n        if stracks.get(tid, 0):\n            del stracks[tid]\n    return list(stracks.values())\n\n\ndef remove_duplicate_stracks(stracksa, stracksb):\n    pdist = matching.iou_distance(stracksa, stracksb)\n    pairs = np.where(pdist < 0.15)\n    dupa, dupb = list(), list()\n    for p, q in zip(*pairs):\n        timep = stracksa[p].frame_id - stracksa[p].start_frame\n        timeq = stracksb[q].frame_id - stracksb[q].start_frame\n        if timep > timeq:\n            dupb.append(q)\n        else:\n            dupa.append(p)\n    resa = [t for i, t in enumerate(stracksa) if not i in dupa]\n    resb = [t for i, t in enumerate(stracksb) if not i in dupb]\n    return resa, resb\n"
  },
  {
    "path": "DLTA_AI_app/trackers/botsort/configs/botsort.yaml",
    "content": "# Trial number:      232\n# HOTA, MOTA, IDF1:  [45.31]\nbotsort:\n  appearance_thresh: 0.4818211117541298\n  cmc_method: sparseOptFlow\n  conf_thres: 0.3501265956918775\n  frame_rate: 30\n  lambda_: 0.9896143462366406\n  match_thresh: 0.22734550911325851\n  new_track_thresh: 0.21144301345190655\n  proximity_thresh: 0.5945380911899254\n  track_buffer: 60\n  track_high_thresh: 0.33824964456239337\n"
  },
  {
    "path": "DLTA_AI_app/trackers/botsort/gmc.py",
    "content": "import cv2\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport copy\nimport time\n\n\nclass GMC:\n    def __init__(self, method='sparseOptFlow', downscale=2, verbose=None):\n        super(GMC, self).__init__()\n\n        self.method = method\n        self.downscale = max(1, int(downscale))\n\n        if self.method == 'orb':\n            self.detector = cv2.FastFeatureDetector_create(20)\n            self.extractor = cv2.ORB_create()\n            self.matcher = cv2.BFMatcher(cv2.NORM_HAMMING)\n\n        elif self.method == 'sift':\n            self.detector = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)\n            self.extractor = cv2.SIFT_create(nOctaveLayers=3, contrastThreshold=0.02, edgeThreshold=20)\n            self.matcher = cv2.BFMatcher(cv2.NORM_L2)\n\n        elif self.method == 'ecc':\n            number_of_iterations = 5000\n            termination_eps = 1e-6\n            self.warp_mode = cv2.MOTION_EUCLIDEAN\n            self.criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)\n\n        elif self.method == 'sparseOptFlow':\n            self.feature_params = dict(maxCorners=1000, qualityLevel=0.01, minDistance=1, blockSize=3,\n                                       useHarrisDetector=False, k=0.04)\n            # self.gmc_file = open('GMC_results.txt', 'w')\n\n        elif self.method == 'file' or self.method == 'files':\n            seqName = verbose[0]\n            ablation = verbose[1]\n            if ablation:\n                filePath = r'tracker/GMC_files/MOT17_ablation'\n            else:\n                filePath = r'tracker/GMC_files/MOTChallenge'\n\n            if '-FRCNN' in seqName:\n                seqName = seqName[:-6]\n            elif '-DPM' in seqName:\n                seqName = seqName[:-4]\n            elif '-SDP' in seqName:\n                seqName = seqName[:-4]\n\n            self.gmcFile = open(filePath + \"/GMC-\" + seqName + \".txt\", 'r')\n\n            if self.gmcFile is None:\n                raise ValueError(\"Error: Unable to open GMC file in directory:\" + filePath)\n        elif self.method == 'none' or self.method == 'None':\n            self.method = 'none'\n        else:\n            raise ValueError(\"Error: Unknown CMC method:\" + method)\n\n        self.prevFrame = None\n        self.prevKeyPoints = None\n        self.prevDescriptors = None\n\n        self.initializedFirstFrame = False\n\n    def apply(self, raw_frame, detections=None):\n        if self.method == 'orb' or self.method == 'sift':\n            return self.applyFeaures(raw_frame, detections)\n        elif self.method == 'ecc':\n            return self.applyEcc(raw_frame, detections)\n        elif self.method == 'sparseOptFlow':\n            return self.applySparseOptFlow(raw_frame, detections)\n        elif self.method == 'file':\n            return self.applyFile(raw_frame, detections)\n        elif self.method == 'none':\n            return np.eye(2, 3)\n        else:\n            return np.eye(2, 3)\n\n    def applyEcc(self, raw_frame, detections=None):\n\n        # Initialize\n        height, width, _ = raw_frame.shape\n        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)\n        H = np.eye(2, 3, dtype=np.float32)\n\n        # Downscale image (TODO: consider using pyramids)\n        if self.downscale > 1.0:\n            frame = cv2.GaussianBlur(frame, (3, 3), 1.5)\n            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))\n      
      width = width // self.downscale\n            height = height // self.downscale\n\n        # Handle first frame\n        if not self.initializedFirstFrame:\n            # Initialize data\n            self.prevFrame = frame.copy()\n\n            # Initialization done\n            self.initializedFirstFrame = True\n\n            return H\n\n        # Run the ECC algorithm. The results are stored in warp_matrix.\n        # (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria)\n        try:\n            (cc, H) = cv2.findTransformECC(self.prevFrame, frame, H, self.warp_mode, self.criteria, None, 1)\n        except:\n            print('Warning: find transform failed. Set warp as identity')\n\n        return H\n\n    def applyFeaures(self, raw_frame, detections=None):\n\n        # Initialize\n        height, width, _ = raw_frame.shape\n        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)\n        H = np.eye(2, 3)\n\n        # Downscale image (TODO: consider using pyramids)\n        if self.downscale > 1.0:\n            # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)\n            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))\n            width = width // self.downscale\n            height = height // self.downscale\n\n        # find the keypoints\n        mask = np.zeros_like(frame)\n        # mask[int(0.05 * height): int(0.95 * height), int(0.05 * width): int(0.95 * width)] = 255\n        mask[int(0.02 * height): int(0.98 * height), int(0.02 * width): int(0.98 * width)] = 255\n        if detections is not None:\n            for det in detections:\n                tlbr = (det[:4] / self.downscale).astype(np.int_)\n                mask[tlbr[1]:tlbr[3], tlbr[0]:tlbr[2]] = 0\n\n        keypoints = self.detector.detect(frame, mask)\n\n        # compute the descriptors\n        keypoints, descriptors = self.extractor.compute(frame, keypoints)\n\n        # Handle first frame\n        if not self.initializedFirstFrame:\n            # Initialize data\n            self.prevFrame = frame.copy()\n            self.prevKeyPoints = copy.copy(keypoints)\n            self.prevDescriptors = copy.copy(descriptors)\n\n            # Initialization done\n            self.initializedFirstFrame = True\n\n            return H\n\n        # Match descriptors.\n        knnMatches = self.matcher.knnMatch(self.prevDescriptors, descriptors, 2)\n\n        # Filtered matches based on smallest spatial distance\n        matches = []\n        spatialDistances = []\n\n        maxSpatialDistance = 0.25 * np.array([width, height])\n\n        # Handle empty matches case\n        if len(knnMatches) == 0:\n            # Store to next iteration\n            self.prevFrame = frame.copy()\n            self.prevKeyPoints = copy.copy(keypoints)\n            self.prevDescriptors = copy.copy(descriptors)\n\n            return H\n\n        for m, n in knnMatches:\n            if m.distance < 0.9 * n.distance:\n                prevKeyPointLocation = self.prevKeyPoints[m.queryIdx].pt\n                currKeyPointLocation = keypoints[m.trainIdx].pt\n\n                spatialDistance = (prevKeyPointLocation[0] - currKeyPointLocation[0],\n                                   prevKeyPointLocation[1] - currKeyPointLocation[1])\n\n                if (np.abs(spatialDistance[0]) < maxSpatialDistance[0]) and \\\n                        (np.abs(spatialDistance[1]) < maxSpatialDistance[1]):\n                    spatialDistances.append(spatialDistance)\n                    
matches.append(m)\n\n        meanSpatialDistances = np.mean(spatialDistances, 0)\n        stdSpatialDistances = np.std(spatialDistances, 0)\n\n        inliesrs = (spatialDistances - meanSpatialDistances) < 2.5 * stdSpatialDistances\n\n        goodMatches = []\n        prevPoints = []\n        currPoints = []\n        for i in range(len(matches)):\n            if inliesrs[i, 0] and inliesrs[i, 1]:\n                goodMatches.append(matches[i])\n                prevPoints.append(self.prevKeyPoints[matches[i].queryIdx].pt)\n                currPoints.append(keypoints[matches[i].trainIdx].pt)\n\n        prevPoints = np.array(prevPoints)\n        currPoints = np.array(currPoints)\n\n        # Draw the keypoint matches on the output image\n        if 0:\n            matches_img = np.hstack((self.prevFrame, frame))\n            matches_img = cv2.cvtColor(matches_img, cv2.COLOR_GRAY2BGR)\n            W = np.size(self.prevFrame, 1)\n            for m in goodMatches:\n                prev_pt = np.array(self.prevKeyPoints[m.queryIdx].pt, dtype=np.int_)\n                curr_pt = np.array(keypoints[m.trainIdx].pt, dtype=np.int_)\n                curr_pt[0] += W\n                color = np.random.randint(0, 255, (3,))\n                color = (int(color[0]), int(color[1]), int(color[2]))\n\n                matches_img = cv2.line(matches_img, prev_pt, curr_pt, tuple(color), 1, cv2.LINE_AA)\n                matches_img = cv2.circle(matches_img, prev_pt, 2, tuple(color), -1)\n                matches_img = cv2.circle(matches_img, curr_pt, 2, tuple(color), -1)\n\n            plt.figure()\n            plt.imshow(matches_img)\n            plt.show()\n\n        # Find rigid matrix\n        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(prevPoints, 0)):\n            H, inliesrs = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)\n\n            # Handle downscale\n            if self.downscale > 1.0:\n                H[0, 2] *= self.downscale\n                H[1, 2] *= self.downscale\n        else:\n            print('Warning: not enough matching points')\n\n        # Store to next iteration\n        self.prevFrame = frame.copy()\n        self.prevKeyPoints = copy.copy(keypoints)\n        self.prevDescriptors = copy.copy(descriptors)\n\n        return H\n\n    def applySparseOptFlow(self, raw_frame, detections=None):\n\n        t0 = time.time()\n\n        # Initialize\n        height, width, _ = raw_frame.shape\n        frame = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)\n        H = np.eye(2, 3)\n\n        # Downscale image\n        if self.downscale > 1.0:\n            # frame = cv2.GaussianBlur(frame, (3, 3), 1.5)\n            frame = cv2.resize(frame, (width // self.downscale, height // self.downscale))\n\n        # find the keypoints\n        keypoints = cv2.goodFeaturesToTrack(frame, mask=None, **self.feature_params)\n\n        # Handle first frame\n        if not self.initializedFirstFrame:\n            # Initialize data\n            self.prevFrame = frame.copy()\n            self.prevKeyPoints = copy.copy(keypoints)\n\n            # Initialization done\n            self.initializedFirstFrame = True\n\n            return H\n\n        # find correspondences\n        matchedKeypoints, status, err = cv2.calcOpticalFlowPyrLK(self.prevFrame, frame, self.prevKeyPoints, None)\n\n        # leave good correspondences only\n        prevPoints = []\n        currPoints = []\n\n        for i in range(len(status)):\n            if status[i]:\n                
prevPoints.append(self.prevKeyPoints[i])\n                currPoints.append(matchedKeypoints[i])\n\n        prevPoints = np.array(prevPoints)\n        currPoints = np.array(currPoints)\n\n        # Find rigid matrix\n        if (np.size(prevPoints, 0) > 4) and (np.size(prevPoints, 0) == np.size(currPoints, 0)):\n            H, inliesrs = cv2.estimateAffinePartial2D(prevPoints, currPoints, cv2.RANSAC)\n\n            # Handle downscale\n            if self.downscale > 1.0:\n                H[0, 2] *= self.downscale\n                H[1, 2] *= self.downscale\n        else:\n            print('Warning: not enough matching points')\n\n        # Store to next iteration\n        self.prevFrame = frame.copy()\n        self.prevKeyPoints = copy.copy(keypoints)\n\n        t1 = time.time()\n\n        # gmc_line = str(1000 * (t1 - t0)) + \"\\t\" + str(H[0, 0]) + \"\\t\" + str(H[0, 1]) + \"\\t\" + str(\n        #     H[0, 2]) + \"\\t\" + str(H[1, 0]) + \"\\t\" + str(H[1, 1]) + \"\\t\" + str(H[1, 2]) + \"\\n\"\n        # self.gmc_file.write(gmc_line)\n\n        return H\n\n    def applyFile(self, raw_frame, detections=None):\n        line = self.gmcFile.readline()\n        tokens = line.split(\"\\t\")\n        H = np.eye(2, 3, dtype=np.float_)\n        H[0, 0] = float(tokens[1])\n        H[0, 1] = float(tokens[2])\n        H[0, 2] = float(tokens[3])\n        H[1, 0] = float(tokens[4])\n        H[1, 1] = float(tokens[5])\n        H[1, 2] = float(tokens[6])\n\n        return H"
  },
  {
    "path": "DLTA_AI_app/trackers/botsort/kalman_filter.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\n\n\n\"\"\"\nTable for the 0.95 quantile of the chi-square distribution with N degrees of\nfreedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv\nfunction and used as Mahalanobis gating threshold.\n\"\"\"\nchi2inv95 = {\n    1: 3.8415,\n    2: 5.9915,\n    3: 7.8147,\n    4: 9.4877,\n    5: 11.070,\n    6: 12.592,\n    7: 14.067,\n    8: 15.507,\n    9: 16.919}\n\n\nclass KalmanFilter(object):\n    \"\"\"\n    A simple Kalman filter for tracking bounding boxes in image space.\n\n    The 8-dimensional state space\n\n        x, y, w, h, vx, vy, vw, vh\n\n    contains the bounding box center position (x, y), width w, height h,\n    and their respective velocities.\n\n    Object motion follows a constant velocity model. The bounding box location\n    (x, y, w, h) is taken as direct observation of the state space (linear\n    observation model).\n\n    \"\"\"\n\n    def __init__(self):\n        ndim, dt = 4, 1.\n\n        # Create Kalman filter model matrices.\n        self._motion_mat = np.eye(2 * ndim, 2 * ndim)\n        for i in range(ndim):\n            self._motion_mat[i, ndim + i] = dt\n        self._update_mat = np.eye(ndim, 2 * ndim)\n\n        # Motion and observation uncertainty are chosen relative to the current\n        # state estimate. These weights control the amount of uncertainty in\n        # the model. This is a bit hacky.\n        self._std_weight_position = 1. / 20\n        self._std_weight_velocity = 1. / 160\n\n    def initiate(self, measurement):\n        \"\"\"Create track from unassociated measurement.\n\n        Parameters\n        ----------\n        measurement : ndarray\n            Bounding box coordinates (x, y, w, h) with center position (x, y),\n            width w, and height h.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector (8 dimensional) and covariance matrix (8x8\n            dimensional) of the new track. Unobserved velocities are initialized\n            to 0 mean.\n\n        \"\"\"\n        mean_pos = measurement\n        mean_vel = np.zeros_like(mean_pos)\n        mean = np.r_[mean_pos, mean_vel]\n\n        std = [\n            2 * self._std_weight_position * measurement[2],\n            2 * self._std_weight_position * measurement[3],\n            2 * self._std_weight_position * measurement[2],\n            2 * self._std_weight_position * measurement[3],\n            10 * self._std_weight_velocity * measurement[2],\n            10 * self._std_weight_velocity * measurement[3],\n            10 * self._std_weight_velocity * measurement[2],\n            10 * self._std_weight_velocity * measurement[3]]\n        covariance = np.diag(np.square(std))\n        return mean, covariance\n\n    def predict(self, mean, covariance):\n        \"\"\"Run Kalman filter prediction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The 8 dimensional mean vector of the object state at the previous\n            time step.\n        covariance : ndarray\n            The 8x8 dimensional covariance matrix of the object state at the\n            previous time step.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the predicted\n            state. 
Unobserved velocities are initialized to 0 mean.\n\n        \"\"\"\n        std_pos = [\n            self._std_weight_position * mean[2],\n            self._std_weight_position * mean[3],\n            self._std_weight_position * mean[2],\n            self._std_weight_position * mean[3]]\n        std_vel = [\n            self._std_weight_velocity * mean[2],\n            self._std_weight_velocity * mean[3],\n            self._std_weight_velocity * mean[2],\n            self._std_weight_velocity * mean[3]]\n        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))\n\n        mean = np.dot(mean, self._motion_mat.T)\n        covariance = np.linalg.multi_dot((\n            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov\n\n        return mean, covariance\n\n    def project(self, mean, covariance):\n        \"\"\"Project state distribution to measurement space.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The state's mean vector (8 dimensional array).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the projected mean and covariance matrix of the given state\n            estimate.\n\n        \"\"\"\n        std = [\n            self._std_weight_position * mean[2],\n            self._std_weight_position * mean[3],\n            self._std_weight_position * mean[2],\n            self._std_weight_position * mean[3]]\n        innovation_cov = np.diag(np.square(std))\n\n        mean = np.dot(self._update_mat, mean)\n        covariance = np.linalg.multi_dot((\n            self._update_mat, covariance, self._update_mat.T))\n        return mean, covariance + innovation_cov\n\n    def multi_predict(self, mean, covariance):\n        \"\"\"Run Kalman filter prediction step (Vectorized version).\n        Parameters\n        ----------\n        mean : ndarray\n            The Nx8 dimensional mean matrix of the object states at the previous\n            time step.\n        covariance : ndarray\n            The Nx8x8 dimensional covariance matrics of the object states at the\n            previous time step.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the predicted\n            state. 
Unobserved velocities are initialized to 0 mean.\n        \"\"\"\n        std_pos = [\n            self._std_weight_position * mean[:, 2],\n            self._std_weight_position * mean[:, 3],\n            self._std_weight_position * mean[:, 2],\n            self._std_weight_position * mean[:, 3]]\n        std_vel = [\n            self._std_weight_velocity * mean[:, 2],\n            self._std_weight_velocity * mean[:, 3],\n            self._std_weight_velocity * mean[:, 2],\n            self._std_weight_velocity * mean[:, 3]]\n        sqr = np.square(np.r_[std_pos, std_vel]).T\n\n        motion_cov = []\n        for i in range(len(mean)):\n            motion_cov.append(np.diag(sqr[i]))\n        motion_cov = np.asarray(motion_cov)\n\n        mean = np.dot(mean, self._motion_mat.T)\n        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))\n        covariance = np.dot(left, self._motion_mat.T) + motion_cov\n\n        return mean, covariance\n\n    def update(self, mean, covariance, measurement):\n        \"\"\"Run Kalman filter correction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The predicted state's mean vector (8 dimensional).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n        measurement : ndarray\n            The 4 dimensional measurement vector (x, y, w, h), where (x, y)\n            is the center position, w the width, and h the height of the\n            bounding box.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the measurement-corrected state distribution.\n\n        \"\"\"\n        projected_mean, projected_cov = self.project(mean, covariance)\n\n        chol_factor, lower = scipy.linalg.cho_factor(\n            projected_cov, lower=True, check_finite=False)\n        kalman_gain = scipy.linalg.cho_solve(\n            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,\n            check_finite=False).T\n        innovation = measurement - projected_mean\n\n        new_mean = mean + np.dot(innovation, kalman_gain.T)\n        new_covariance = covariance - np.linalg.multi_dot((\n            kalman_gain, projected_cov, kalman_gain.T))\n        return new_mean, new_covariance\n\n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False, metric='maha'):\n        \"\"\"Compute gating distance between state distribution and measurements.\n        A suitable distance threshold can be obtained from `chi2inv95`. 
If\n        `only_position` is False, the chi-square distribution has 4 degrees of\n        freedom, otherwise 2.\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, w, h) where (x, y) is the bounding box center\n            position, w the width, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n        \"\"\"\n        mean, covariance = self.project(mean, covariance)\n        if only_position:\n            mean, covariance = mean[:2], covariance[:2, :2]\n            measurements = measurements[:, :2]\n\n        d = measurements - mean\n        if metric == 'gaussian':\n            return np.sum(d * d, axis=1)\n        elif metric == 'maha':\n            cholesky_factor = np.linalg.cholesky(covariance)\n            z = scipy.linalg.solve_triangular(\n                cholesky_factor, d.T, lower=True, check_finite=False,\n                overwrite_b=True)\n            squared_maha = np.sum(z * z, axis=0)\n            return squared_maha\n        else:\n            raise ValueError('invalid distance metric')"
  },
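The BoT-SORT `KalmanFilter` above tracks an 8-D state (x, y, w, h plus their velocities) under a constant-velocity model. A minimal sketch of one initiate -> predict -> update cycle; the measurements are fabricated, and the import path assumes the repo's `trackers` package is importable.

```python
# Minimal Kalman cycle with the BoT-SORT filter: create a track from a first (x, y, w, h)
# measurement, predict one frame ahead, then correct with a second measurement.
import numpy as np
from trackers.botsort.kalman_filter import KalmanFilter

kf = KalmanFilter()

z0 = np.array([320.0, 240.0, 60.0, 120.0])   # center x, center y, width, height
mean, cov = kf.initiate(z0)                  # 8-D state; velocities start at 0

mean, cov = kf.predict(mean, cov)            # constant-velocity prediction

z1 = np.array([324.0, 242.0, 61.0, 121.0])   # next detection of the same object
mean, cov = kf.update(mean, cov, z1)         # measurement-corrected estimate

print(mean[:4])   # filtered box, pulled toward z1
print(mean[4:])   # estimated per-frame velocities
```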
  {
    "path": "DLTA_AI_app/trackers/botsort/matching.py",
    "content": "import numpy as np\nimport scipy\nimport lap\nfrom scipy.spatial.distance import cdist\n\nfrom trackers.botsort import kalman_filter\n\n\ndef merge_matches(m1, m2, shape):\n    O,P,Q = shape\n    m1 = np.asarray(m1)\n    m2 = np.asarray(m2)\n\n    M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))\n    M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))\n\n    mask = M1*M2\n    match = mask.nonzero()\n    match = list(zip(match[0], match[1]))\n    unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))\n    unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))\n\n    return match, unmatched_O, unmatched_Q\n\n\ndef _indices_to_matches(cost_matrix, indices, thresh):\n    matched_cost = cost_matrix[tuple(zip(*indices))]\n    matched_mask = (matched_cost <= thresh)\n\n    matches = indices[matched_mask]\n    unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))\n    unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))\n\n    return matches, unmatched_a, unmatched_b\n\n\ndef linear_assignment(cost_matrix, thresh):\n    if cost_matrix.size == 0:\n        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))\n    matches, unmatched_a, unmatched_b = [], [], []\n    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)\n    for ix, mx in enumerate(x):\n        if mx >= 0:\n            matches.append([ix, mx])\n    unmatched_a = np.where(x < 0)[0]\n    unmatched_b = np.where(y < 0)[0]\n    matches = np.asarray(matches)\n    return matches, unmatched_a, unmatched_b\n\n\ndef ious(atlbrs, btlbrs):\n    \"\"\"\n    Compute cost based on IoU\n    :type atlbrs: list[tlbr] | np.ndarray\n    :type atlbrs: list[tlbr] | np.ndarray\n\n    :rtype ious np.ndarray\n    \"\"\"\n    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float32)\n    if ious.size == 0:\n        return ious\n\n    ious = bbox_ious(\n        np.ascontiguousarray(atlbrs, dtype=np.float32),\n        np.ascontiguousarray(btlbrs, dtype=np.float32)\n    )\n\n    return ious\n\n\ndef tlbr_expand(tlbr, scale=1.2):\n    w = tlbr[2] - tlbr[0]\n    h = tlbr[3] - tlbr[1]\n\n    half_scale = 0.5 * scale\n\n    tlbr[0] -= half_scale * w\n    tlbr[1] -= half_scale * h\n    tlbr[2] += half_scale * w\n    tlbr[3] += half_scale * h\n\n    return tlbr\n\n\ndef iou_distance(atracks, btracks):\n    \"\"\"\n    Compute cost based on IoU\n    :type atracks: list[STrack]\n    :type btracks: list[STrack]\n\n    :rtype cost_matrix np.ndarray\n    \"\"\"\n\n    if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):\n        atlbrs = atracks\n        btlbrs = btracks\n    else:\n        atlbrs = [track.tlbr for track in atracks]\n        btlbrs = [track.tlbr for track in btracks]\n    _ious = ious(atlbrs, btlbrs)\n    cost_matrix = 1 - _ious\n\n    return cost_matrix\n\n\ndef v_iou_distance(atracks, btracks):\n    \"\"\"\n    Compute cost based on IoU\n    :type atracks: list[STrack]\n    :type btracks: list[STrack]\n\n    :rtype cost_matrix np.ndarray\n    \"\"\"\n\n    if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):\n        atlbrs = atracks\n        btlbrs = btracks\n    else:\n        atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]\n        btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for 
track in btracks]\n    _ious = ious(atlbrs, btlbrs)\n    cost_matrix = 1 - _ious\n\n    return cost_matrix\n\n\ndef embedding_distance(tracks, detections, metric='cosine'):\n    \"\"\"\n    :param tracks: list[STrack]\n    :param detections: list[BaseTrack]\n    :param metric:\n    :return: cost_matrix np.ndarray\n    \"\"\"\n\n    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)\n    if cost_matrix.size == 0:\n        return cost_matrix\n    det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float32)\n    track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float32)\n\n    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # / 2.0  # Nomalized features\n    return cost_matrix\n\n\ndef gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):\n    if cost_matrix.size == 0:\n        return cost_matrix\n    gating_dim = 2 if only_position else 4\n    gating_threshold = kalman_filter.chi2inv95[gating_dim]\n    # measurements = np.asarray([det.to_xyah() for det in detections])\n    measurements = np.asarray([det.to_xywh() for det in detections])\n    for row, track in enumerate(tracks):\n        gating_distance = kf.gating_distance(\n            track.mean, track.covariance, measurements, only_position)\n        cost_matrix[row, gating_distance > gating_threshold] = np.inf\n    return cost_matrix\n\n\ndef fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):\n    if cost_matrix.size == 0:\n        return cost_matrix\n    gating_dim = 2 if only_position else 4\n    gating_threshold = kalman_filter.chi2inv95[gating_dim]\n    # measurements = np.asarray([det.to_xyah() for det in detections])\n    measurements = np.asarray([det.to_xywh() for det in detections])\n    for row, track in enumerate(tracks):\n        gating_distance = kf.gating_distance(\n            track.mean, track.covariance, measurements, only_position, metric='maha')\n        cost_matrix[row, gating_distance > gating_threshold] = np.inf\n        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance\n    return cost_matrix\n\n\ndef fuse_iou(cost_matrix, tracks, detections):\n    if cost_matrix.size == 0:\n        return cost_matrix\n    reid_sim = 1 - cost_matrix\n    iou_dist = iou_distance(tracks, detections)\n    iou_sim = 1 - iou_dist\n    fuse_sim = reid_sim * (1 + iou_sim) / 2\n    det_scores = np.array([det.score for det in detections])\n    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)\n    #fuse_sim = fuse_sim * (1 + det_scores) / 2\n    fuse_cost = 1 - fuse_sim\n    return fuse_cost\n\n\ndef fuse_score(cost_matrix, detections):\n    if cost_matrix.size == 0:\n        return cost_matrix\n    iou_sim = 1 - cost_matrix\n    det_scores = np.array([det.score for det in detections])\n    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)\n    fuse_sim = iou_sim * det_scores\n    fuse_cost = 1 - fuse_sim\n    return fuse_cost\n\ndef bbox_ious(boxes, query_boxes):\n    \"\"\"\n    Parameters\n    ----------\n    boxes: (N, 4) ndarray of float\n    query_boxes: (K, 4) ndarray of float\n    Returns\n    -------\n    overlaps: (N, K) ndarray of overlap between boxes and query_boxes\n    \"\"\"\n    N = boxes.shape[0]\n    K = query_boxes.shape[0]\n    overlaps = np.zeros((N, K), dtype=np.float32)\n    \n    for k in range(K):\n        box_area = (\n            (query_boxes[k, 2] - 
query_boxes[k, 0] + 1) *\n            (query_boxes[k, 3] - query_boxes[k, 1] + 1)\n        )\n        for n in range(N):\n            iw = (\n                min(boxes[n, 2], query_boxes[k, 2]) -\n                max(boxes[n, 0], query_boxes[k, 0]) + 1\n            )\n            if iw > 0:\n                ih = (\n                    min(boxes[n, 3], query_boxes[k, 3]) -\n                    max(boxes[n, 1], query_boxes[k, 1]) + 1\n                )\n                if ih > 0:\n                    ua = float(\n                        (boxes[n, 2] - boxes[n, 0] + 1) *\n                        (boxes[n, 3] - boxes[n, 1] + 1) +\n                        box_area - iw * ih\n                    )\n                    overlaps[n, k] = iw * ih / ua\n    return overlaps"
  },
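`matching.py` turns IoU overlap into a cost matrix (1 - IoU) and solves the assignment with `lap.lapjv` under a cost limit that acts as the match threshold. A small numeric sketch of that idea with made-up boxes; the plain-IoU helper is local to the example (it omits the +1 pixel convention `bbox_ious` uses).

```python
# Sketch: build a (tracks x detections) IoU cost matrix and solve it with lap.lapjv,
# mirroring iou_distance + linear_assignment. Boxes are hypothetical tlbr arrays.
import numpy as np
import lap

def iou(a, b):
    # Plain IoU for two tlbr boxes.
    x1, y1 = max(a[0], b[0]), max(a[1], b[1])
    x2, y2 = min(a[2], b[2]), min(a[3], b[3])
    inter = max(0.0, x2 - x1) * max(0.0, y2 - y1)
    area_a = (a[2] - a[0]) * (a[3] - a[1])
    area_b = (b[2] - b[0]) * (b[3] - b[1])
    return inter / (area_a + area_b - inter)

tracks = np.array([[10, 10, 50, 50], [100, 100, 140, 160]], dtype=float)
dets   = np.array([[12, 11, 52, 49], [300, 300, 340, 360]], dtype=float)

cost = 1.0 - np.array([[iou(t, d) for d in dets] for t in tracks])

# extend_cost/cost_limit let rows and columns stay unassigned when the cost exceeds the limit.
_, x, y = lap.lapjv(cost, extend_cost=True, cost_limit=0.8)
matches = [(r, c) for r, c in enumerate(x) if c >= 0]
print(matches)                                  # [(0, 0)]: only the overlapping pair matches
print(np.where(x < 0)[0], np.where(y < 0)[0])   # unmatched track / detection indices
```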
  {
    "path": "DLTA_AI_app/trackers/botsort/reid_multibackend.py",
    "content": "import torch.nn as nn\nimport torch\nfrom pathlib import Path\nimport numpy as np\nfrom itertools import islice\nimport torchvision.transforms as transforms\nimport cv2\nimport sys\nimport torchvision.transforms as T\nfrom collections import OrderedDict, namedtuple\nimport gdown\nfrom os.path import exists as file_exists\n\n\nfrom ultralytics.yolo.utils.checks import check_requirements, check_version\nfrom ultralytics.yolo.utils import LOGGER\nfrom trackers.strongsort.deep.reid_model_factory import (show_downloadeable_models, get_model_url, get_model_name,\n                                                          download_url, load_pretrained_weights)\nfrom trackers.strongsort.deep.models import build_model\n\n\ndef check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):\n    # Check file(s) for acceptable suffix\n    if file and suffix:\n        if isinstance(suffix, str):\n            suffix = [suffix]\n        for f in file if isinstance(file, (list, tuple)) else [file]:\n            s = Path(f).suffix.lower()  # file suffix\n            if len(s):\n                assert s in suffix, f\"{msg}{f} acceptable suffix is {suffix}\"\n\n\nclass ReIDDetectMultiBackend(nn.Module):\n    # ReID models MultiBackend class for python inference on various backends\n    def __init__(self, weights='osnet_x0_25_msmt17.pt', device=torch.device('cpu'), fp16=False):\n        super().__init__()\n\n        w = weights[0] if isinstance(weights, list) else weights\n        self.pt, self.jit, self.onnx, self.xml, self.engine, self.tflite = self.model_type(w)  # get backend\n        self.fp16 = fp16\n        self.fp16 &= self.pt or self.jit or self.engine  # FP16\n\n        # Build transform functions\n        self.device = device\n        self.image_size=(256, 128)\n        self.pixel_mean=[0.485, 0.456, 0.406]\n        self.pixel_std=[0.229, 0.224, 0.225]\n        self.transforms = []\n        self.transforms += [T.Resize(self.image_size)]\n        self.transforms += [T.ToTensor()]\n        self.transforms += [T.Normalize(mean=self.pixel_mean, std=self.pixel_std)]\n        self.preprocess = T.Compose(self.transforms)\n        self.to_pil = T.ToPILImage()\n\n        model_name = get_model_name(w)\n\n        if w.suffix == '.pt':\n            model_url = get_model_url(w)\n            if not file_exists(w) and model_url is not None:\n                gdown.download(model_url, str(w), quiet=False)\n            elif file_exists(w):\n                pass\n            else:\n                print(f'No URL associated to the chosen StrongSORT weights ({w}). 
Choose between:')\n                show_downloadeable_models()\n                exit()\n\n        # Build model\n        self.model = build_model(\n            model_name,\n            num_classes=1,\n            pretrained=not (w and w.is_file()),\n            use_gpu=device\n        )\n\n        if self.pt:  # PyTorch\n            # populate model arch with weights\n            if w and w.is_file() and w.suffix == '.pt':\n                load_pretrained_weights(self.model, w)\n                \n            self.model.to(device).eval()\n            self.model.half() if self.fp16 else  self.model.float()\n        elif self.jit:\n            LOGGER.info(f'Loading {w} for TorchScript inference...')\n            self.model = torch.jit.load(w)\n            self.model.half() if self.fp16 else self.model.float()\n        elif self.onnx:  # ONNX Runtime\n            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')\n            cuda = torch.cuda.is_available() and device.type != 'cpu'\n            #check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))\n            import onnxruntime\n            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']\n            self.session = onnxruntime.InferenceSession(str(w), providers=providers)\n        elif self.engine:  # TensorRT\n            LOGGER.info(f'Loading {w} for TensorRT inference...')\n            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download\n            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0\n            if device.type == 'cpu':\n                device = torch.device('cuda:0')\n            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))\n            logger = trt.Logger(trt.Logger.INFO)\n            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:\n                self.model_ = runtime.deserialize_cuda_engine(f.read())\n            self.context = self.model_.create_execution_context()\n            self.bindings = OrderedDict()\n            self.fp16 = False  # default updated below\n            dynamic = False\n            for index in range(self.model_.num_bindings):\n                name = self.model_.get_binding_name(index)\n                dtype = trt.nptype(self.model_.get_binding_dtype(index))\n                if self.model_.binding_is_input(index):\n                    if -1 in tuple(self.model_.get_binding_shape(index)):  # dynamic\n                        dynamic = True\n                        self.context.set_binding_shape(index, tuple(self.model_.get_profile_shape(0, index)[2]))\n                    if dtype == np.float16:\n                        self.fp16 = True\n                shape = tuple(self.context.get_binding_shape(index))\n                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)\n                self.bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))\n            self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())\n            batch_size = self.bindings['images'].shape[0]  # if dynamic, this is instead max batch size\n        elif self.xml:  # OpenVINO\n            LOGGER.info(f'Loading {w} for OpenVINO inference...')\n            check_requirements(('openvino',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/\n            from openvino.runtime import Core, Layout, get_batch\n            ie = Core()\n            if not Path(w).is_file():  # if not *.xml\n    
            w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir\n            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))\n            if network.get_parameters()[0].get_layout().empty:\n                network.get_parameters()[0].set_layout(Layout(\"NCWH\"))\n            batch_dim = get_batch(network)\n            if batch_dim.is_static:\n                batch_size = batch_dim.get_length()\n            self.executable_network = ie.compile_model(network, device_name=\"CPU\")  # device_name=\"MYRIAD\" for Intel NCS2\n            self.output_layer = next(iter(self.executable_network.outputs))\n        \n        elif self.tflite:\n            LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')\n            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu\n                from tflite_runtime.interpreter import Interpreter, load_delegate\n            except ImportError:\n                import tensorflow as tf\n                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,\n            self.interpreter = tf.lite.Interpreter(model_path=w)\n            self.interpreter.allocate_tensors()\n            # Get input and output tensors.\n            self.input_details = self.interpreter.get_input_details()\n            self.output_details = self.interpreter.get_output_details()\n            \n            # Test model on random input data.\n            input_data = np.array(np.random.random_sample((1,256,128,3)), dtype=np.float32)\n            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)\n            \n            self.interpreter.invoke()\n\n            # The function `get_tensor()` returns a copy of the tensor data.\n            output_data = self.interpreter.get_tensor(self.output_details[0]['index'])\n        else:\n            print('This model framework is not supported yet!')\n            exit()\n        \n        \n    @staticmethod\n    def model_type(p='path/to/model.pt'):\n        # Return model type from model path, i.e. 
path='path/to/model.onnx' -> type=onnx\n        from trackers.reid_export import export_formats\n        sf = list(export_formats().Suffix)  # export suffixes\n        check_suffix(p, sf)  # checks\n        types = [s in Path(p).name for s in sf]\n        return types\n\n    def _preprocess(self, im_batch):\n\n        images = []\n        for element in im_batch:\n            image = self.to_pil(element)\n            image = self.preprocess(image)\n            images.append(image)\n\n        images = torch.stack(images, dim=0)\n        images = images.to(self.device)\n\n        return images\n    \n    \n    def forward(self, im_batch):\n        \n        # preprocess batch\n        im_batch = self._preprocess(im_batch)\n\n        # batch to half\n        if self.fp16 and im_batch.dtype != torch.float16:\n           im_batch = im_batch.half()\n\n        # batch processing\n        features = []\n        if self.pt:\n            features = self.model(im_batch)\n        elif self.jit:  # TorchScript\n            features = self.model(im_batch)\n        elif self.onnx:  # ONNX Runtime\n            im_batch = im_batch.cpu().numpy()  # torch to numpy\n            features = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im_batch})[0]\n        elif self.engine:  # TensorRT\n            if True and im_batch.shape != self.bindings['images'].shape:\n                i_in, i_out = (self.model_.get_binding_index(x) for x in ('images', 'output'))\n                self.context.set_binding_shape(i_in, im_batch.shape)  # reshape if dynamic\n                self.bindings['images'] = self.bindings['images']._replace(shape=im_batch.shape)\n                self.bindings['output'].data.resize_(tuple(self.context.get_binding_shape(i_out)))\n            s = self.bindings['images'].shape\n            assert im_batch.shape == s, f\"input size {im_batch.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}\"\n            self.binding_addrs['images'] = int(im_batch.data_ptr())\n            self.context.execute_v2(list(self.binding_addrs.values()))\n            features = self.bindings['output'].data\n        elif self.xml:  # OpenVINO\n            im_batch = im_batch.cpu().numpy()  # FP32\n            features = self.executable_network([im_batch])[self.output_layer]\n        else:\n            print('Framework not supported at the moment, we are working on it...')\n            exit()\n\n        if isinstance(features, (list, tuple)):\n            return self.from_numpy(features[0]) if len(features) == 1 else [self.from_numpy(x) for x in features]\n        else:\n            return self.from_numpy(features)\n\n    def from_numpy(self, x):\n        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x\n\n    def warmup(self, imgsz=[(256, 128, 3)]):\n        # Warmup model by running inference once\n        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.tflite\n        if any(warmup_types) and self.device.type != 'cpu':\n            im = [np.empty(*imgsz).astype(np.uint8)]  # input\n            for _ in range(2 if self.jit else 1):  #\n                self.forward(im)  # warmup"
  },
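Before feature extraction, `ReIDDetectMultiBackend` converts each detection crop to PIL, resizes it to 256x128, and normalizes it with ImageNet statistics. A minimal sketch of that preprocessing on dummy crops; it loads no weights, and the random arrays stand in for real crops. With a loaded ReID model, the resulting batch would go through `forward()` to produce one embedding per crop, which the matchers compare with a cosine distance.

```python
# Sketch of the ReID preprocessing pipeline: PIL conversion -> Resize(256, 128) ->
# ToTensor -> ImageNet-style Normalize, applied per crop and stacked into a batch.
import numpy as np
import torch
import torchvision.transforms as T

preprocess = T.Compose([
    T.ToPILImage(),
    T.Resize((256, 128)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Random uint8 arrays standing in for person crops of different sizes.
crops = [np.random.randint(0, 255, (h, w, 3), dtype=np.uint8)
         for h, w in [(200, 80), (180, 90)]]

batch = torch.stack([preprocess(c) for c in crops])   # shape (N, 3, 256, 128)
print(batch.shape)
```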
  {
    "path": "DLTA_AI_app/trackers/bytetrack/basetrack.py",
    "content": "import numpy as np\nfrom collections import OrderedDict\n\n\nclass TrackState(object):\n    New = 0\n    Tracked = 1\n    Lost = 2\n    Removed = 3\n\n\nclass BaseTrack(object):\n    _count = 0\n\n    track_id = 0\n    is_activated = False\n    state = TrackState.New\n\n    history = OrderedDict()\n    features = []\n    curr_feature = None\n    score = 0\n    start_frame = 0\n    frame_id = 0\n    time_since_update = 0\n\n    # multi-camera\n    location = (np.inf, np.inf)\n\n    @property\n    def end_frame(self):\n        return self.frame_id\n\n    @staticmethod\n    def next_id():\n        BaseTrack._count += 1\n        return BaseTrack._count\n\n    def activate(self, *args):\n        raise NotImplementedError\n\n    def predict(self):\n        raise NotImplementedError\n\n    def update(self, *args, **kwargs):\n        raise NotImplementedError\n\n    def mark_lost(self):\n        self.state = TrackState.Lost\n\n    def mark_removed(self):\n        self.state = TrackState.Removed\n"
  },
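`BaseTrack` supplies the shared ID counter and the New/Tracked/Lost/Removed life cycle that concrete tracks (`STrack`) implement. A throwaway sketch; the `DummyTrack` subclass exists only for illustration.

```python
# Illustration only: a minimal BaseTrack subclass exercising the shared ID counter and
# the New -> Tracked -> Lost -> Removed state transitions.
from trackers.bytetrack.basetrack import BaseTrack, TrackState

class DummyTrack(BaseTrack):
    def activate(self, frame_id):
        self.track_id = self.next_id()     # class-level counter, unique per track
        self.state = TrackState.Tracked
        self.frame_id = self.start_frame = frame_id

t1, t2 = DummyTrack(), DummyTrack()
t1.activate(frame_id=1)
t2.activate(frame_id=1)
print(t1.track_id, t2.track_id)            # e.g. 1 2 (monotonically increasing)

t1.mark_lost()
print(t1.state == TrackState.Lost)         # True
t1.mark_removed()
print(t1.state == TrackState.Removed)      # True
```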
  {
    "path": "DLTA_AI_app/trackers/bytetrack/byte_tracker.py",
    "content": "import numpy as np\nfrom collections import deque\nimport os\nimport os.path as osp\nimport copy\nimport torch\nimport torch.nn.functional as F\n\nfrom ultralytics.yolo.utils.ops import xywh2xyxy, xyxy2xywh\n\n\nfrom trackers.bytetrack.kalman_filter import KalmanFilter\nfrom trackers.bytetrack import matching\nfrom trackers.bytetrack.basetrack import BaseTrack, TrackState\n\nclass STrack(BaseTrack):\n    shared_kalman = KalmanFilter()\n    def __init__(self, tlwh, score, cls):\n\n        # wait activate\n        self._tlwh = np.asarray(tlwh, dtype=np.float32)\n        self.kalman_filter = None\n        self.mean, self.covariance = None, None\n        self.is_activated = False\n\n        self.score = score\n        self.tracklet_len = 0\n        self.cls = cls\n\n    def predict(self):\n        mean_state = self.mean.copy()\n        if self.state != TrackState.Tracked:\n            mean_state[7] = 0\n        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)\n\n    @staticmethod\n    def multi_predict(stracks):\n        if len(stracks) > 0:\n            multi_mean = np.asarray([st.mean.copy() for st in stracks])\n            multi_covariance = np.asarray([st.covariance for st in stracks])\n            for i, st in enumerate(stracks):\n                if st.state != TrackState.Tracked:\n                    multi_mean[i][7] = 0\n            multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)\n            for i, (mean, cov) in enumerate(zip(multi_mean, multi_covariance)):\n                stracks[i].mean = mean\n                stracks[i].covariance = cov\n\n    def activate(self, kalman_filter, frame_id):\n        \"\"\"Start a new tracklet\"\"\"\n        self.kalman_filter = kalman_filter\n        self.track_id = self.next_id()\n        self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))\n\n        self.tracklet_len = 0\n        self.state = TrackState.Tracked\n        if frame_id == 1:\n            self.is_activated = True\n        # self.is_activated = True\n        self.frame_id = frame_id\n        self.start_frame = frame_id\n\n    def re_activate(self, new_track, frame_id, new_id=False):\n        self.mean, self.covariance = self.kalman_filter.update(\n            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)\n        )\n        self.tracklet_len = 0\n        self.state = TrackState.Tracked\n        self.is_activated = True\n        self.frame_id = frame_id\n        if new_id:\n            self.track_id = self.next_id()\n        self.score = new_track.score\n        self.cls = new_track.cls\n\n    def update(self, new_track, frame_id):\n        \"\"\"\n        Update a matched track\n        :type new_track: STrack\n        :type frame_id: int\n        :type update_feature: bool\n        :return:\n        \"\"\"\n        self.frame_id = frame_id\n        self.tracklet_len += 1\n        # self.cls = cls\n\n        new_tlwh = new_track.tlwh\n        self.mean, self.covariance = self.kalman_filter.update(\n            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))\n        self.state = TrackState.Tracked\n        self.is_activated = True\n\n        self.score = new_track.score\n\n    @property\n    # @jit(nopython=True)\n    def tlwh(self):\n        \"\"\"Get current position in bounding box format `(top left x, top left y,\n                width, height)`.\n        \"\"\"\n        if self.mean is None:\n            return 
self._tlwh.copy()\n        ret = self.mean[:4].copy()\n        ret[2] *= ret[3]\n        ret[:2] -= ret[2:] / 2\n        return ret\n\n    @property\n    # @jit(nopython=True)\n    def tlbr(self):\n        \"\"\"Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,\n        `(top left, bottom right)`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[2:] += ret[:2]\n        return ret\n\n    @staticmethod\n    # @jit(nopython=True)\n    def tlwh_to_xyah(tlwh):\n        \"\"\"Convert bounding box to format `(center x, center y, aspect ratio,\n        height)`, where the aspect ratio is `width / height`.\n        \"\"\"\n        ret = np.asarray(tlwh).copy()\n        ret[:2] += ret[2:] / 2\n        ret[2] /= ret[3]\n        return ret\n\n    def to_xyah(self):\n        return self.tlwh_to_xyah(self.tlwh)\n\n    @staticmethod\n    # @jit(nopython=True)\n    def tlbr_to_tlwh(tlbr):\n        ret = np.asarray(tlbr).copy()\n        ret[2:] -= ret[:2]\n        return ret\n\n    @staticmethod\n    # @jit(nopython=True)\n    def tlwh_to_tlbr(tlwh):\n        ret = np.asarray(tlwh).copy()\n        ret[2:] += ret[:2]\n        return ret\n\n    def __repr__(self):\n        return 'OT_{}_({}-{})'.format(self.track_id, self.start_frame, self.end_frame)\n\n\nclass BYTETracker(object):\n    def __init__(self, track_thresh=0.45, match_thresh=0.8, track_buffer=25, frame_rate=30):\n        self.tracked_stracks = []  # type: list[STrack]\n        self.lost_stracks = []  # type: list[STrack]\n        self.removed_stracks = []  # type: list[STrack]\n\n        self.frame_id = 0\n        self.track_buffer=track_buffer\n        \n        self.track_thresh = track_thresh\n        self.match_thresh = match_thresh\n        self.det_thresh = track_thresh + 0.1\n        self.buffer_size = int(frame_rate / 30.0 * track_buffer)\n        self.max_time_lost = self.buffer_size\n        self.kalman_filter = KalmanFilter()\n\n    def update(self, dets, _):\n        self.frame_id += 1\n        activated_starcks = []\n        refind_stracks = []\n        lost_stracks = []\n        removed_stracks = []\n\n        xyxys = dets[:, 0:4]\n        xywh = xyxy2xywh(xyxys.numpy())\n        confs = dets[:, 4]\n        clss = dets[:, 5]\n        \n        classes = clss.numpy()\n        xyxys = xyxys.numpy()\n        confs = confs.numpy()\n\n        remain_inds = confs > self.track_thresh\n        inds_low = confs > 0.1\n        inds_high = confs < self.track_thresh\n\n        inds_second = np.logical_and(inds_low, inds_high)\n        \n        dets_second = xywh[inds_second]\n        dets = xywh[remain_inds]\n        \n        scores_keep = confs[remain_inds]\n        scores_second = confs[inds_second]\n        \n        clss_keep = classes[remain_inds]\n        clss_second = classes[inds_second]\n        \n\n        if len(dets) > 0:\n            '''Detections'''\n            detections = [STrack(xyxy, s, c) for \n                (xyxy, s, c) in zip(dets, scores_keep, clss_keep)]\n        else:\n            detections = []\n\n        ''' Add newly detected tracklets to tracked_stracks'''\n        unconfirmed = []\n        tracked_stracks = []  # type: list[STrack]\n        for track in self.tracked_stracks:\n            if not track.is_activated:\n                unconfirmed.append(track)\n            else:\n                tracked_stracks.append(track)\n\n        ''' Step 2: First association, with high score detection boxes'''\n        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)\n        # 
Predict the current location with KF\n        STrack.multi_predict(strack_pool)\n        dists = matching.iou_distance(strack_pool, detections)\n        #if not self.args.mot20:\n        dists = matching.fuse_score(dists, detections)\n        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.match_thresh)\n\n        for itracked, idet in matches:\n            track = strack_pool[itracked]\n            det = detections[idet]\n            if track.state == TrackState.Tracked:\n                track.update(detections[idet], self.frame_id)\n                activated_starcks.append(track)\n            else:\n                track.re_activate(det, self.frame_id, new_id=False)\n                refind_stracks.append(track)\n\n        ''' Step 3: Second association, with low score detection boxes'''\n        # association the untrack to the low score detections\n        if len(dets_second) > 0:\n            '''Detections'''\n            detections_second = [STrack(xywh, s, c) for (xywh, s, c) in zip(dets_second, scores_second, clss_second)]\n        else:\n            detections_second = []\n        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]\n        dists = matching.iou_distance(r_tracked_stracks, detections_second)\n        matches, u_track, u_detection_second = matching.linear_assignment(dists, thresh=0.5)\n        for itracked, idet in matches:\n            track = r_tracked_stracks[itracked]\n            det = detections_second[idet]\n            if track.state == TrackState.Tracked:\n                track.update(det, self.frame_id)\n                activated_starcks.append(track)\n            else:\n                track.re_activate(det, self.frame_id, new_id=False)\n                refind_stracks.append(track)\n\n        for it in u_track:\n            track = r_tracked_stracks[it]\n            if not track.state == TrackState.Lost:\n                track.mark_lost()\n                lost_stracks.append(track)\n\n        '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''\n        detections = [detections[i] for i in u_detection]\n        dists = matching.iou_distance(unconfirmed, detections)\n        #if not self.args.mot20:\n        dists = matching.fuse_score(dists, detections)\n        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)\n        for itracked, idet in matches:\n            unconfirmed[itracked].update(detections[idet], self.frame_id)\n            activated_starcks.append(unconfirmed[itracked])\n        for it in u_unconfirmed:\n            track = unconfirmed[it]\n            track.mark_removed()\n            removed_stracks.append(track)\n\n        \"\"\" Step 4: Init new stracks\"\"\"\n        for inew in u_detection:\n            track = detections[inew]\n            if track.score < self.det_thresh:\n                continue\n            track.activate(self.kalman_filter, self.frame_id)\n            activated_starcks.append(track)\n        \"\"\" Step 5: Update state\"\"\"\n        for track in self.lost_stracks:\n            if self.frame_id - track.end_frame > self.max_time_lost:\n                track.mark_removed()\n                removed_stracks.append(track)\n\n        # print('Ramained match {} s'.format(t4-t3))\n\n        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]\n        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_starcks)\n        
self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)\n        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)\n        self.lost_stracks.extend(lost_stracks)\n        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)\n        self.removed_stracks.extend(removed_stracks)\n        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)\n        # get scores of lost tracks\n        output_stracks = [track for track in self.tracked_stracks if track.is_activated]\n        outputs = []\n        for t in output_stracks:\n            output= []\n            tlwh = t.tlwh\n            tid = t.track_id\n            tlwh = np.expand_dims(tlwh, axis=0)\n            xyxy = xywh2xyxy(tlwh)\n            xyxy = np.squeeze(xyxy, axis=0)\n            output.extend(xyxy)\n            output.append(tid)\n            output.append(t.cls)\n            output.append(t.score)\n            outputs.append(output)\n\n        return outputs\n#track_id, class_id, conf\n\ndef joint_stracks(tlista, tlistb):\n    exists = {}\n    res = []\n    for t in tlista:\n        exists[t.track_id] = 1\n        res.append(t)\n    for t in tlistb:\n        tid = t.track_id\n        if not exists.get(tid, 0):\n            exists[tid] = 1\n            res.append(t)\n    return res\n\n\ndef sub_stracks(tlista, tlistb):\n    stracks = {}\n    for t in tlista:\n        stracks[t.track_id] = t\n    for t in tlistb:\n        tid = t.track_id\n        if stracks.get(tid, 0):\n            del stracks[tid]\n    return list(stracks.values())\n\n\ndef remove_duplicate_stracks(stracksa, stracksb):\n    pdist = matching.iou_distance(stracksa, stracksb)\n    pairs = np.where(pdist < 0.15)\n    dupa, dupb = list(), list()\n    for p, q in zip(*pairs):\n        timep = stracksa[p].frame_id - stracksa[p].start_frame\n        timeq = stracksb[q].frame_id - stracksb[q].start_frame\n        if timep > timeq:\n            dupb.append(q)\n        else:\n            dupa.append(p)\n    resa = [t for i, t in enumerate(stracksa) if not i in dupa]\n    resb = [t for i, t in enumerate(stracksb) if not i in dupb]\n    return resa, resb\n"
  },
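`BYTETracker.update` expects a tensor of detections with columns (x1, y1, x2, y2, conf, cls) and returns one row per active track as [x1, y1, x2, y2, track_id, cls, score]; the second argument (the frame) is unused in this implementation. A hedged sketch driving it with fabricated detections over two frames.

```python
# Sketch: drive BYTETracker with fabricated detections for two consecutive frames.
import torch
from trackers.bytetrack.byte_tracker import BYTETracker

tracker = BYTETracker(track_thresh=0.45, match_thresh=0.8, track_buffer=25, frame_rate=30)

frames = [
    torch.tensor([[100., 100., 160., 220., 0.90, 0.],    # two made-up boxes, frame 1
                  [400., 150., 460., 270., 0.80, 0.]]),
    torch.tensor([[104., 102., 164., 222., 0.88, 0.],    # same objects, slightly moved
                  [404., 152., 464., 272., 0.82, 0.]]),
]

for dets in frames:
    outputs = tracker.update(dets, None)   # image argument is ignored by this tracker
    # Each row: [x1, y1, x2, y2, track_id, cls, score]
    for row in outputs:
        print([round(float(v), 1) for v in row])
```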
  {
    "path": "DLTA_AI_app/trackers/bytetrack/configs/bytetrack.yaml",
    "content": "bytetrack:\n  track_thresh: 0.6  # tracking confidence threshold\n  track_buffer: 30   # the frames for keep lost tracks\n  match_thresh: 0.8  # matching threshold for tracking\n  frame_rate: 30     # FPS\n  conf_thres: 0.5122620708221085\n  \n"
  },
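The YAML file above carries the ByteTrack thresholds. A short sketch of reading it and constructing the tracker from it; the relative path assumes the working directory is `DLTA_AI_app`, and `conf_thres` is a detector-side threshold, so it is not passed to the tracker here.

```python
# Sketch: read bytetrack.yaml and build a BYTETracker from its parameters.
import yaml
from trackers.bytetrack.byte_tracker import BYTETracker

with open("trackers/bytetrack/configs/bytetrack.yaml", "r") as f:
    cfg = yaml.safe_load(f)["bytetrack"]

tracker = BYTETracker(
    track_thresh=cfg["track_thresh"],
    match_thresh=cfg["match_thresh"],
    track_buffer=cfg["track_buffer"],
    frame_rate=cfg["frame_rate"],
)
print(cfg)
```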
  {
    "path": "DLTA_AI_app/trackers/bytetrack/kalman_filter.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\n\n\n\"\"\"\nTable for the 0.95 quantile of the chi-square distribution with N degrees of\nfreedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv\nfunction and used as Mahalanobis gating threshold.\n\"\"\"\nchi2inv95 = {\n    1: 3.8415,\n    2: 5.9915,\n    3: 7.8147,\n    4: 9.4877,\n    5: 11.070,\n    6: 12.592,\n    7: 14.067,\n    8: 15.507,\n    9: 16.919}\n\n\nclass KalmanFilter(object):\n    \"\"\"\n    A simple Kalman filter for tracking bounding boxes in image space.\n\n    The 8-dimensional state space\n\n        x, y, a, h, vx, vy, va, vh\n\n    contains the bounding box center position (x, y), aspect ratio a, height h,\n    and their respective velocities.\n\n    Object motion follows a constant velocity model. The bounding box location\n    (x, y, a, h) is taken as direct observation of the state space (linear\n    observation model).\n\n    \"\"\"\n\n    def __init__(self):\n        ndim, dt = 4, 1.\n\n        # Create Kalman filter model matrices.\n        self._motion_mat = np.eye(2 * ndim, 2 * ndim)\n        for i in range(ndim):\n            self._motion_mat[i, ndim + i] = dt\n        self._update_mat = np.eye(ndim, 2 * ndim)\n\n        # Motion and observation uncertainty are chosen relative to the current\n        # state estimate. These weights control the amount of uncertainty in\n        # the model. This is a bit hacky.\n        self._std_weight_position = 1. / 20\n        self._std_weight_velocity = 1. / 160\n\n    def initiate(self, measurement):\n        \"\"\"Create track from unassociated measurement.\n\n        Parameters\n        ----------\n        measurement : ndarray\n            Bounding box coordinates (x, y, a, h) with center position (x, y),\n            aspect ratio a, and height h.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector (8 dimensional) and covariance matrix (8x8\n            dimensional) of the new track. Unobserved velocities are initialized\n            to 0 mean.\n\n        \"\"\"\n        mean_pos = measurement\n        mean_vel = np.zeros_like(mean_pos)\n        mean = np.r_[mean_pos, mean_vel]\n\n        std = [\n            2 * self._std_weight_position * measurement[3],\n            2 * self._std_weight_position * measurement[3],\n            1e-2,\n            2 * self._std_weight_position * measurement[3],\n            10 * self._std_weight_velocity * measurement[3],\n            10 * self._std_weight_velocity * measurement[3],\n            1e-5,\n            10 * self._std_weight_velocity * measurement[3]]\n        covariance = np.diag(np.square(std))\n        return mean, covariance\n\n    def predict(self, mean, covariance):\n        \"\"\"Run Kalman filter prediction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The 8 dimensional mean vector of the object state at the previous\n            time step.\n        covariance : ndarray\n            The 8x8 dimensional covariance matrix of the object state at the\n            previous time step.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the predicted\n            state. 
Unobserved velocities are initialized to 0 mean.\n\n        \"\"\"\n        std_pos = [\n            self._std_weight_position * mean[3],\n            self._std_weight_position * mean[3],\n            1e-2,\n            self._std_weight_position * mean[3]]\n        std_vel = [\n            self._std_weight_velocity * mean[3],\n            self._std_weight_velocity * mean[3],\n            1e-5,\n            self._std_weight_velocity * mean[3]]\n        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))\n\n        #mean = np.dot(self._motion_mat, mean)\n        mean = np.dot(mean, self._motion_mat.T)\n        covariance = np.linalg.multi_dot((\n            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov\n\n        return mean, covariance\n\n    def project(self, mean, covariance):\n        \"\"\"Project state distribution to measurement space.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The state's mean vector (8 dimensional array).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the projected mean and covariance matrix of the given state\n            estimate.\n\n        \"\"\"\n        std = [\n            self._std_weight_position * mean[3],\n            self._std_weight_position * mean[3],\n            1e-1,\n            self._std_weight_position * mean[3]]\n        innovation_cov = np.diag(np.square(std))\n\n        mean = np.dot(self._update_mat, mean)\n        covariance = np.linalg.multi_dot((\n            self._update_mat, covariance, self._update_mat.T))\n        return mean, covariance + innovation_cov\n\n    def multi_predict(self, mean, covariance):\n        \"\"\"Run Kalman filter prediction step (Vectorized version).\n        Parameters\n        ----------\n        mean : ndarray\n            The Nx8 dimensional mean matrix of the object states at the previous\n            time step.\n        covariance : ndarray\n            The Nx8x8 dimensional covariance matrics of the object states at the\n            previous time step.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the predicted\n            state. 
Unobserved velocities are initialized to 0 mean.\n        \"\"\"\n        std_pos = [\n            self._std_weight_position * mean[:, 3],\n            self._std_weight_position * mean[:, 3],\n            1e-2 * np.ones_like(mean[:, 3]),\n            self._std_weight_position * mean[:, 3]]\n        std_vel = [\n            self._std_weight_velocity * mean[:, 3],\n            self._std_weight_velocity * mean[:, 3],\n            1e-5 * np.ones_like(mean[:, 3]),\n            self._std_weight_velocity * mean[:, 3]]\n        sqr = np.square(np.r_[std_pos, std_vel]).T\n\n        motion_cov = []\n        for i in range(len(mean)):\n            motion_cov.append(np.diag(sqr[i]))\n        motion_cov = np.asarray(motion_cov)\n\n        mean = np.dot(mean, self._motion_mat.T)\n        left = np.dot(self._motion_mat, covariance).transpose((1, 0, 2))\n        covariance = np.dot(left, self._motion_mat.T) + motion_cov\n\n        return mean, covariance\n\n    def update(self, mean, covariance, measurement):\n        \"\"\"Run Kalman filter correction step.\n\n        Parameters\n        ----------\n        mean : ndarray\n            The predicted state's mean vector (8 dimensional).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n        measurement : ndarray\n            The 4 dimensional measurement vector (x, y, a, h), where (x, y)\n            is the center position, a the aspect ratio, and h the height of the\n            bounding box.\n\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the measurement-corrected state distribution.\n\n        \"\"\"\n        projected_mean, projected_cov = self.project(mean, covariance)\n\n        chol_factor, lower = scipy.linalg.cho_factor(\n            projected_cov, lower=True, check_finite=False)\n        kalman_gain = scipy.linalg.cho_solve(\n            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,\n            check_finite=False).T\n        innovation = measurement - projected_mean\n\n        new_mean = mean + np.dot(innovation, kalman_gain.T)\n        new_covariance = covariance - np.linalg.multi_dot((\n            kalman_gain, projected_cov, kalman_gain.T))\n        return new_mean, new_covariance\n\n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False, metric='maha'):\n        \"\"\"Compute gating distance between state distribution and measurements.\n        A suitable distance threshold can be obtained from `chi2inv95`. 
If\n        `only_position` is False, the chi-square distribution has 4 degrees of\n        freedom, otherwise 2.\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, a, h) where (x, y) is the bounding box center\n            position, a the aspect ratio, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n        \"\"\"\n        mean, covariance = self.project(mean, covariance)\n        if only_position:\n            mean, covariance = mean[:2], covariance[:2, :2]\n            measurements = measurements[:, :2]\n\n        d = measurements - mean\n        if metric == 'gaussian':\n            return np.sum(d * d, axis=1)\n        elif metric == 'maha':\n            cholesky_factor = np.linalg.cholesky(covariance)\n            z = scipy.linalg.solve_triangular(\n                cholesky_factor, d.T, lower=True, check_finite=False,\n                overwrite_b=True)\n            squared_maha = np.sum(z * z, axis=0)\n            return squared_maha\n        else:\n            raise ValueError('invalid distance metric')"
  },
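`gating_distance` projects the state into measurement space and returns squared Mahalanobis distances, which `gate_cost_matrix` compares against `chi2inv95[4]` to rule out implausible track-detection pairs. A small sketch of that gating decision with one near and one far measurement; the numbers are made up.

```python
# Sketch of Mahalanobis gating: distances from gating_distance() are compared against the
# 0.95 chi-square quantile with 4 degrees of freedom (full (x, y, a, h) gating).
import numpy as np
from trackers.bytetrack.kalman_filter import KalmanFilter, chi2inv95

kf = KalmanFilter()
mean, cov = kf.initiate(np.array([300.0, 200.0, 0.5, 120.0]))   # x, y, aspect, height
mean, cov = kf.predict(mean, cov)

measurements = np.array([
    [302.0, 201.0, 0.5, 121.0],   # close to the prediction
    [600.0, 500.0, 0.5, 121.0],   # far away
])

d2 = kf.gating_distance(mean, cov, measurements, only_position=False, metric='maha')
print(d2)
print(d2 <= chi2inv95[4])   # expected: [ True False ] -> the far box would get cost np.inf
```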
  {
    "path": "DLTA_AI_app/trackers/bytetrack/matching.py",
    "content": "import cv2\nimport numpy as np\nimport scipy\nimport lap\nfrom scipy.spatial.distance import cdist\n\nfrom trackers.bytetrack import kalman_filter\nimport time\n\ndef merge_matches(m1, m2, shape):\n    O,P,Q = shape\n    m1 = np.asarray(m1)\n    m2 = np.asarray(m2)\n\n    M1 = scipy.sparse.coo_matrix((np.ones(len(m1)), (m1[:, 0], m1[:, 1])), shape=(O, P))\n    M2 = scipy.sparse.coo_matrix((np.ones(len(m2)), (m2[:, 0], m2[:, 1])), shape=(P, Q))\n\n    mask = M1*M2\n    match = mask.nonzero()\n    match = list(zip(match[0], match[1]))\n    unmatched_O = tuple(set(range(O)) - set([i for i, j in match]))\n    unmatched_Q = tuple(set(range(Q)) - set([j for i, j in match]))\n\n    return match, unmatched_O, unmatched_Q\n\n\ndef _indices_to_matches(cost_matrix, indices, thresh):\n    matched_cost = cost_matrix[tuple(zip(*indices))]\n    matched_mask = (matched_cost <= thresh)\n\n    matches = indices[matched_mask]\n    unmatched_a = tuple(set(range(cost_matrix.shape[0])) - set(matches[:, 0]))\n    unmatched_b = tuple(set(range(cost_matrix.shape[1])) - set(matches[:, 1]))\n\n    return matches, unmatched_a, unmatched_b\n\n\ndef linear_assignment(cost_matrix, thresh):\n    if cost_matrix.size == 0:\n        return np.empty((0, 2), dtype=int), tuple(range(cost_matrix.shape[0])), tuple(range(cost_matrix.shape[1]))\n    matches, unmatched_a, unmatched_b = [], [], []\n    cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)\n    for ix, mx in enumerate(x):\n        if mx >= 0:\n            matches.append([ix, mx])\n    unmatched_a = np.where(x < 0)[0]\n    unmatched_b = np.where(y < 0)[0]\n    matches = np.asarray(matches)\n    return matches, unmatched_a, unmatched_b\n\n\ndef ious(atlbrs, btlbrs):\n    \"\"\"\n    Compute cost based on IoU\n    :type atlbrs: list[tlbr] | np.ndarray\n    :type atlbrs: list[tlbr] | np.ndarray\n\n    :rtype ious np.ndarray\n    \"\"\"\n    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float32)\n    if ious.size == 0:\n        return ious\n\n    ious = bbox_ious(\n        np.ascontiguousarray(atlbrs, dtype=np.float32),\n        np.ascontiguousarray(btlbrs, dtype=np.float32)\n    )\n\n    return ious\n\n\ndef iou_distance(atracks, btracks):\n    \"\"\"\n    Compute cost based on IoU\n    :type atracks: list[STrack]\n    :type btracks: list[STrack]\n\n    :rtype cost_matrix np.ndarray\n    \"\"\"\n\n    if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):\n        atlbrs = atracks\n        btlbrs = btracks\n    else:\n        atlbrs = [track.tlbr for track in atracks]\n        btlbrs = [track.tlbr for track in btracks]\n    _ious = ious(atlbrs, btlbrs)\n    cost_matrix = 1 - _ious\n\n    return cost_matrix\n\ndef v_iou_distance(atracks, btracks):\n    \"\"\"\n    Compute cost based on IoU\n    :type atracks: list[STrack]\n    :type btracks: list[STrack]\n\n    :rtype cost_matrix np.ndarray\n    \"\"\"\n\n    if (len(atracks)>0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):\n        atlbrs = atracks\n        btlbrs = btracks\n    else:\n        atlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in atracks]\n        btlbrs = [track.tlwh_to_tlbr(track.pred_bbox) for track in btracks]\n    _ious = ious(atlbrs, btlbrs)\n    cost_matrix = 1 - _ious\n\n    return cost_matrix\n\ndef embedding_distance(tracks, detections, metric='cosine'):\n    \"\"\"\n    :param tracks: list[STrack]\n    :param detections: 
list[BaseTrack]\n    :param metric:\n    :return: cost_matrix np.ndarray\n    \"\"\"\n\n    cost_matrix = np.zeros((len(tracks), len(detections)), dtype=np.float32)\n    if cost_matrix.size == 0:\n        return cost_matrix\n    det_features = np.asarray([track.curr_feat for track in detections], dtype=np.float32)\n    #for i, track in enumerate(tracks):\n        #cost_matrix[i, :] = np.maximum(0.0, cdist(track.smooth_feat.reshape(1,-1), det_features, metric))\n    track_features = np.asarray([track.smooth_feat for track in tracks], dtype=np.float32)\n    cost_matrix = np.maximum(0.0, cdist(track_features, det_features, metric))  # Nomalized features\n    return cost_matrix\n\n\ndef gate_cost_matrix(kf, cost_matrix, tracks, detections, only_position=False):\n    if cost_matrix.size == 0:\n        return cost_matrix\n    gating_dim = 2 if only_position else 4\n    gating_threshold = kalman_filter.chi2inv95[gating_dim]\n    measurements = np.asarray([det.to_xyah() for det in detections])\n    for row, track in enumerate(tracks):\n        gating_distance = kf.gating_distance(\n            track.mean, track.covariance, measurements, only_position)\n        cost_matrix[row, gating_distance > gating_threshold] = np.inf\n    return cost_matrix\n\n\ndef fuse_motion(kf, cost_matrix, tracks, detections, only_position=False, lambda_=0.98):\n    if cost_matrix.size == 0:\n        return cost_matrix\n    gating_dim = 2 if only_position else 4\n    gating_threshold = kalman_filter.chi2inv95[gating_dim]\n    measurements = np.asarray([det.to_xyah() for det in detections])\n    for row, track in enumerate(tracks):\n        gating_distance = kf.gating_distance(\n            track.mean, track.covariance, measurements, only_position, metric='maha')\n        cost_matrix[row, gating_distance > gating_threshold] = np.inf\n        cost_matrix[row] = lambda_ * cost_matrix[row] + (1 - lambda_) * gating_distance\n    return cost_matrix\n\n\ndef fuse_iou(cost_matrix, tracks, detections):\n    if cost_matrix.size == 0:\n        return cost_matrix\n    reid_sim = 1 - cost_matrix\n    iou_dist = iou_distance(tracks, detections)\n    iou_sim = 1 - iou_dist\n    fuse_sim = reid_sim * (1 + iou_sim) / 2\n    det_scores = np.array([det.score for det in detections])\n    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)\n    #fuse_sim = fuse_sim * (1 + det_scores) / 2\n    fuse_cost = 1 - fuse_sim\n    return fuse_cost\n\n\ndef fuse_score(cost_matrix, detections):\n    if cost_matrix.size == 0:\n        return cost_matrix\n    iou_sim = 1 - cost_matrix\n    det_scores = np.array([det.score for det in detections])\n    det_scores = np.expand_dims(det_scores, axis=0).repeat(cost_matrix.shape[0], axis=0)\n    fuse_sim = iou_sim * det_scores\n    fuse_cost = 1 - fuse_sim\n    return fuse_cost\n\n\ndef bbox_ious(boxes, query_boxes):\n    \"\"\"\n    Parameters\n    ----------\n    boxes: (N, 4) ndarray of float\n    query_boxes: (K, 4) ndarray of float\n    Returns\n    -------\n    overlaps: (N, K) ndarray of overlap between boxes and query_boxes\n    \"\"\"\n    N = boxes.shape[0]\n    K = query_boxes.shape[0]\n    overlaps = np.zeros((N, K), dtype=np.float32)\n    \n    for k in range(K):\n        box_area = (\n            (query_boxes[k, 2] - query_boxes[k, 0] + 1) *\n            (query_boxes[k, 3] - query_boxes[k, 1] + 1)\n        )\n        for n in range(N):\n            iw = (\n                min(boxes[n, 2], query_boxes[k, 2]) -\n                max(boxes[n, 0], query_boxes[k, 0]) + 
1\n            )\n            if iw > 0:\n                ih = (\n                    min(boxes[n, 3], query_boxes[k, 3]) -\n                    max(boxes[n, 1], query_boxes[k, 1]) + 1\n                )\n                if ih > 0:\n                    ua = float(\n                        (boxes[n, 2] - boxes[n, 0] + 1) *\n                        (boxes[n, 3] - boxes[n, 1] + 1) +\n                        box_area - iw * ih\n                    )\n                    overlaps[n, k] = iw * ih / ua\n    return overlaps"
  },
  {
    "path": "DLTA_AI_app/trackers/deepocsort/__init__.py",
    "content": "from . import args\nfrom . import ocsort\n"
  },
  {
    "path": "DLTA_AI_app/trackers/deepocsort/args.py",
    "content": "import argparse\n\n\ndef make_parser():\n    parser = argparse.ArgumentParser(\"OC-SORT parameters\")\n\n    # distributed\n    parser.add_argument(\"-b\", \"--batch-size\", type=int, default=1, help=\"batch size\")\n    parser.add_argument(\"-d\", \"--devices\", default=None, type=int, help=\"device for training\")\n\n    parser.add_argument(\"--local_rank\", default=0, type=int, help=\"local rank for dist training\")\n    parser.add_argument(\"--num_machines\", default=1, type=int, help=\"num of node for training\")\n    parser.add_argument(\"--machine_rank\", default=0, type=int, help=\"node rank for multi-node training\")\n\n    parser.add_argument(\n        \"-f\",\n        \"--exp_file\",\n        default=None,\n        type=str,\n        help=\"pls input your expriment description file\",\n    )\n    parser.add_argument(\n        \"--test\",\n        dest=\"test\",\n        default=False,\n        action=\"store_true\",\n        help=\"Evaluating on test-dev set.\",\n    )\n    parser.add_argument(\n        \"opts\",\n        help=\"Modify config options using the command-line\",\n        default=None,\n        nargs=argparse.REMAINDER,\n    )\n\n    # det args\n    parser.add_argument(\"-c\", \"--ckpt\", default=None, type=str, help=\"ckpt for eval\")\n    parser.add_argument(\"--conf\", default=0.1, type=float, help=\"test conf\")\n    parser.add_argument(\"--nms\", default=0.7, type=float, help=\"test nms threshold\")\n    parser.add_argument(\"--tsize\", default=[800, 1440], nargs=\"+\", type=int, help=\"test img size\")\n    parser.add_argument(\"--seed\", default=None, type=int, help=\"eval seed\")\n\n    # tracking args\n    parser.add_argument(\"--track_thresh\", type=float, default=0.6, help=\"detection confidence threshold\")\n    parser.add_argument(\n        \"--iou_thresh\",\n        type=float,\n        default=0.3,\n        help=\"the iou threshold in Sort for matching\",\n    )\n    parser.add_argument(\"--min_hits\", type=int, default=3, help=\"min hits to create track in SORT\")\n    parser.add_argument(\n        \"--inertia\",\n        type=float,\n        default=0.2,\n        help=\"the weight of VDC term in cost matrix\",\n    )\n    parser.add_argument(\n        \"--deltat\",\n        type=int,\n        default=3,\n        help=\"time step difference to estimate direction\",\n    )\n    parser.add_argument(\"--track_buffer\", type=int, default=30, help=\"the frames for keep lost tracks\")\n    parser.add_argument(\n        \"--match_thresh\",\n        type=float,\n        default=0.9,\n        help=\"matching threshold for tracking\",\n    )\n    parser.add_argument(\n        \"--gt-type\",\n        type=str,\n        default=\"_val_half\",\n        help=\"suffix to find the gt annotation\",\n    )\n    parser.add_argument(\"--public\", action=\"store_true\", help=\"use public detection\")\n    parser.add_argument(\"--asso\", default=\"iou\", help=\"similarity function: iou/giou/diou/ciou/ctdis\")\n\n    # for kitti/bdd100k inference with public detections\n    parser.add_argument(\n        \"--raw_results_path\",\n        type=str,\n        default=\"exps/permatrack_kitti_test/\",\n        help=\"path to the raw tracking results from other tracks\",\n    )\n    parser.add_argument(\"--out_path\", type=str, help=\"path to save output results\")\n    parser.add_argument(\n        \"--hp\",\n        action=\"store_true\",\n        help=\"use head padding to add the missing objects during \\\n            initializing the tracks (offline).\",\n    
)\n\n    # for demo video\n    parser.add_argument(\"--demo_type\", default=\"image\", help=\"demo type, eg. image, video and webcam\")\n    parser.add_argument(\"--path\", default=\"./videos/demo.mp4\", help=\"path to images or video\")\n    parser.add_argument(\"--camid\", type=int, default=0, help=\"webcam demo camera id\")\n    parser.add_argument(\n        \"--save_result\",\n        action=\"store_true\",\n        help=\"whether to save the inference result of image/video\",\n    )\n    parser.add_argument(\n        \"--device\",\n        default=\"gpu\",\n        type=str,\n        help=\"device to run our model, can either be cpu or gpu\",\n    )\n    return parser\n"
  },
  {
    "path": "DLTA_AI_app/trackers/deepocsort/association.py",
    "content": "import os\nimport pdb\n\nimport numpy as np\nfrom scipy.special import softmax\n\n\ndef iou_batch(bboxes1, bboxes2):\n    \"\"\"\n    From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]\n    \"\"\"\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])\n    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])\n    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])\n    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])\n    w = np.maximum(0.0, xx2 - xx1)\n    h = np.maximum(0.0, yy2 - yy1)\n    wh = w * h\n    o = wh / (\n        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])\n        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])\n        - wh\n    )\n    return o\n\n\ndef giou_batch(bboxes1, bboxes2):\n    \"\"\"\n    :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2)\n    :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2)\n    :return:\n    \"\"\"\n    # for details should go to https://arxiv.org/pdf/1902.09630.pdf\n    # ensure predict's bbox form\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])\n    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])\n    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])\n    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])\n    w = np.maximum(0.0, xx2 - xx1)\n    h = np.maximum(0.0, yy2 - yy1)\n    wh = w * h\n    iou = wh / (\n        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])\n        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])\n        - wh\n    )\n\n    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])\n    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])\n    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])\n    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])\n    wc = xxc2 - xxc1\n    hc = yyc2 - yyc1\n    assert (wc > 0).all() and (hc > 0).all()\n    area_enclose = wc * hc\n    giou = iou - (area_enclose - wh) / area_enclose\n    giou = (giou + 1.0) / 2.0  # resize from (-1,1) to (0,1)\n    return giou\n\n\ndef diou_batch(bboxes1, bboxes2):\n    \"\"\"\n    :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2)\n    :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2)\n    :return:\n    \"\"\"\n    # for details should go to https://arxiv.org/pdf/1902.09630.pdf\n    # ensure predict's bbox form\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    # calculate the intersection box\n    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])\n    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])\n    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])\n    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])\n    w = np.maximum(0.0, xx2 - xx1)\n    h = np.maximum(0.0, yy2 - yy1)\n    wh = w * h\n    iou = wh / (\n        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])\n        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])\n        - wh\n    )\n\n    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0\n    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0\n    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0\n    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0\n\n    inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2\n\n    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])\n    yyc1 = 
np.minimum(bboxes1[..., 1], bboxes2[..., 1])\n    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])\n    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])\n\n    outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2\n    diou = iou - inner_diag / outer_diag\n\n    return (diou + 1) / 2.0  # resize from (-1,1) to (0,1)\n\n\ndef ciou_batch(bboxes1, bboxes2):\n    \"\"\"\n    :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2)\n    :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2)\n    :return:\n    \"\"\"\n    # for details should go to https://arxiv.org/pdf/1902.09630.pdf\n    # ensure predict's bbox form\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    # calculate the intersection box\n    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])\n    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])\n    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])\n    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])\n    w = np.maximum(0.0, xx2 - xx1)\n    h = np.maximum(0.0, yy2 - yy1)\n    wh = w * h\n    iou = wh / (\n        (bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])\n        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1])\n        - wh\n    )\n\n    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0\n    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0\n    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0\n    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0\n\n    inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2\n\n    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])\n    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])\n    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])\n    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])\n\n    outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2\n\n    w1 = bboxes1[..., 2] - bboxes1[..., 0]\n    h1 = bboxes1[..., 3] - bboxes1[..., 1]\n    w2 = bboxes2[..., 2] - bboxes2[..., 0]\n    h2 = bboxes2[..., 3] - bboxes2[..., 1]\n\n    # prevent dividing over zero. 
add one pixel shift\n    h2 = h2 + 1.0\n    h1 = h1 + 1.0\n    arctan = np.arctan(w2 / h2) - np.arctan(w1 / h1)\n    v = (4 / (np.pi**2)) * (arctan**2)\n    S = 1 - iou\n    alpha = v / (S + v)\n    ciou = iou - inner_diag / outer_diag - alpha * v\n\n    return (ciou + 1) / 2.0  # resize from (-1,1) to (0,1)\n\n\ndef ct_dist(bboxes1, bboxes2):\n    \"\"\"\n    Measure the center distance between two sets of bounding boxes,\n    this is a coarse implementation, we don't recommend using it only\n    for association, which can be unstable and sensitive to frame rate\n    and object speed.\n    \"\"\"\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0\n    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0\n    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0\n    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0\n\n    ct_dist2 = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2\n\n    ct_dist = np.sqrt(ct_dist2)\n\n    # The linear rescaling is a naive version and needs more study\n    ct_dist = ct_dist / ct_dist.max()\n    return ct_dist.max() - ct_dist  # resize to (0,1)\n\n\ndef speed_direction_batch(dets, tracks):\n    tracks = tracks[..., np.newaxis]\n    CX1, CY1 = (dets[:, 0] + dets[:, 2]) / 2.0, (dets[:, 1] + dets[:, 3]) / 2.0\n    CX2, CY2 = (tracks[:, 0] + tracks[:, 2]) / 2.0, (tracks[:, 1] + tracks[:, 3]) / 2.0\n    dx = CX1 - CX2\n    dy = CY1 - CY2\n    norm = np.sqrt(dx**2 + dy**2) + 1e-6\n    dx = dx / norm\n    dy = dy / norm\n    return dy, dx  # size: num_track x num_det\n\n\ndef linear_assignment(cost_matrix):\n    try:\n        import lap\n\n        _, x, y = lap.lapjv(cost_matrix, extend_cost=True)\n        return np.array([[y[i], i] for i in x if i >= 0])  #\n    except ImportError:\n        from scipy.optimize import linear_sum_assignment\n\n        x, y = linear_sum_assignment(cost_matrix)\n        return np.array(list(zip(x, y)))\n\n\ndef associate_detections_to_trackers(detections, trackers, iou_threshold=0.3):\n    \"\"\"\n    Assigns detections to tracked object (both represented as bounding boxes)\n    Returns 3 lists of matches, unmatched_detections and unmatched_trackers\n    \"\"\"\n    if len(trackers) == 0:\n        return (\n            np.empty((0, 2), dtype=int),\n            np.arange(len(detections)),\n            np.empty((0, 5), dtype=int),\n        )\n\n    iou_matrix = iou_batch(detections, trackers)\n\n    if min(iou_matrix.shape) > 0:\n        a = (iou_matrix > iou_threshold).astype(np.int32)\n        if a.sum(1).max() == 1 and a.sum(0).max() == 1:\n            matched_indices = np.stack(np.where(a), axis=1)\n        else:\n            matched_indices = linear_assignment(-iou_matrix)\n    else:\n        matched_indices = np.empty(shape=(0, 2))\n\n    unmatched_detections = []\n    for d, det in enumerate(detections):\n        if d not in matched_indices[:, 0]:\n            unmatched_detections.append(d)\n    unmatched_trackers = []\n    for t, trk in enumerate(trackers):\n        if t not in matched_indices[:, 1]:\n            unmatched_trackers.append(t)\n\n    # filter out matched with low IOU\n    matches = []\n    for m in matched_indices:\n        if iou_matrix[m[0], m[1]] < iou_threshold:\n            unmatched_detections.append(m[0])\n            unmatched_trackers.append(m[1])\n        else:\n            matches.append(m.reshape(1, 2))\n    if len(matches) == 0:\n        matches = np.empty((0, 2), dtype=int)\n    else:\n        
matches = np.concatenate(matches, axis=0)\n\n    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)\n\n\ndef compute_aw_max_metric(emb_cost, w_association_emb, bottom=0.5):\n    w_emb = np.full_like(emb_cost, w_association_emb)\n\n    for idx in range(emb_cost.shape[0]):\n        inds = np.argsort(-emb_cost[idx])\n        # If there's less than two matches, just keep original weight\n        if len(inds) < 2:\n            continue\n        if emb_cost[idx, inds[0]] == 0:\n            row_weight = 0\n        else:\n            row_weight = 1 - max((emb_cost[idx, inds[1]] / emb_cost[idx, inds[0]]) - bottom, 0) / (1 - bottom)\n        w_emb[idx] *= row_weight\n\n    for idj in range(emb_cost.shape[1]):\n        inds = np.argsort(-emb_cost[:, idj])\n        # If there's less than two matches, just keep original weight\n        if len(inds) < 2:\n            continue\n        if emb_cost[inds[0], idj] == 0:\n            col_weight = 0\n        else:\n            col_weight = 1 - max((emb_cost[inds[1], idj] / emb_cost[inds[0], idj]) - bottom, 0) / (1 - bottom)\n        w_emb[:, idj] *= col_weight\n\n    return w_emb * emb_cost\n\n\ndef associate(\n    detections, trackers, iou_threshold, velocities, previous_obs, vdc_weight, emb_cost, w_assoc_emb, aw_off, aw_param\n):\n    if len(trackers) == 0:\n        return (\n            np.empty((0, 2), dtype=int),\n            np.arange(len(detections)),\n            np.empty((0, 5), dtype=int),\n        )\n\n    Y, X = speed_direction_batch(detections, previous_obs)\n    inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]\n    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)\n    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)\n    diff_angle_cos = inertia_X * X + inertia_Y * Y\n    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)\n    diff_angle = np.arccos(diff_angle_cos)\n    diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi\n\n    valid_mask = np.ones(previous_obs.shape[0])\n    valid_mask[np.where(previous_obs[:, 4] < 0)] = 0\n\n    iou_matrix = iou_batch(detections, trackers)\n    scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)\n    # iou_matrix = iou_matrix * scores # a trick sometiems works, we don't encourage this\n    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)\n\n    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight\n    angle_diff_cost = angle_diff_cost.T\n    angle_diff_cost = angle_diff_cost * scores\n\n    if min(iou_matrix.shape) > 0:\n        a = (iou_matrix > iou_threshold).astype(np.int32)\n        if a.sum(1).max() == 1 and a.sum(0).max() == 1:\n            matched_indices = np.stack(np.where(a), axis=1)\n        else:\n            if emb_cost is None:\n                emb_cost = 0\n            else:\n                emb_cost = emb_cost.cpu().numpy()\n                emb_cost[iou_matrix <= 0] = 0\n                if not aw_off:\n                    emb_cost = compute_aw_max_metric(emb_cost, w_assoc_emb, bottom=aw_param)\n                else:\n                    emb_cost *= w_assoc_emb\n\n            final_cost = -(iou_matrix + angle_diff_cost + emb_cost)\n            matched_indices = linear_assignment(final_cost)\n    else:\n        matched_indices = np.empty(shape=(0, 2))\n\n    unmatched_detections = []\n    for d, det in enumerate(detections):\n        if d not in matched_indices[:, 0]:\n            unmatched_detections.append(d)\n    unmatched_trackers = []\n    for t, trk in 
enumerate(trackers):\n        if t not in matched_indices[:, 1]:\n            unmatched_trackers.append(t)\n\n    # filter out matched with low IOU\n    matches = []\n    for m in matched_indices:\n        if iou_matrix[m[0], m[1]] < iou_threshold:\n            unmatched_detections.append(m[0])\n            unmatched_trackers.append(m[1])\n        else:\n            matches.append(m.reshape(1, 2))\n    if len(matches) == 0:\n        matches = np.empty((0, 2), dtype=int)\n    else:\n        matches = np.concatenate(matches, axis=0)\n\n    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)\n\n\ndef associate_kitti(detections, trackers, det_cates, iou_threshold, velocities, previous_obs, vdc_weight):\n    if len(trackers) == 0:\n        return (\n            np.empty((0, 2), dtype=int),\n            np.arange(len(detections)),\n            np.empty((0, 5), dtype=int),\n        )\n\n    \"\"\"\n        Cost from the velocity direction consistency\n    \"\"\"\n    Y, X = speed_direction_batch(detections, previous_obs)\n    inertia_Y, inertia_X = velocities[:, 0], velocities[:, 1]\n    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)\n    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)\n    diff_angle_cos = inertia_X * X + inertia_Y * Y\n    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)\n    diff_angle = np.arccos(diff_angle_cos)\n    diff_angle = (np.pi / 2.0 - np.abs(diff_angle)) / np.pi\n\n    valid_mask = np.ones(previous_obs.shape[0])\n    valid_mask[np.where(previous_obs[:, 4] < 0)] = 0\n    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)\n\n    scores = np.repeat(detections[:, -1][:, np.newaxis], trackers.shape[0], axis=1)\n    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight\n    angle_diff_cost = angle_diff_cost.T\n    angle_diff_cost = angle_diff_cost * scores\n\n    \"\"\"\n        Cost from IoU\n    \"\"\"\n    iou_matrix = iou_batch(detections, trackers)\n\n    \"\"\"\n        With multiple categories, generate the cost for catgory mismatch\n    \"\"\"\n    num_dets = detections.shape[0]\n    num_trk = trackers.shape[0]\n    cate_matrix = np.zeros((num_dets, num_trk))\n    for i in range(num_dets):\n        for j in range(num_trk):\n            if det_cates[i] != trackers[j, 4]:\n                cate_matrix[i][j] = -1e6\n\n    cost_matrix = -iou_matrix - angle_diff_cost - cate_matrix\n\n    if min(iou_matrix.shape) > 0:\n        a = (iou_matrix > iou_threshold).astype(np.int32)\n        if a.sum(1).max() == 1 and a.sum(0).max() == 1:\n            matched_indices = np.stack(np.where(a), axis=1)\n        else:\n            matched_indices = linear_assignment(cost_matrix)\n    else:\n        matched_indices = np.empty(shape=(0, 2))\n\n    unmatched_detections = []\n    for d, det in enumerate(detections):\n        if d not in matched_indices[:, 0]:\n            unmatched_detections.append(d)\n    unmatched_trackers = []\n    for t, trk in enumerate(trackers):\n        if t not in matched_indices[:, 1]:\n            unmatched_trackers.append(t)\n\n    # filter out matched with low IOU\n    matches = []\n    for m in matched_indices:\n        if iou_matrix[m[0], m[1]] < iou_threshold:\n            unmatched_detections.append(m[0])\n            unmatched_trackers.append(m[1])\n        else:\n            matches.append(m.reshape(1, 2))\n    if len(matches) == 0:\n        matches = np.empty((0, 2), dtype=int)\n    else:\n        matches = np.concatenate(matches, axis=0)\n\n    return 
matches, np.array(unmatched_detections), np.array(unmatched_trackers)\n"
  },
  {
    "path": "DLTA_AI_app/trackers/deepocsort/cmc.py",
    "content": "import pdb\nimport pickle\nimport os\n\nimport cv2\nimport numpy as np\n\n\nclass CMCComputer:\n    def __init__(self, minimum_features=10, method=\"sparse\"):\n        assert method in [\"file\", \"sparse\", \"sift\"]\n\n        os.makedirs(\"./cache\", exist_ok=True)\n        self.cache_path = \"./cache/affine_ocsort.pkl\"\n        self.cache = {}\n        if os.path.exists(self.cache_path):\n            with open(self.cache_path, \"rb\") as fp:\n                self.cache = pickle.load(fp)\n        self.minimum_features = minimum_features\n        self.prev_img = None\n        self.prev_desc = None\n        self.sparse_flow_param = dict(\n            maxCorners=3000,\n            qualityLevel=0.01,\n            minDistance=1,\n            blockSize=3,\n            useHarrisDetector=False,\n            k=0.04,\n        )\n        self.file_computed = {}\n\n        self.comp_function = None\n        if method == \"sparse\":\n            self.comp_function = self._affine_sparse_flow\n        elif method == \"sift\":\n            self.comp_function = self._affine_sift\n        # Same BoT-SORT CMC arrays\n        elif method == \"file\":\n            self.comp_function = self._affine_file\n            self.file_affines = {}\n            # Maps from tag name to file name\n            self.file_names = {}\n\n            # All the ablation file names\n            for f_name in os.listdir(\"./cache/cmc_files/MOT17_ablation/\"):\n                # The tag that'll be passed into compute_affine based on image name\n                tag = f_name.replace(\"GMC-\", \"\").replace(\".txt\", \"\") + \"-FRCNN\"\n                f_name = os.path.join(\"./cache/cmc_files/MOT17_ablation/\", f_name)\n                self.file_names[tag] = f_name\n            for f_name in os.listdir(\"./cache/cmc_files/MOT20_ablation/\"):\n                tag = f_name.replace(\"GMC-\", \"\").replace(\".txt\", \"\")\n                f_name = os.path.join(\"./cache/cmc_files/MOT20_ablation/\", f_name)\n                self.file_names[tag] = f_name\n\n            # All the test file names\n            for f_name in os.listdir(\"./cache/cmc_files/MOTChallenge/\"):\n                tag = f_name.replace(\"GMC-\", \"\").replace(\".txt\", \"\")\n                if \"MOT17\" in tag:\n                    tag = tag + \"-FRCNN\"\n                # If it's an ablation one (not test) don't overwrite it\n                if tag in self.file_names:\n                    continue\n                f_name = os.path.join(\"./cache/cmc_files/MOTChallenge/\", f_name)\n                self.file_names[tag] = f_name\n\n    def compute_affine(self, img, bbox, tag):\n        img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n        if tag in self.cache:\n            A = self.cache[tag]\n            return A\n        mask = np.ones_like(img, dtype=np.uint8)\n        if bbox.shape[0] > 0:\n            bbox = np.round(bbox).astype(np.int32)\n            bbox[bbox < 0] = 0\n            for bb in bbox:\n                mask[bb[1] : bb[3], bb[0] : bb[2]] = 0\n\n        A = self.comp_function(img, mask, tag)\n        self.cache[tag] = A\n\n        return A\n\n    def _load_file(self, name):\n        affines = []\n        with open(self.file_names[name], \"r\") as fp:\n            for line in fp:\n                tokens = [float(f) for f in line.split(\"\\t\")[1:7]]\n                A = np.eye(2, 3)\n                A[0, 0] = tokens[0]\n                A[0, 1] = tokens[1]\n                A[0, 2] = tokens[2]\n                A[1, 0] = tokens[3]\n       
         A[1, 1] = tokens[4]\n                A[1, 2] = tokens[5]\n                affines.append(A)\n        self.file_affines[name] = affines\n\n    def _affine_file(self, frame, mask, tag):\n        name, num = tag.split(\":\")\n        if name not in self.file_affines:\n            self._load_file(name)\n        if name not in self.file_affines:\n            raise RuntimeError(\"Error loading file affines for CMC.\")\n\n        return self.file_affines[name][int(num) - 1]\n\n    def _affine_sift(self, frame, mask, tag):\n        A = np.eye(2, 3)\n        detector = cv2.SIFT_create()\n        kp, desc = detector.detectAndCompute(frame, mask)\n        if self.prev_desc is None:\n            self.prev_desc = [kp, desc]\n            return A\n        if desc.shape[0] < self.minimum_features or self.prev_desc[1].shape[0] < self.minimum_features:\n            return A\n\n        bf = cv2.BFMatcher(cv2.NORM_L2)\n        matches = bf.knnMatch(self.prev_desc[1], desc, k=2)\n        good = []\n        for m, n in matches:\n            if m.distance < 0.7 * n.distance:\n                good.append(m)\n\n        if len(good) > self.minimum_features:\n            src_pts = np.float32([self.prev_desc[0][m.queryIdx].pt for m in good]).reshape(-1, 1, 2)\n            dst_pts = np.float32([kp[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)\n            A, _ = cv2.estimateAffinePartial2D(src_pts, dst_pts, method=cv2.RANSAC)\n        else:\n            print(\"Warning: not enough matching points\")\n        if A is None:\n            A = np.eye(2, 3)\n\n        self.prev_desc = [kp, desc]\n        return A\n\n    def _affine_sparse_flow(self, frame, mask, tag):\n        # Initialize\n        A = np.eye(2, 3)\n\n        # find the keypoints\n        keypoints = cv2.goodFeaturesToTrack(frame, mask=mask, **self.sparse_flow_param)\n\n        # Handle first frame\n        if self.prev_img is None:\n            self.prev_img = frame\n            self.prev_desc = keypoints\n            return A\n\n        matched_kp, status, err = cv2.calcOpticalFlowPyrLK(self.prev_img, frame, self.prev_desc, None)\n        matched_kp = matched_kp.reshape(-1, 2)\n        status = status.reshape(-1)\n        prev_points = self.prev_desc.reshape(-1, 2)\n        prev_points = prev_points[status]\n        curr_points = matched_kp[status]\n\n        # Find rigid matrix\n        if prev_points.shape[0] > self.minimum_features:\n            A, _ = cv2.estimateAffinePartial2D(prev_points, curr_points, method=cv2.RANSAC)\n        else:\n            print(\"Warning: not enough matching points\")\n        if A is None:\n            A = np.eye(2, 3)\n\n        self.prev_img = frame\n        self.prev_desc = keypoints\n        return A\n\n    def dump_cache(self):\n        with open(self.cache_path, \"wb\") as fp:\n            pickle.dump(self.cache, fp)\n"
  },
  {
    "path": "DLTA_AI_app/trackers/deepocsort/configs/deepocsort.yaml",
    "content": "# Trial number:      137\n# HOTA, MOTA, IDF1:  [55.567]\ndeepocsort:\n  asso_func: giou\n  conf_thres: 0.5122620708221085\n  delta_t: 1\n  det_thresh: 0\n  inertia: 0.3941737016672115\n  iou_thresh: 0.22136877277096445\n  max_age: 50\n  min_hits: 1\n  use_byte: false\n"
  },
  {
    "path": "DLTA_AI_app/trackers/deepocsort/embedding.py",
    "content": "import pdb\nfrom collections import OrderedDict\nimport os\nimport pickle\n\nimport torch\nimport cv2\nimport torchvision\nimport numpy as np\n\n\n\nclass EmbeddingComputer:\n    def __init__(self, dataset):\n        self.model = None\n        self.dataset = dataset\n        self.crop_size = (128, 384)\n        os.makedirs(\"./cache/embeddings/\", exist_ok=True)\n        self.cache_path = \"./cache/embeddings/{}_embedding.pkl\"\n        self.cache = {}\n        self.cache_name = \"\"\n\n    def load_cache(self, path):\n        self.cache_name = path\n        cache_path = self.cache_path.format(path)\n        if os.path.exists(cache_path):\n            with open(cache_path, \"rb\") as fp:\n                self.cache = pickle.load(fp)\n\n    def compute_embedding(self, img, bbox, tag, is_numpy=True):\n        if self.cache_name != tag.split(\":\")[0]:\n            self.load_cache(tag.split(\":\")[0])\n\n        if tag in self.cache:\n            embs = self.cache[tag]\n            if embs.shape[0] != bbox.shape[0]:\n                raise RuntimeError(\n                    \"ERROR: The number of cached embeddings don't match the \"\n                    \"number of detections.\\nWas the detector model changed? Delete cache if so.\"\n                )\n            return embs\n\n        if self.model is None:\n            self.initialize_model()\n\n        # Make sure bbox is within image frame\n        if is_numpy:\n            h, w = img.shape[:2]\n        else:\n            h, w = img.shape[2:]\n        results = np.round(bbox).astype(np.int32)\n        results[:, 0] = results[:, 0].clip(0, w)\n        results[:, 1] = results[:, 1].clip(0, h)\n        results[:, 2] = results[:, 2].clip(0, w)\n        results[:, 3] = results[:, 3].clip(0, h)\n\n        # Generate all the crops\n        crops = []\n        for p in results:\n            if is_numpy:\n                crop = img[p[1] : p[3], p[0] : p[2]]\n                crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)\n                crop = cv2.resize(crop, self.crop_size, interpolation=cv2.INTER_LINEAR)\n                crop = torch.as_tensor(crop.astype(\"float32\").transpose(2, 0, 1))\n                crop = crop.unsqueeze(0)\n            else:\n                crop = img[:, :, p[1] : p[3], p[0] : p[2]]\n                crop = torchvision.transforms.functional.resize(crop, self.crop_size)\n\n            crops.append(crop)\n\n        crops = torch.cat(crops, dim=0)\n\n        # Create embeddings and l2 normalize them\n        with torch.no_grad():\n            crops = crops.cuda()\n            crops = crops.half()\n            embs = self.model(crops)\n        embs = torch.nn.functional.normalize(embs)\n        embs = embs.cpu().numpy()\n\n        self.cache[tag] = embs\n        return embs\n\n    def initialize_model(self):\n        \"\"\"\n        model = torchreid.models.build_model(name=\"osnet_ain_x1_0\", num_classes=2510, loss=\"softmax\", pretrained=False)\n        sd = torch.load(\"external/weights/osnet_ain_ms_d_c.pth.tar\")[\"state_dict\"]\n        new_state_dict = OrderedDict()\n        for k, v in sd.items():\n            name = k[7:]  # remove `module.`\n            new_state_dict[name] = v\n        # load params\n        model.load_state_dict(new_state_dict)\n        model.eval()\n        model.cuda()\n        \"\"\"\n        if self.dataset == \"mot17\":\n            path = \"external/weights/mot17_sbs_S50.pth\"\n        elif self.dataset == \"mot20\":\n            path = \"external/weights/mot20_sbs_S50.pth\"\n    
    elif self.dataset == \"dance\":\n            path = None\n        else:\n            raise RuntimeError(\"Need the path for a new ReID model.\")\n\n        model = FastReID(path)\n        model.eval()\n        model.cuda()\n        model.half()\n        self.model = model\n\n    def dump_cache(self):\n        if self.cache_name:\n            with open(self.cache_path.format(self.cache_name), \"wb\") as fp:\n                pickle.dump(self.cache, fp)\n"
  },
  {
    "path": "DLTA_AI_app/trackers/deepocsort/kalmanfilter.py",
    "content": "# -*- coding: utf-8 -*-\n# pylint: disable=invalid-name, too-many-arguments, too-many-branches,\n# pylint: disable=too-many-locals, too-many-instance-attributes, too-many-lines\n\n\"\"\"\nThis module implements the linear Kalman filter in both an object\noriented and procedural form. The KalmanFilter class implements\nthe filter by storing the various matrices in instance variables,\nminimizing the amount of bookkeeping you have to do.\nAll Kalman filters operate with a predict->update cycle. The\npredict step, implemented with the method or function predict(),\nuses the state transition matrix F to predict the state in the next\ntime period (epoch). The state is stored as a gaussian (x, P), where\nx is the state (column) vector, and P is its covariance. Covariance\nmatrix Q specifies the process covariance. In Bayesian terms, this\nprediction is called the *prior*, which you can think of colloquially\nas the estimate prior to incorporating the measurement.\nThe update step, implemented with the method or function `update()`,\nincorporates the measurement z with covariance R, into the state\nestimate (x, P). The class stores the system uncertainty in S,\nthe innovation (residual between prediction and measurement in\nmeasurement space) in y, and the Kalman gain in k. The procedural\nform returns these variables to you. In Bayesian terms this computes\nthe *posterior* - the estimate after the information from the\nmeasurement is incorporated.\nWhether you use the OO form or procedural form is up to you. If\nmatrices such as H, R, and F are changing each epoch, you'll probably\nopt to use the procedural form. If they are unchanging, the OO\nform is perhaps easier to use since you won't need to keep track\nof these matrices. This is especially useful if you are implementing\nbanks of filters or comparing various KF designs for performance;\na trivial coding bug could lead to using the wrong sets of matrices.\nThis module also offers an implementation of the RTS smoother, and\nother helper functions, such as log likelihood computations.\nThe Saver class allows you to easily save the state of the\nKalmanFilter class after every update\nThis module expects NumPy arrays for all values that expect\narrays, although in a few cases, particularly method parameters,\nit will accept types that convert to NumPy arrays, such as lists\nof lists. These exceptions are documented in the method or function.\nExamples\n--------\nThe following example constructs a constant velocity kinematic\nfilter, filters noisy data, and plots the results. It also demonstrates\nusing the Saver class to save the state of the filter at each epoch.\n.. 
code-block:: Python\n    import matplotlib.pyplot as plt\n    import numpy as np\n    from numpy.random import randn\n    from filterpy.kalman import KalmanFilter\n    from filterpy.common import Q_discrete_white_noise, Saver\n    r_std, q_std = 2., 0.003\n    dt = 1.\n    cv = KalmanFilter(dim_x=2, dim_z=1)\n    cv.x = np.array([[0.], [1.]]) # position, velocity\n    cv.F = np.array([[1., dt], [0., 1.]])\n    cv.R = np.array([[r_std**2]])\n    cv.H = np.array([[1., 0.]])\n    cv.P = np.diag([.1**2, .03**2])\n    cv.Q = Q_discrete_white_noise(2, dt, q_std**2)\n    saver = Saver(cv)\n    for z in range(100):\n        cv.predict()\n        cv.update([z + randn() * r_std])\n        saver.save() # save the filter's state\n    saver.to_array()\n    plt.plot(saver.x[:, 0])\n    # plot all of the priors\n    plt.plot(saver.x_prior[:, 0])\n    # plot mahalanobis distance\n    plt.figure()\n    plt.plot(saver.mahalanobis)\nThis code implements the same filter using the procedural form:\n    x = np.array([[0.], [1.]]) # position, velocity\n    F = np.array([[1., dt], [0., 1.]])\n    R = np.array([[r_std**2]])\n    H = np.array([[1., 0.]])\n    P = np.diag([.1**2, .03**2])\n    Q = Q_discrete_white_noise(2, dt, q_std**2)\n    xs = []\n    for z in range(100):\n        x, P = predict(x, P, F=F, Q=Q)\n        x, P = update(x, P, z=[z + randn() * r_std], R=R, H=H)\n        xs.append(x[0, 0])\n    plt.plot(xs)\nFor more examples see the test subdirectory, or refer to the\nbook cited below. In it I both teach Kalman filtering from basic\nprinciples, and teach the use of this library in great detail.\nFilterPy library.\nhttp://github.com/rlabbe/filterpy\nDocumentation at:\nhttps://filterpy.readthedocs.org\nSupporting book at:\nhttps://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python\nThis is licensed under an MIT license. See the readme.MD file\nfor more information.\nCopyright 2014-2018 Roger R Labbe Jr.\n\"\"\"\n\nfrom __future__ import absolute_import, division\n\nimport pdb\nfrom copy import deepcopy\nfrom math import log, exp, sqrt\nimport sys\nimport numpy as np\nfrom numpy import dot, zeros, eye, isscalar, shape\nimport numpy.linalg as linalg\nfrom filterpy.stats import logpdf\nfrom filterpy.common import pretty_str, reshape_z\n\n\nclass KalmanFilterNew(object):\n    \"\"\"Implements a Kalman filter. You are responsible for setting the\n    various state variables to reasonable values; the defaults will\n    not give you a functional filter.\n    For now the best documentation is my free book Kalman and Bayesian\n    Filters in Python [2]_. The test files in this directory also give you a\n    basic idea of use, albeit without much description.\n    In brief, you will first construct this object, specifying the size of\n    the state vector with dim_x and the size of the measurement vector that\n    you will be using with dim_z. These are mostly used to perform size checks\n    when you assign values to the various matrices. For example, if you\n    specified dim_z=2 and then try to assign a 3x3 matrix to R (the\n    measurement noise matrix) you will get an assert exception because R\n    should be 2x2. (If for whatever reason you need to alter the size of\n    things midstream just use the underscore version of the matrices to\n    assign directly: your_filter._R = a_3x3_matrix.)\n    After construction the filter will have default matrices created for you,\n    but you must specify the values for each. It’s usually easiest to just\n    overwrite them rather than assign to each element yourself. This will be\n    clearer in the example below. 
All are of type numpy.array.\n    Examples\n    --------\n    Here is a filter that tracks position and velocity using a sensor that only\n    reads position.\n    First construct the object with the required dimensionality. Here the state\n    (`dim_x`) has 2 coefficients (position and velocity), and the measurement\n    (`dim_z`) has one. In FilterPy `x` is the state, `z` is the measurement.\n    .. code::\n        from filterpy.kalman import KalmanFilter\n        f = KalmanFilter (dim_x=2, dim_z=1)\n    Assign the initial value for the state (position and velocity). You can do this\n    with a two dimensional array like so:\n        .. code::\n            f.x = np.array([[2.],    # position\n                            [0.]])   # velocity\n    or just use a one dimensional array, which I prefer doing.\n    .. code::\n        f.x = np.array([2., 0.])\n    Define the state transition matrix:\n        .. code::\n            f.F = np.array([[1.,1.],\n                            [0.,1.]])\n    Define the measurement function. Here we need to convert a position-velocity\n    vector into just a position vector, so we use:\n        .. code::\n        f.H = np.array([[1., 0.]])\n    Define the state's covariance matrix P.\n    .. code::\n        f.P = np.array([[1000.,    0.],\n                        [   0., 1000.] ])\n    Now assign the measurement noise. Here the dimension is 1x1, so I can\n    use a scalar\n    .. code::\n        f.R = 5\n    I could have done this instead:\n    .. code::\n        f.R = np.array([[5.]])\n    Note that this must be a 2 dimensional array.\n    Finally, I will assign the process noise. Here I will take advantage of\n    another FilterPy library function:\n    .. code::\n        from filterpy.common import Q_discrete_white_noise\n        f.Q = Q_discrete_white_noise(dim=2, dt=0.1, var=0.13)\n    Now just perform the standard predict/update loop:\n    .. code::\n        while some_condition_is_true:\n            z = get_sensor_reading()\n            f.predict()\n            f.update(z)\n            do_something_with_estimate (f.x)\n    **Procedural Form**\n    This module also contains stand alone functions to perform Kalman filtering.\n    Use these if you are not a fan of objects.\n    **Example**\n    .. code::\n        while True:\n            z, R = read_sensor()\n            x, P = predict(x, P, F, Q)\n            x, P = update(x, P, z, R, H)\n    See my book Kalman and Bayesian Filters in Python [2]_.\n    You will have to set the following attributes after constructing this\n    object for the filter to perform properly. Please note that there are\n    various checks in place to ensure that you have made everything the\n    'correct' size. However, it is possible to provide incorrectly sized\n    arrays such that the linear algebra can not perform an operation.\n    It can also fail silently - you can end up with matrices of a size that\n    allows the linear algebra to work, but are the wrong shape for the problem\n    you are trying to solve.\n    Parameters\n    ----------\n    dim_x : int\n        Number of state variables for the Kalman filter. For example, if\n        you are tracking the position and velocity of an object in two\n        dimensions, dim_x would be 4.\n        This is used to set the default size of P, Q, and u\n    dim_z : int\n        Number of of measurement inputs. 
For example, if the sensor\n        provides you with position in (x,y), dim_z would be 2.\n    dim_u : int (optional)\n        size of the control input, if it is being used.\n        Default value of 0 indicates it is not used.\n    compute_log_likelihood : bool (default = True)\n        Computes log likelihood by default, but this can be a slow\n        computation, so if you never use it you can turn this computation\n        off.\n    Attributes\n    ----------\n    x : numpy.array(dim_x, 1)\n        Current state estimate. Any call to update() or predict() updates\n        this variable.\n    P : numpy.array(dim_x, dim_x)\n        Current state covariance matrix. Any call to update() or predict()\n        updates this variable.\n    x_prior : numpy.array(dim_x, 1)\n        Prior (predicted) state estimate. The *_prior and *_post attributes\n        are for convenience; they store the  prior and posterior of the\n        current epoch. Read Only.\n    P_prior : numpy.array(dim_x, dim_x)\n        Prior (predicted) state covariance matrix. Read Only.\n    x_post : numpy.array(dim_x, 1)\n        Posterior (updated) state estimate. Read Only.\n    P_post : numpy.array(dim_x, dim_x)\n        Posterior (updated) state covariance matrix. Read Only.\n    z : numpy.array\n        Last measurement used in update(). Read only.\n    R : numpy.array(dim_z, dim_z)\n        Measurement noise covariance matrix. Also known as the\n        observation covariance.\n    Q : numpy.array(dim_x, dim_x)\n        Process noise covariance matrix. Also known as the transition\n        covariance.\n    F : numpy.array()\n        State Transition matrix. Also known as `A` in some formulation.\n    H : numpy.array(dim_z, dim_x)\n        Measurement function. Also known as the observation matrix, or as `C`.\n    y : numpy.array\n        Residual of the update step. Read only.\n    K : numpy.array(dim_x, dim_z)\n        Kalman gain of the update step. Read only.\n    S :  numpy.array\n        System uncertainty (P projected to measurement space). Read only.\n    SI :  numpy.array\n        Inverse system uncertainty. Read only.\n    log_likelihood : float\n        log-likelihood of the last measurement. Read only.\n    likelihood : float\n        likelihood of last measurement. Read only.\n        Computed from the log-likelihood. The log-likelihood can be very\n        small,  meaning a large negative value such as -28000. Taking the\n        exp() of that results in 0.0, which can break typical algorithms\n        which multiply by this value, so by default we always return a\n        number >= sys.float_info.min.\n    mahalanobis : float\n        mahalanobis distance of the innovation. Read only.\n    inv : function, default numpy.linalg.inv\n        If you prefer another inverse function, such as the Moore-Penrose\n        pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv\n        This is only used to invert self.S. If you know it is diagonal, you\n        might choose to set it to filterpy.common.inv_diagonal, which is\n        several times faster than numpy.linalg.inv for diagonal matrices.\n    alpha : float\n        Fading memory setting. 1.0 gives the normal Kalman filter, and\n        values slightly larger than 1.0 (such as 1.02) give a fading\n        memory effect - previous measurements have less influence on the\n        filter's estimates. This formulation of the Fading memory filter\n        (there are many) is due to Dan Simon [1]_.\n    References\n    ----------\n    .. 
[1] Dan Simon. \"Optimal State Estimation.\" John Wiley & Sons.\n       p. 208-212. (2006)\n    .. [2] Roger Labbe. \"Kalman and Bayesian Filters in Python\"\n       https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python\n    \"\"\"\n\n    def __init__(self, dim_x, dim_z, dim_u=0):\n        if dim_x < 1:\n            raise ValueError(\"dim_x must be 1 or greater\")\n        if dim_z < 1:\n            raise ValueError(\"dim_z must be 1 or greater\")\n        if dim_u < 0:\n            raise ValueError(\"dim_u must be 0 or greater\")\n\n        self.dim_x = dim_x\n        self.dim_z = dim_z\n        self.dim_u = dim_u\n\n        self.x = zeros((dim_x, 1))  # state\n        self.P = eye(dim_x)  # uncertainty covariance\n        self.Q = eye(dim_x)  # process uncertainty\n        self.B = None  # control transition matrix\n        self.F = eye(dim_x)  # state transition matrix\n        self.H = zeros((dim_z, dim_x))  # measurement function\n        self.R = eye(dim_z)  # measurement uncertainty\n        self._alpha_sq = 1.0  # fading memory control\n        self.M = np.zeros((dim_x, dim_z))  # process-measurement cross correlation\n        self.z = np.array([[None] * self.dim_z]).T\n\n        # gain and residual are computed during the innovation step. We\n        # save them so that in case you want to inspect them for various\n        # purposes\n        self.K = np.zeros((dim_x, dim_z))  # kalman gain\n        self.y = zeros((dim_z, 1))\n        self.S = np.zeros((dim_z, dim_z))  # system uncertainty\n        self.SI = np.zeros((dim_z, dim_z))  # inverse system uncertainty\n\n        # identity matrix. Do not alter this.\n        self._I = np.eye(dim_x)\n\n        # these will always be a copy of x,P after predict() is called\n        self.x_prior = self.x.copy()\n        self.P_prior = self.P.copy()\n\n        # these will always be a copy of x,P after update() is called\n        self.x_post = self.x.copy()\n        self.P_post = self.P.copy()\n\n        # Only computed only if requested via property\n        self._log_likelihood = log(sys.float_info.min)\n        self._likelihood = sys.float_info.min\n        self._mahalanobis = None\n\n        # keep all observations\n        self.history_obs = []\n\n        self.inv = np.linalg.inv\n\n        self.attr_saved = None\n        self.observed = False\n        self.last_measurement = None\n\n    def predict(self, u=None, B=None, F=None, Q=None):\n        \"\"\"\n        Predict next state (prior) using the Kalman filter state propagation\n        equations.\n        Parameters\n        ----------\n        u : np.array, default 0\n            Optional control vector.\n        B : np.array(dim_x, dim_u), or None\n            Optional control transition matrix; a value of None\n            will cause the filter to use `self.B`.\n        F : np.array(dim_x, dim_x), or None\n            Optional state transition matrix; a value of None\n            will cause the filter to use `self.F`.\n        Q : np.array(dim_x, dim_x), scalar, or None\n            Optional process noise matrix; a value of None will cause the\n            filter to use `self.Q`.\n        \"\"\"\n\n        if B is None:\n            B = self.B\n        if F is None:\n            F = self.F\n        if Q is None:\n            Q = self.Q\n        elif isscalar(Q):\n            Q = eye(self.dim_x) * Q\n\n        # x = Fx + Bu\n        if B is not None and u is not None:\n            self.x = dot(F, self.x) + dot(B, u)\n        else:\n            self.x = dot(F, self.x)\n\n 
       # P = FPF' + Q\n        self.P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q\n\n        # save prior\n        self.x_prior = self.x.copy()\n        self.P_prior = self.P.copy()\n\n    def freeze(self):\n        \"\"\"\n        Save the parameters before non-observation forward\n        \"\"\"\n        self.attr_saved = deepcopy(self.__dict__)\n\n    def apply_affine_correction(self, m, t, new_kf):\n        \"\"\"\n        Apply to both last state and last observation for OOS smoothing.\n\n        Messy due to internal logic for kalman filter being messy.\n        \"\"\"\n        if new_kf:\n            big_m = np.kron(np.eye(4, dtype=float), m)\n            self.x = big_m @ self.x\n            self.x[:2] += t\n            self.P = big_m @ self.P @ big_m.T\n\n            # If frozen, also need to update the frozen state for OOS\n            if not self.observed and self.attr_saved is not None:\n                self.attr_saved[\"x\"] = big_m @ self.attr_saved[\"x\"]\n                self.attr_saved[\"x\"][:2] += t\n                self.attr_saved[\"P\"] = big_m @ self.attr_saved[\"P\"] @ big_m.T\n                self.attr_saved[\"last_measurement\"][:2] = m @ self.attr_saved[\"last_measurement\"][:2] + t\n                self.attr_saved[\"last_measurement\"][2:] = m @ self.attr_saved[\"last_measurement\"][2:]\n        else:\n            scale = np.linalg.norm(m[:, 0])\n            self.x[:2] = m @ self.x[:2] + t\n            self.x[4:6] = m @ self.x[4:6]\n            # self.x[2] *= scale\n            # self.x[6] *= scale\n\n            self.P[:2, :2] = m @ self.P[:2, :2] @ m.T\n            self.P[4:6, 4:6] = m @ self.P[4:6, 4:6] @ m.T\n            # self.P[2, 2] *= 2 * scale\n            # self.P[6, 6] *= 2 * scale\n\n            # If frozen, also need to update the frozen state for OOS\n            if not self.observed and self.attr_saved is not None:\n                self.attr_saved[\"x\"][:2] = m @ self.attr_saved[\"x\"][:2] + t\n                self.attr_saved[\"x\"][4:6] = m @ self.attr_saved[\"x\"][4:6]\n                # self.attr_saved[\"x\"][2] *= scale\n                # self.attr_saved[\"x\"][6] *= scale\n\n                self.attr_saved[\"P\"][:2, :2] = m @ self.attr_saved[\"P\"][:2, :2] @ m.T\n                self.attr_saved[\"P\"][4:6, 4:6] = m @ self.attr_saved[\"P\"][4:6, 4:6] @ m.T\n                # self.attr_saved[\"P\"][2, 2] *= 2 * scale\n                # self.attr_saved[\"P\"][6, 6] *= 2 * scale\n\n                self.attr_saved[\"last_measurement\"][:2] = m @ self.attr_saved[\"last_measurement\"][:2] + t\n                # self.attr_saved[\"last_measurement\"][2] *= scale\n\n    def unfreeze(self):\n        if self.attr_saved is not None:\n            new_history = deepcopy(self.history_obs)\n            self.__dict__ = self.attr_saved\n            # self.history_obs = new_history\n            self.history_obs = self.history_obs[:-1]\n            occur = [int(d is None) for d in new_history]\n            indices = np.where(np.array(occur) == 0)[0]\n            index1 = indices[-2]\n            index2 = indices[-1]\n            # box1 = new_history[index1]\n            box1 = self.last_measurement\n            x1, y1, s1, r1 = box1\n            w1 = np.sqrt(s1 * r1)\n            h1 = np.sqrt(s1 / r1)\n            box2 = new_history[index2]\n            x2, y2, s2, r2 = box2\n            w2 = np.sqrt(s2 * r2)\n            h2 = np.sqrt(s2 / r2)\n            time_gap = index2 - index1\n            dx = (x2 - x1) / time_gap\n            dy = (y2 - y1) / 
time_gap\n            dw = (w2 - w1) / time_gap\n            dh = (h2 - h1) / time_gap\n            for i in range(index2 - index1):\n                \"\"\"\n                The default virtual trajectory generation is by linear\n                motion (constant speed hypothesis), you could modify this\n                part to implement your own.\n                \"\"\"\n                x = x1 + (i + 1) * dx\n                y = y1 + (i + 1) * dy\n                w = w1 + (i + 1) * dw\n                h = h1 + (i + 1) * dh\n                s = w * h\n                r = w / float(h)\n                new_box = np.array([x, y, s, r]).reshape((4, 1))\n                \"\"\"\n                    I still use predict-update loop here to refresh the parameters,\n                    but this can be faster by directly modifying the internal parameters\n                    as suggested in the paper. I keep this naive but slow way for \n                    easy read and understanding\n                \"\"\"\n                self.update(new_box)\n                if not i == (index2 - index1 - 1):\n                    self.predict()\n\n    def update(self, z, R=None, H=None):\n        \"\"\"\n        Add a new measurement (z) to the Kalman filter.\n        If z is None, nothing is computed. However, x_post and P_post are\n        updated with the prior (x_prior, P_prior), and self.z is set to None.\n        Parameters\n        ----------\n        z : (dim_z, 1): array_like\n            measurement for this update. z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n            If you pass in a value of H, z must be a column vector the\n            of the correct size.\n        R : np.array, scalar, or None\n            Optionally provide R to override the measurement noise for this\n            one call, otherwise  self.R will be used.\n        H : np.array, or None\n            Optionally provide H to override the measurement function for this\n            one call, otherwise self.H will be used.\n        \"\"\"\n        # set to None to force recompute\n        self._log_likelihood = None\n        self._likelihood = None\n        self._mahalanobis = None\n\n        # append the observation\n        self.history_obs.append(z)\n\n        if z is None:\n            if self.observed:\n                \"\"\"\n                Got no observation so freeze the current parameters for future\n                potential online smoothing.\n                \"\"\"\n                self.last_measurement = self.history_obs[-2]\n                self.freeze()\n            self.observed = False\n            self.z = np.array([[None] * self.dim_z]).T\n            self.x_post = self.x.copy()\n            self.P_post = self.P.copy()\n            self.y = zeros((self.dim_z, 1))\n            return\n\n        # self.observed = True\n        if not self.observed:\n            \"\"\"\n            Get observation, use online smoothing to re-update parameters\n            \"\"\"\n            self.unfreeze()\n        self.observed = True\n\n        if R is None:\n            R = self.R\n        elif isscalar(R):\n            R = eye(self.dim_z) * R\n\n        if H is None:\n            z = reshape_z(z, self.dim_z, self.x.ndim)\n            H = self.H\n\n        # y = z - Hx\n        # error (residual) between measurement and prediction\n        self.y = z - dot(H, self.x)\n\n        # common subexpression for speed\n        PHT = dot(self.P, H.T)\n\n        # S = HPH' + R\n        # 
project system uncertainty into measurement space\n        self.S = dot(H, PHT) + R\n        self.SI = self.inv(self.S)\n        # K = PH'inv(S)\n        # map system uncertainty into kalman gain\n        self.K = dot(PHT, self.SI)\n\n        # x = x + Ky\n        # predict new x with residual scaled by the kalman gain\n        self.x = self.x + dot(self.K, self.y)\n\n        # P = (I-KH)P(I-KH)' + KRK'\n        # This is more numerically stable\n        # and works for non-optimal K vs the equation\n        # P = (I-KH)P usually seen in the literature.\n\n        I_KH = self._I - dot(self.K, H)\n        self.P = dot(dot(I_KH, self.P), I_KH.T) + dot(dot(self.K, R), self.K.T)\n\n        # save measurement and posterior state\n        self.z = deepcopy(z)\n        self.x_post = self.x.copy()\n        self.P_post = self.P.copy()\n\n    def md_for_measurement(self, z):\n        \"\"\"Mahalanobis distance for any measurement.\n\n        Should be run after a predict() call.\n        \"\"\"\n        z = reshape_z(z, self.dim_z, self.x.ndim)\n        H = self.H\n        y = z - dot(H, self.x)\n        md = sqrt(float(dot(dot(y.T, self.SI), y)))\n        return md\n\n    def predict_steadystate(self, u=0, B=None):\n        \"\"\"\n        Predict state (prior) using the Kalman filter state propagation\n        equations. Only x is updated, P is left unchanged. See\n        update_steadystate() for a longer explanation of when to use this\n        method.\n        Parameters\n        ----------\n        u : np.array\n            Optional control vector. If non-zero, it is multiplied by B\n            to create the control input into the system.\n        B : np.array(dim_x, dim_u), or None\n            Optional control transition matrix; a value of None\n            will cause the filter to use `self.B`.\n        \"\"\"\n\n        if B is None:\n            B = self.B\n\n        # x = Fx + Bu\n        if B is not None:\n            self.x = dot(self.F, self.x) + dot(B, u)\n        else:\n            self.x = dot(self.F, self.x)\n\n        # save prior\n        self.x_prior = self.x.copy()\n        self.P_prior = self.P.copy()\n\n    def update_steadystate(self, z):\n        \"\"\"\n        Add a new measurement (z) to the Kalman filter without recomputing\n        the Kalman gain K, the state covariance P, or the system\n        uncertainty S.\n        You can use this for LTI systems since the Kalman gain and covariance\n        converge to a fixed value. Precompute these and assign them explicitly,\n        or run the Kalman filter using the normal predict()/update() cycle\n        until they converge.\n        The main advantage of this call is speed. We do significantly less\n        computation, notably avoiding a costly matrix inversion.\n        Use in conjunction with predict_steadystate(), otherwise P will grow\n        without bound.\n        Parameters\n        ----------\n        z : (dim_z, 1): array_like\n            measurement for this update. 
z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n        Examples\n        --------\n        >>> cv = kinematic_kf(dim=3, order=2) # 3D const velocity filter\n        >>> # let filter converge on representative data, then save K and P\n        >>> for i in range(100):\n        >>>     cv.predict()\n        >>>     cv.update([i, i, i])\n        >>> saved_K = np.copy(cv.K)\n        >>> saved_P = np.copy(cv.P)\n        later on:\n        >>> cv = kinematic_kf(dim=3, order=2) # 3D const velocity filter\n        >>> cv.K = np.copy(saved_K)\n        >>> cv.P = np.copy(saved_P)\n        >>> for i in range(100):\n        >>>     cv.predict_steadystate()\n        >>>     cv.update_steadystate([i, i, i])\n        \"\"\"\n\n        # set to None to force recompute\n        self._log_likelihood = None\n        self._likelihood = None\n        self._mahalanobis = None\n\n        if z is None:\n            self.z = np.array([[None] * self.dim_z]).T\n            self.x_post = self.x.copy()\n            self.P_post = self.P.copy()\n            self.y = zeros((self.dim_z, 1))\n            return\n\n        z = reshape_z(z, self.dim_z, self.x.ndim)\n\n        # y = z - Hx\n        # error (residual) between measurement and prediction\n        self.y = z - dot(self.H, self.x)\n\n        # x = x + Ky\n        # predict new x with residual scaled by the kalman gain\n        self.x = self.x + dot(self.K, self.y)\n\n        self.z = deepcopy(z)\n        self.x_post = self.x.copy()\n        self.P_post = self.P.copy()\n\n        # set to None to force recompute\n        self._log_likelihood = None\n        self._likelihood = None\n        self._mahalanobis = None\n\n    def update_correlated(self, z, R=None, H=None):\n        \"\"\"Add a new measurement (z) to the Kalman filter assuming that\n        process noise and measurement noise are correlated as defined in\n        the `self.M` matrix.\n        A partial derivation can be found in [1]\n        If z is None, nothing is changed.\n        Parameters\n        ----------\n        z : (dim_z, 1): array_like\n            measurement for this update. z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n        R : np.array, scalar, or None\n            Optionally provide R to override the measurement noise for this\n            one call, otherwise  self.R will be used.\n        H : np.array,  or None\n            Optionally provide H to override the measurement function for this\n            one call, otherwise  self.H will be used.\n        References\n        ----------\n        .. [1] Bulut, Y. (2011). 
Applied Kalman filter theory (Doctoral dissertation, Northeastern University).\n               http://people.duke.edu/~hpgavin/SystemID/References/Balut-KalmanFilter-PhD-NEU-2011.pdf\n        \"\"\"\n\n        # set to None to force recompute\n        self._log_likelihood = None\n        self._likelihood = None\n        self._mahalanobis = None\n\n        if z is None:\n            self.z = np.array([[None] * self.dim_z]).T\n            self.x_post = self.x.copy()\n            self.P_post = self.P.copy()\n            self.y = zeros((self.dim_z, 1))\n            return\n\n        if R is None:\n            R = self.R\n        elif isscalar(R):\n            R = eye(self.dim_z) * R\n\n        # rename for readability and a tiny extra bit of speed\n        if H is None:\n            z = reshape_z(z, self.dim_z, self.x.ndim)\n            H = self.H\n\n        # handle special case: if z is in form [[z]] but x is not a column\n        # vector dimensions will not match\n        if self.x.ndim == 1 and shape(z) == (1, 1):\n            z = z[0]\n\n        if shape(z) == ():  # is it scalar, e.g. z=3 or z=np.array(3)\n            z = np.asarray([z])\n\n        # y = z - Hx\n        # error (residual) between measurement and prediction\n        self.y = z - dot(H, self.x)\n\n        # common subexpression for speed\n        PHT = dot(self.P, H.T)\n\n        # project system uncertainty into measurement space\n        self.S = dot(H, PHT) + dot(H, self.M) + dot(self.M.T, H.T) + R\n        self.SI = self.inv(self.S)\n\n        # K = PH'inv(S)\n        # map system uncertainty into kalman gain\n        self.K = dot(PHT + self.M, self.SI)\n\n        # x = x + Ky\n        # predict new x with residual scaled by the kalman gain\n        self.x = self.x + dot(self.K, self.y)\n        self.P = self.P - dot(self.K, dot(H, self.P) + self.M.T)\n\n        self.z = deepcopy(z)\n        self.x_post = self.x.copy()\n        self.P_post = self.P.copy()\n\n    def batch_filter(\n        self,\n        zs,\n        Fs=None,\n        Qs=None,\n        Hs=None,\n        Rs=None,\n        Bs=None,\n        us=None,\n        update_first=False,\n        saver=None,\n    ):\n        \"\"\"Batch processes a sequences of measurements.\n         Parameters\n         ----------\n         zs : list-like\n             list of measurements at each time step `self.dt`. Missing\n             measurements must be represented by `None`.\n         Fs : None, list-like, default=None\n             optional value or list of values to use for the state transition\n             matrix F.\n             If Fs is None then self.F is used for all epochs.\n             Otherwise it must contain a list-like list of F's, one for\n             each epoch.  This allows you to have varying F per epoch.\n         Qs : None, np.array or list-like, default=None\n             optional value or list of values to use for the process error\n             covariance Q.\n             If Qs is None then self.Q is used for all epochs.\n             Otherwise it must contain a list-like list of Q's, one for\n             each epoch.  
This allows you to have varying Q per epoch.\n         Hs : None, np.array or list-like, default=None\n             optional list of values to use for the measurement matrix H.\n             If Hs is None then self.H is used for all epochs.\n             If Hs contains a single matrix, then it is used as H for all\n             epochs.\n             Otherwise it must contain a list-like list of H's, one for\n             each epoch.  This allows you to have varying H per epoch.\n         Rs : None, np.array or list-like, default=None\n             optional list of values to use for the measurement error\n             covariance R.\n             If Rs is None then self.R is used for all epochs.\n             Otherwise it must contain a list-like list of R's, one for\n             each epoch.  This allows you to have varying R per epoch.\n         Bs : None, np.array or list-like, default=None\n             optional list of values to use for the control transition matrix B.\n             If Bs is None then self.B is used for all epochs.\n             Otherwise it must contain a list-like list of B's, one for\n             each epoch.  This allows you to have varying B per epoch.\n         us : None, np.array or list-like, default=None\n             optional list of values to use for the control input vector;\n             If us is None then None is used for all epochs (equivalent to 0,\n             or no control input).\n             Otherwise it must contain a list-like list of u's, one for\n             each epoch.\n        update_first : bool, optional, default=False\n             controls whether the order of operations is update followed by\n             predict, or predict followed by update. Default is predict->update.\n         saver : filterpy.common.Saver, optional\n             filterpy.common.Saver object. If provided, saver.save() will be\n             called after every epoch\n         Returns\n         -------\n         means : np.array((n,dim_x,1))\n             array of the state for each time step after the update. Each entry\n             is an np.array. In other words `means[k,:]` is the state at step\n             `k`.\n         covariance : np.array((n,dim_x,dim_x))\n             array of the covariances for each time step after the update.\n             In other words `covariance[k,:,:]` is the covariance at step `k`.\n         means_predictions : np.array((n,dim_x,1))\n             array of the state for each time step after the predictions. Each\n             entry is an np.array. In other words `means[k,:]` is the state at\n             step `k`.\n         covariance_predictions : np.array((n,dim_x,dim_x))\n             array of the covariances for each time step after the prediction.\n             In other words `covariance[k,:,:]` is the covariance at step `k`.\n         Examples\n         --------\n         .. code-block:: Python\n             # this example demonstrates tracking a measurement where the time\n             # between measurement varies, as stored in dts. This requires\n             # that F be recomputed for each epoch. 
The output is then smoothed\n             # with an RTS smoother.\n             zs = [t + random.randn()*4 for t in range (40)]\n             Fs = [np.array([[1., dt], [0, 1]] for dt in dts]\n             (mu, cov, _, _) = kf.batch_filter(zs, Fs=Fs)\n             (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs)\n        \"\"\"\n\n        # pylint: disable=too-many-statements\n        n = np.size(zs, 0)\n        if Fs is None:\n            Fs = [self.F] * n\n        if Qs is None:\n            Qs = [self.Q] * n\n        if Hs is None:\n            Hs = [self.H] * n\n        if Rs is None:\n            Rs = [self.R] * n\n        if Bs is None:\n            Bs = [self.B] * n\n        if us is None:\n            us = [0] * n\n\n        # mean estimates from Kalman Filter\n        if self.x.ndim == 1:\n            means = zeros((n, self.dim_x))\n            means_p = zeros((n, self.dim_x))\n        else:\n            means = zeros((n, self.dim_x, 1))\n            means_p = zeros((n, self.dim_x, 1))\n\n        # state covariances from Kalman Filter\n        covariances = zeros((n, self.dim_x, self.dim_x))\n        covariances_p = zeros((n, self.dim_x, self.dim_x))\n\n        if update_first:\n            for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):\n\n                self.update(z, R=R, H=H)\n                means[i, :] = self.x\n                covariances[i, :, :] = self.P\n\n                self.predict(u=u, B=B, F=F, Q=Q)\n                means_p[i, :] = self.x\n                covariances_p[i, :, :] = self.P\n\n                if saver is not None:\n                    saver.save()\n        else:\n            for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):\n\n                self.predict(u=u, B=B, F=F, Q=Q)\n                means_p[i, :] = self.x\n                covariances_p[i, :, :] = self.P\n\n                self.update(z, R=R, H=H)\n                means[i, :] = self.x\n                covariances[i, :, :] = self.P\n\n                if saver is not None:\n                    saver.save()\n\n        return (means, covariances, means_p, covariances_p)\n\n    def rts_smoother(self, Xs, Ps, Fs=None, Qs=None, inv=np.linalg.inv):\n        \"\"\"\n        Runs the Rauch-Tung-Striebel Kalman smoother on a set of\n        means and covariances computed by a Kalman filter. The usual input\n        would come from the output of `KalmanFilter.batch_filter()`.\n        Parameters\n        ----------\n        Xs : numpy.array\n           array of the means (state variable x) of the output of a Kalman\n           filter.\n        Ps : numpy.array\n            array of the covariances of the output of a kalman filter.\n        Fs : list-like collection of numpy.array, optional\n            State transition matrix of the Kalman filter at each time step.\n            Optional, if not provided the filter's self.F will be used\n        Qs : list-like collection of numpy.array, optional\n            Process noise of the Kalman filter at each time step. 
Optional,\n            if not provided the filter's self.Q will be used\n        inv : function, default numpy.linalg.inv\n            If you prefer another inverse function, such as the Moore-Penrose\n            pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv\n        Returns\n        -------\n        x : numpy.ndarray\n           smoothed means\n        P : numpy.ndarray\n           smoothed state covariances\n        K : numpy.ndarray\n            smoother gain at each step\n        Pp : numpy.ndarray\n           Predicted state covariances\n        Examples\n        --------\n        .. code-block:: Python\n            zs = [t + random.randn()*4 for t in range (40)]\n            (mu, cov, _, _) = kalman.batch_filter(zs)\n            (x, P, K, Pp) = rts_smoother(mu, cov, kf.F, kf.Q)\n        \"\"\"\n\n        if len(Xs) != len(Ps):\n            raise ValueError(\"length of Xs and Ps must be the same\")\n\n        n = Xs.shape[0]\n        dim_x = Xs.shape[1]\n\n        if Fs is None:\n            Fs = [self.F] * n\n        if Qs is None:\n            Qs = [self.Q] * n\n\n        # smoother gain\n        K = zeros((n, dim_x, dim_x))\n\n        x, P, Pp = Xs.copy(), Ps.copy(), Ps.copy()\n        for k in range(n - 2, -1, -1):\n            Pp[k] = dot(dot(Fs[k + 1], P[k]), Fs[k + 1].T) + Qs[k + 1]\n\n            # pylint: disable=bad-whitespace\n            K[k] = dot(dot(P[k], Fs[k + 1].T), inv(Pp[k]))\n            x[k] += dot(K[k], x[k + 1] - dot(Fs[k + 1], x[k]))\n            P[k] += dot(dot(K[k], P[k + 1] - Pp[k]), K[k].T)\n\n        return (x, P, K, Pp)\n\n    def get_prediction(self, u=None, B=None, F=None, Q=None):\n        \"\"\"\n        Predict next state (prior) using the Kalman filter state propagation\n        equations and returns it without modifying the object.\n        Parameters\n        ----------\n        u : np.array, default 0\n            Optional control vector.\n        B : np.array(dim_x, dim_u), or None\n            Optional control transition matrix; a value of None\n            will cause the filter to use `self.B`.\n        F : np.array(dim_x, dim_x), or None\n            Optional state transition matrix; a value of None\n            will cause the filter to use `self.F`.\n        Q : np.array(dim_x, dim_x), scalar, or None\n            Optional process noise matrix; a value of None will cause the\n            filter to use `self.Q`.\n        Returns\n        -------\n        (x, P) : tuple\n            State vector and covariance array of the prediction.\n        \"\"\"\n\n        if B is None:\n            B = self.B\n        if F is None:\n            F = self.F\n        if Q is None:\n            Q = self.Q\n        elif isscalar(Q):\n            Q = eye(self.dim_x) * Q\n\n        # x = Fx + Bu\n        if B is not None and u is not None:\n            x = dot(F, self.x) + dot(B, u)\n        else:\n            x = dot(F, self.x)\n\n        # P = FPF' + Q\n        P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q\n\n        return x, P\n\n    def get_update(self, z=None):\n        \"\"\"\n        Computes the new estimate based on measurement `z` and returns it\n        without altering the state of the filter.\n        Parameters\n        ----------\n        z : (dim_z, 1): array_like\n            measurement for this update. 
z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n        Returns\n        -------\n        (x, P) : tuple\n            State vector and covariance array of the update.\n        \"\"\"\n\n        if z is None:\n            return self.x, self.P\n        z = reshape_z(z, self.dim_z, self.x.ndim)\n\n        R = self.R\n        H = self.H\n        P = self.P\n        x = self.x\n\n        # error (residual) between measurement and prediction\n        y = z - dot(H, x)\n\n        # common subexpression for speed\n        PHT = dot(P, H.T)\n\n        # project system uncertainty into measurement space\n        S = dot(H, PHT) + R\n\n        # map system uncertainty into kalman gain\n        K = dot(PHT, self.inv(S))\n\n        # predict new x with residual scaled by the kalman gain\n        x = x + dot(K, y)\n\n        # P = (I-KH)P(I-KH)' + KRK'\n        I_KH = self._I - dot(K, H)\n        P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T)\n\n        return x, P\n\n    def residual_of(self, z):\n        \"\"\"\n        Returns the residual for the given measurement (z). Does not alter\n        the state of the filter.\n        \"\"\"\n        z = reshape_z(z, self.dim_z, self.x.ndim)\n        return z - dot(self.H, self.x_prior)\n\n    def measurement_of_state(self, x):\n        \"\"\"\n        Helper function that converts a state into a measurement.\n        Parameters\n        ----------\n        x : np.array\n            kalman state vector\n        Returns\n        -------\n        z : (dim_z, 1): array_like\n            measurement for this update. z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n        \"\"\"\n\n        return dot(self.H, x)\n\n    @property\n    def log_likelihood(self):\n        \"\"\"\n        log-likelihood of the last measurement.\n        \"\"\"\n        if self._log_likelihood is None:\n            self._log_likelihood = logpdf(x=self.y, cov=self.S)\n        return self._log_likelihood\n\n    @property\n    def likelihood(self):\n        \"\"\"\n        Computed from the log-likelihood. The log-likelihood can be very\n        small,  meaning a large negative value such as -28000. Taking the\n        exp() of that results in 0.0, which can break typical algorithms\n        which multiply by this value, so by default we always return a\n        number >= sys.float_info.min.\n        \"\"\"\n        if self._likelihood is None:\n            self._likelihood = exp(self.log_likelihood)\n            if self._likelihood == 0:\n                self._likelihood = sys.float_info.min\n        return self._likelihood\n\n    @property\n    def mahalanobis(self):\n        \"\"\" \"\n        Mahalanobis distance of measurement. E.g. 3 means measurement\n        was 3 standard deviations away from the predicted value.\n        Returns\n        -------\n        mahalanobis : float\n        \"\"\"\n        if self._mahalanobis is None:\n            self._mahalanobis = sqrt(float(dot(dot(self.y.T, self.SI), self.y)))\n        return self._mahalanobis\n\n    @property\n    def alpha(self):\n        \"\"\"\n        Fading memory setting. 1.0 gives the normal Kalman filter, and\n        values slightly larger than 1.0 (such as 1.02) give a fading\n        memory effect - previous measurements have less influence on the\n        filter's estimates. 
This formulation of the Fading memory filter\n        (there are many) is due to Dan Simon [1]_.\n        \"\"\"\n        return self._alpha_sq**0.5\n\n    def log_likelihood_of(self, z):\n        \"\"\"\n        log likelihood of the measurement `z`. This should only be called\n        after a call to update(). Calling after predict() will yield an\n        incorrect result.\"\"\"\n\n        if z is None:\n            return log(sys.float_info.min)\n        return logpdf(z, dot(self.H, self.x), self.S)\n\n    @alpha.setter\n    def alpha(self, value):\n        if not np.isscalar(value) or value < 1:\n            raise ValueError(\"alpha must be a float greater than 1\")\n\n        self._alpha_sq = value**2\n\n    def __repr__(self):\n        return \"\\n\".join(\n            [\n                \"KalmanFilter object\",\n                pretty_str(\"dim_x\", self.dim_x),\n                pretty_str(\"dim_z\", self.dim_z),\n                pretty_str(\"dim_u\", self.dim_u),\n                pretty_str(\"x\", self.x),\n                pretty_str(\"P\", self.P),\n                pretty_str(\"x_prior\", self.x_prior),\n                pretty_str(\"P_prior\", self.P_prior),\n                pretty_str(\"x_post\", self.x_post),\n                pretty_str(\"P_post\", self.P_post),\n                pretty_str(\"F\", self.F),\n                pretty_str(\"Q\", self.Q),\n                pretty_str(\"R\", self.R),\n                pretty_str(\"H\", self.H),\n                pretty_str(\"K\", self.K),\n                pretty_str(\"y\", self.y),\n                pretty_str(\"S\", self.S),\n                pretty_str(\"SI\", self.SI),\n                pretty_str(\"M\", self.M),\n                pretty_str(\"B\", self.B),\n                pretty_str(\"z\", self.z),\n                pretty_str(\"log-likelihood\", self.log_likelihood),\n                pretty_str(\"likelihood\", self.likelihood),\n                pretty_str(\"mahalanobis\", self.mahalanobis),\n                pretty_str(\"alpha\", self.alpha),\n                pretty_str(\"inv\", self.inv),\n            ]\n        )\n\n    def test_matrix_dimensions(self, z=None, H=None, R=None, F=None, Q=None):\n        \"\"\"\n        Performs a series of asserts to check that the size of everything\n        is what it should be. This can help you debug problems in your design.\n        If you pass in H, R, F, Q those will be used instead of this object's\n        value for those matrices.\n        Testing `z` (the measurement) is problamatic. x is a vector, and can be\n        implemented as either a 1D array or as a nx1 column vector. Thus Hx\n        can be of different shapes. Then, if Hx is a single value, it can\n        be either a 1D array or 2D vector. If either is true, z can reasonably\n        be a scalar (either '3' or np.array('3') are scalars under this\n        definition), a 1D, 1 element array, or a 2D, 1 element array. 
You are\n        allowed to pass in any combination that works.\n        \"\"\"\n\n        if H is None:\n            H = self.H\n        if R is None:\n            R = self.R\n        if F is None:\n            F = self.F\n        if Q is None:\n            Q = self.Q\n        x = self.x\n        P = self.P\n\n        assert x.ndim == 1 or x.ndim == 2, \"x must have one or two dimensions, but has {}\".format(x.ndim)\n\n        if x.ndim == 1:\n            assert x.shape[0] == self.dim_x, \"Shape of x must be ({},{}), but is {}\".format(self.dim_x, 1, x.shape)\n        else:\n            assert x.shape == (\n                self.dim_x,\n                1,\n            ), \"Shape of x must be ({},{}), but is {}\".format(self.dim_x, 1, x.shape)\n\n        assert P.shape == (\n            self.dim_x,\n            self.dim_x,\n        ), \"Shape of P must be ({},{}), but is {}\".format(self.dim_x, self.dim_x, P.shape)\n\n        assert Q.shape == (\n            self.dim_x,\n            self.dim_x,\n        ), \"Shape of Q must be ({},{}), but is {}\".format(self.dim_x, self.dim_x, P.shape)\n\n        assert F.shape == (\n            self.dim_x,\n            self.dim_x,\n        ), \"Shape of F must be ({},{}), but is {}\".format(self.dim_x, self.dim_x, F.shape)\n\n        assert np.ndim(H) == 2, \"Shape of H must be (dim_z, {}), but is {}\".format(P.shape[0], shape(H))\n\n        assert H.shape[1] == P.shape[0], \"Shape of H must be (dim_z, {}), but is {}\".format(P.shape[0], H.shape)\n\n        # shape of R must be the same as HPH'\n        hph_shape = (H.shape[0], H.shape[0])\n        r_shape = shape(R)\n\n        if H.shape[0] == 1:\n            # r can be scalar, 1D, or 2D in this case\n            assert r_shape in [\n                (),\n                (1,),\n                (1, 1),\n            ], \"R must be scalar or one element array, but is shaped {}\".format(r_shape)\n        else:\n            assert r_shape == hph_shape, \"shape of R should be {} but it is {}\".format(hph_shape, r_shape)\n\n        if z is not None:\n            z_shape = shape(z)\n        else:\n            z_shape = (self.dim_z, 1)\n\n        # H@x must have shape of z\n        Hx = dot(H, x)\n\n        if z_shape == ():  # scalar or np.array(scalar)\n            assert Hx.ndim == 1 or shape(Hx) == (\n                1,\n                1,\n            ), \"shape of z should be {}, not {} for the given H\".format(shape(Hx), z_shape)\n\n        elif shape(Hx) == (1,):\n            assert z_shape[0] == 1, \"Shape of z must be {} for the given H\".format(shape(Hx))\n\n        else:\n            assert z_shape == shape(Hx) or (\n                len(z_shape) == 1 and shape(Hx) == (z_shape[0], 1)\n            ), \"shape of z should be {}, not {} for the given H\".format(shape(Hx), z_shape)\n\n        if np.ndim(Hx) > 1 and shape(Hx) != (1, 1):\n            assert shape(Hx) == z_shape, \"shape of z should be {} for the given H, but it is {}\".format(\n                shape(Hx), z_shape\n            )\n\n\ndef update(x, P, z, R, H=None, return_all=False):\n    \"\"\"\n    Add a new measurement (z) to the Kalman filter. If z is None, nothing\n    is changed.\n    This can handle either the multidimensional or unidimensional case. 
If\n    all parameters are floats instead of arrays the filter will still work,\n    and return floats for x, P as the result.\n    update(1, 2, 1, 1, 1)  # univariate\n    update(x, P, 1, 1)\n    Parameters\n    ----------\n    x : numpy.array(dim_x, 1), or float\n        State estimate vector\n    P : numpy.array(dim_x, dim_x), or float\n        Covariance matrix\n    z : (dim_z, 1): array_like\n        measurement for this update. z can be a scalar if dim_z is 1,\n        otherwise it must be convertible to a column vector.\n    R : numpy.array(dim_z, dim_z), or float\n        Measurement noise matrix\n    H : numpy.array(dim_x, dim_x), or float, optional\n        Measurement function. If not provided, a value of 1 is assumed.\n    return_all : bool, default False\n        If true, y, K, S, and log_likelihood are returned, otherwise\n        only x and P are returned.\n    Returns\n    -------\n    x : numpy.array\n        Posterior state estimate vector\n    P : numpy.array\n        Posterior covariance matrix\n    y : numpy.array or scalar\n        Residual. Difference between measurement and state in measurement space\n    K : numpy.array\n        Kalman gain\n    S : numpy.array\n        System uncertainty in measurement space\n    log_likelihood : float\n        log likelihood of the measurement\n    \"\"\"\n\n    # pylint: disable=bare-except\n\n    if z is None:\n        if return_all:\n            return x, P, None, None, None, None\n        return x, P\n\n    if H is None:\n        H = np.array([1])\n\n    if np.isscalar(H):\n        H = np.array([H])\n\n    Hx = np.atleast_1d(dot(H, x))\n    z = reshape_z(z, Hx.shape[0], x.ndim)\n\n    # error (residual) between measurement and prediction\n    y = z - Hx\n\n    # project system uncertainty into measurement space\n    S = dot(dot(H, P), H.T) + R\n\n    # map system uncertainty into kalman gain\n    try:\n        K = dot(dot(P, H.T), linalg.inv(S))\n    except:\n        # can't invert a 1D array, annoyingly\n        K = dot(dot(P, H.T), 1.0 / S)\n\n    # predict new x with residual scaled by the kalman gain\n    x = x + dot(K, y)\n\n    # P = (I-KH)P(I-KH)' + KRK'\n    KH = dot(K, H)\n\n    try:\n        I_KH = np.eye(KH.shape[0]) - KH\n    except:\n        I_KH = np.array([1 - KH])\n    P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T)\n\n    if return_all:\n        # compute log likelihood\n        log_likelihood = logpdf(z, dot(H, x), S)\n        return x, P, y, K, S, log_likelihood\n    return x, P\n\n\ndef update_steadystate(x, z, K, H=None):\n    \"\"\"\n    Add a new measurement (z) to the Kalman filter. If z is None, nothing\n    is changed.\n    Parameters\n    ----------\n    x : numpy.array(dim_x, 1), or float\n        State estimate vector\n    z : (dim_z, 1): array_like\n        measurement for this update. z can be a scalar if dim_z is 1,\n        otherwise it must be convertible to a column vector.\n    K : numpy.array, or float\n        Kalman gain matrix\n    H : numpy.array(dim_x, dim_x), or float, optional\n        Measurement function. If not provided, a value of 1 is assumed.\n    Returns\n    -------\n    x : numpy.array\n        Posterior state estimate vector\n    Examples\n    --------\n    This can handle either the multidimensional or unidimensional case. 
If\n    all parameters are floats instead of arrays the filter will still work,\n    and return floats for x, P as the result.\n    >>> update_steadystate(1, 2, 1)  # univariate\n    >>> update_steadystate(x, P, z, H)\n    \"\"\"\n\n    if z is None:\n        return x\n\n    if H is None:\n        H = np.array([1])\n\n    if np.isscalar(H):\n        H = np.array([H])\n\n    Hx = np.atleast_1d(dot(H, x))\n    z = reshape_z(z, Hx.shape[0], x.ndim)\n\n    # error (residual) between measurement and prediction\n    y = z - Hx\n\n    # estimate new x with residual scaled by the kalman gain\n    return x + dot(K, y)\n\n\ndef predict(x, P, F=1, Q=0, u=0, B=1, alpha=1.0):\n    \"\"\"\n    Predict next state (prior) using the Kalman filter state propagation\n    equations.\n    Parameters\n    ----------\n    x : numpy.array\n        State estimate vector\n    P : numpy.array\n        Covariance matrix\n    F : numpy.array()\n        State Transition matrix\n    Q : numpy.array, Optional\n        Process noise matrix\n    u : numpy.array, Optional, default 0.\n        Control vector. If non-zero, it is multiplied by B\n        to create the control input into the system.\n    B : numpy.array, optional, default 0.\n        Control transition matrix.\n    alpha : float, Optional, default=1.0\n        Fading memory setting. 1.0 gives the normal Kalman filter, and\n        values slightly larger than 1.0 (such as 1.02) give a fading\n        memory effect - previous measurements have less influence on the\n        filter's estimates. This formulation of the Fading memory filter\n        (there are many) is due to Dan Simon\n    Returns\n    -------\n    x : numpy.array\n        Prior state estimate vector\n    P : numpy.array\n        Prior covariance matrix\n    \"\"\"\n\n    if np.isscalar(F):\n        F = np.array(F)\n    x = dot(F, x) + dot(B, u)\n    P = (alpha * alpha) * dot(dot(F, P), F.T) + Q\n\n    return x, P\n\n\ndef predict_steadystate(x, F=1, u=0, B=1):\n    \"\"\"\n    Predict next state (prior) using the Kalman filter state propagation\n    equations. This steady state form only computes x, assuming that the\n    covariance is constant.\n    Parameters\n    ----------\n    x : numpy.array\n        State estimate vector\n    P : numpy.array\n        Covariance matrix\n    F : numpy.array()\n        State Transition matrix\n    u : numpy.array, Optional, default 0.\n        Control vector. If non-zero, it is multiplied by B\n        to create the control input into the system.\n    B : numpy.array, optional, default 0.\n        Control transition matrix.\n    Returns\n    -------\n    x : numpy.array\n        Prior state estimate vector\n    \"\"\"\n\n    if np.isscalar(F):\n        F = np.array(F)\n    x = dot(F, x) + dot(B, u)\n\n    return x\n\n\ndef batch_filter(x, P, zs, Fs, Qs, Hs, Rs, Bs=None, us=None, update_first=False, saver=None):\n    \"\"\"\n    Batch processes a sequences of measurements.\n    Parameters\n    ----------\n    zs : list-like\n        list of measurements at each time step. 
Missing measurements must be\n        represented by None.\n    Fs : list-like\n        list of values to use for the state transition matrix.\n    Qs : list-like\n        list of values to use for the process error\n        covariance.\n    Hs : list-like\n        list of values to use for the measurement matrix.\n    Rs : list-like\n        list of values to use for the measurement error\n        covariance.\n    Bs : list-like, optional\n        list of values to use for the control transition matrix;\n        a value of None in any position will cause the filter\n        to use `self.B` for that time step.\n    us : list-like, optional\n        list of values to use for the control input vector;\n        a value of None in any position will cause the filter to use\n        0 for that time step.\n    update_first : bool, optional\n        controls whether the order of operations is update followed by\n        predict, or predict followed by update. Default is predict->update.\n    saver : filterpy.common.Saver, optional\n        filterpy.common.Saver object. If provided, saver.save() will be\n        called after every epoch\n    Returns\n    -------\n    means : np.array((n,dim_x,1))\n        array of the state for each time step after the update. Each entry\n        is an np.array. In other words `means[k,:]` is the state at step\n        `k`.\n    covariance : np.array((n,dim_x,dim_x))\n        array of the covariances for each time step after the update.\n        In other words `covariance[k,:,:]` is the covariance at step `k`.\n    means_predictions : np.array((n,dim_x,1))\n        array of the state for each time step after the predictions. Each\n        entry is an np.array. In other words `means[k,:]` is the state at\n        step `k`.\n    covariance_predictions : np.array((n,dim_x,dim_x))\n        array of the covariances for each time step after the prediction.\n        In other words `covariance[k,:,:]` is the covariance at step `k`.\n    Examples\n    --------\n    .. 
code-block:: Python\n        zs = [t + random.randn()*4 for t in range (40)]\n        Fs = [kf.F for t in range (40)]\n        Hs = [kf.H for t in range (40)]\n        (mu, cov, _, _) = kf.batch_filter(zs, Rs=R_list, Fs=Fs, Hs=Hs, Qs=None,\n                                          Bs=None, us=None, update_first=False)\n        (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs, Qs=None)\n    \"\"\"\n\n    n = np.size(zs, 0)\n    dim_x = x.shape[0]\n\n    # mean estimates from Kalman Filter\n    if x.ndim == 1:\n        means = zeros((n, dim_x))\n        means_p = zeros((n, dim_x))\n    else:\n        means = zeros((n, dim_x, 1))\n        means_p = zeros((n, dim_x, 1))\n\n    # state covariances from Kalman Filter\n    covariances = zeros((n, dim_x, dim_x))\n    covariances_p = zeros((n, dim_x, dim_x))\n\n    if us is None:\n        us = [0.0] * n\n        Bs = [0.0] * n\n\n    if update_first:\n        for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):\n\n            x, P = update(x, P, z, R=R, H=H)\n            means[i, :] = x\n            covariances[i, :, :] = P\n\n            x, P = predict(x, P, u=u, B=B, F=F, Q=Q)\n            means_p[i, :] = x\n            covariances_p[i, :, :] = P\n            if saver is not None:\n                saver.save()\n    else:\n        for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):\n\n            x, P = predict(x, P, u=u, B=B, F=F, Q=Q)\n            means_p[i, :] = x\n            covariances_p[i, :, :] = P\n\n            x, P = update(x, P, z, R=R, H=H)\n            means[i, :] = x\n            covariances[i, :, :] = P\n            if saver is not None:\n                saver.save()\n\n    return (means, covariances, means_p, covariances_p)\n\n\ndef rts_smoother(Xs, Ps, Fs, Qs):\n    \"\"\"\n    Runs the Rauch-Tung-Striebel Kalman smoother on a set of\n    means and covariances computed by a Kalman filter. The usual input\n    would come from the output of `KalmanFilter.batch_filter()`.\n    Parameters\n    ----------\n    Xs : numpy.array\n       array of the means (state variable x) of the output of a Kalman\n       filter.\n    Ps : numpy.array\n        array of the covariances of the output of a kalman filter.\n    Fs : list-like collection of numpy.array\n        State transition matrix of the Kalman filter at each time step.\n    Qs : list-like collection of numpy.array, optional\n        Process noise of the Kalman filter at each time step.\n    Returns\n    -------\n    x : numpy.ndarray\n       smoothed means\n    P : numpy.ndarray\n       smoothed state covariances\n    K : numpy.ndarray\n        smoother gain at each step\n    pP : numpy.ndarray\n       predicted state covariances\n    Examples\n    --------\n    .. 
code-block:: Python\n        zs = [t + random.randn()*4 for t in range (40)]\n        (mu, cov, _, _) = kalman.batch_filter(zs)\n        (x, P, K, pP) = rts_smoother(mu, cov, kf.F, kf.Q)\n    \"\"\"\n\n    if len(Xs) != len(Ps):\n        raise ValueError(\"length of Xs and Ps must be the same\")\n\n    n = Xs.shape[0]\n    dim_x = Xs.shape[1]\n\n    # smoother gain\n    K = zeros((n, dim_x, dim_x))\n    x, P, pP = Xs.copy(), Ps.copy(), Ps.copy()\n\n    for k in range(n - 2, -1, -1):\n        pP[k] = dot(dot(Fs[k], P[k]), Fs[k].T) + Qs[k]\n\n        # pylint: disable=bad-whitespace\n        K[k] = dot(dot(P[k], Fs[k].T), linalg.inv(pP[k]))\n        x[k] += dot(K[k], x[k + 1] - dot(Fs[k], x[k]))\n        P[k] += dot(dot(K[k], P[k + 1] - pP[k]), K[k].T)\n\n    return (x, P, K, pP)\n"
  },
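The module-level predict() and update() helpers defined at the bottom of kalmanfilter.py mirror the class methods but operate on plain arrays, so they are easy to exercise in isolation. The following is a minimal sketch (not part of the repository) of a one-dimensional constant-velocity toy run against those helpers; the import path trackers.deepocsort.kalmanfilter is an assumption based on the file layout shown here and may need adjusting to your environment.

    import numpy as np
    from trackers.deepocsort.kalmanfilter import predict, update  # assumed path

    # constant-velocity model: state is [position, velocity], only position is measured
    F = np.array([[1.0, 1.0],
                  [0.0, 1.0]])    # state transition (dt = 1)
    H = np.array([[1.0, 0.0]])    # measurement function
    Q = np.eye(2) * 1e-3          # process noise
    R = np.array([[0.5]])         # measurement noise

    x = np.array([[0.0], [0.0]])  # initial state estimate
    P = np.eye(2)                 # initial covariance

    for z in [1.0, 2.1, 2.9, 4.2]:
        x, P = predict(x, P, F=F, Q=Q)  # prior: x = Fx, P = FPF' + Q
        x, P = update(x, P, z, R, H)    # posterior after measurement z

The same predict/update ordering is what batch_filter() runs internally when update_first is left at its default.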
  {
    "path": "DLTA_AI_app/trackers/deepocsort/ocsort.py",
    "content": "\"\"\"\n    This script is adopted from the SORT script by Alex Bewley alex@bewley.ai\n\"\"\"\nfrom __future__ import print_function\n\nimport pdb\nimport pickle\n\nimport cv2\nimport torch\nimport torchvision\n\nimport numpy as np\nfrom .association import *\nfrom .embedding import EmbeddingComputer\nfrom .cmc import CMCComputer\nfrom .reid_multibackend import ReIDDetectMultiBackend\nfrom ultralytics.yolo.utils.ops import xyxy2xywh\n\n\n\ndef k_previous_obs(observations, cur_age, k):\n    if len(observations) == 0:\n        return [-1, -1, -1, -1, -1]\n    for i in range(k):\n        dt = k - i\n        if cur_age - dt in observations:\n            return observations[cur_age - dt]\n    max_age = max(observations.keys())\n    return observations[max_age]\n\n\ndef convert_bbox_to_z(bbox):\n    \"\"\"\n    Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form\n      [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is\n      the aspect ratio\n    \"\"\"\n    w = bbox[2] - bbox[0]\n    h = bbox[3] - bbox[1]\n    x = bbox[0] + w / 2.0\n    y = bbox[1] + h / 2.0\n    s = w * h  # scale is just area\n    r = w / float(h + 1e-6)\n    return np.array([x, y, s, r]).reshape((4, 1))\n\n\ndef convert_bbox_to_z_new(bbox):\n    w = bbox[2] - bbox[0]\n    h = bbox[3] - bbox[1]\n    x = bbox[0] + w / 2.0\n    y = bbox[1] + h / 2.0\n    return np.array([x, y, w, h]).reshape((4, 1))\n\n\ndef convert_x_to_bbox_new(x):\n    x, y, w, h = x.reshape(-1)[:4]\n    return np.array([x - w / 2, y - h / 2, x + w / 2, y + h / 2]).reshape(1, 4)\n\n\ndef convert_x_to_bbox(x, score=None):\n    \"\"\"\n    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form\n      [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right\n    \"\"\"\n    w = np.sqrt(x[2] * x[3])\n    h = x[2] / w\n    if score == None:\n        return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0]).reshape((1, 4))\n    else:\n        return np.array([x[0] - w / 2.0, x[1] - h / 2.0, x[0] + w / 2.0, x[1] + h / 2.0, score]).reshape((1, 5))\n\n\ndef speed_direction(bbox1, bbox2):\n    cx1, cy1 = (bbox1[0] + bbox1[2]) / 2.0, (bbox1[1] + bbox1[3]) / 2.0\n    cx2, cy2 = (bbox2[0] + bbox2[2]) / 2.0, (bbox2[1] + bbox2[3]) / 2.0\n    speed = np.array([cy2 - cy1, cx2 - cx1])\n    norm = np.sqrt((cy2 - cy1) ** 2 + (cx2 - cx1) ** 2) + 1e-6\n    return speed / norm\n\n\ndef new_kf_process_noise(w, h, p=1 / 20, v=1 / 160):\n    Q = np.diag(\n        ((p * w) ** 2, (p * h) ** 2, (p * w) ** 2, (p * h) ** 2, (v * w) ** 2, (v * h) ** 2, (v * w) ** 2, (v * h) ** 2)\n    )\n    return Q\n\n\ndef new_kf_measurement_noise(w, h, m=1 / 20):\n    w_var = (m * w) ** 2\n    h_var = (m * h) ** 2\n    R = np.diag((w_var, h_var, w_var, h_var))\n    return R\n\n\nclass KalmanBoxTracker(object):\n    \"\"\"\n    This class represents the internal state of individual tracked objects observed as bbox.\n    \"\"\"\n\n    count = 0\n\n    def __init__(self, bbox, cls, delta_t=3, orig=False, emb=None, alpha=0, new_kf=False):\n        \"\"\"\n        Initialises a tracker using initial bounding box.\n\n        \"\"\"\n        # define constant velocity model\n        if not orig:\n            from .kalmanfilter import KalmanFilterNew as KalmanFilter\n        else:\n            from filterpy.kalman import KalmanFilter\n        self.cls = cls\n        self.conf = bbox[-1]\n        self.new_kf = new_kf\n        if new_kf:\n            self.kf = KalmanFilter(dim_x=8, 
dim_z=4)\n            self.kf.F = np.array(\n                [\n                    # x y w h x' y' w' h'\n                    [1, 0, 0, 0, 1, 0, 0, 0],\n                    [0, 1, 0, 0, 0, 1, 0, 0],\n                    [0, 0, 1, 0, 0, 0, 1, 0],\n                    [0, 0, 0, 1, 0, 0, 0, 1],\n                    [0, 0, 0, 0, 1, 0, 0, 0],\n                    [0, 0, 0, 0, 0, 1, 0, 0],\n                    [0, 0, 0, 0, 0, 0, 1, 0],\n                    [0, 0, 0, 0, 0, 0, 0, 1],\n                ]\n            )\n            self.kf.H = np.array(\n                [\n                    [1, 0, 0, 0, 0, 0, 0, 0],\n                    [0, 1, 0, 0, 0, 0, 0, 0],\n                    [0, 0, 1, 0, 0, 0, 0, 0],\n                    [0, 0, 0, 1, 0, 0, 0, 0],\n                ]\n            )\n            _, _, w, h = convert_bbox_to_z_new(bbox).reshape(-1)\n            self.kf.P = new_kf_process_noise(w, h)\n            self.kf.P[:4, :4] *= 4\n            self.kf.P[4:, 4:] *= 100\n            # Process and measurement uncertainty happen in functions\n            self.bbox_to_z_func = convert_bbox_to_z_new\n            self.x_to_bbox_func = convert_x_to_bbox_new\n        else:\n            self.kf = KalmanFilter(dim_x=7, dim_z=4)\n            self.kf.F = np.array(\n                [\n                    # x  y  s  r  x' y' s'\n                    [1, 0, 0, 0, 1, 0, 0],\n                    [0, 1, 0, 0, 0, 1, 0],\n                    [0, 0, 1, 0, 0, 0, 1],\n                    [0, 0, 0, 1, 0, 0, 0],\n                    [0, 0, 0, 0, 1, 0, 0],\n                    [0, 0, 0, 0, 0, 1, 0],\n                    [0, 0, 0, 0, 0, 0, 1],\n                ]\n            )\n            self.kf.H = np.array(\n                [\n                    [1, 0, 0, 0, 0, 0, 0],\n                    [0, 1, 0, 0, 0, 0, 0],\n                    [0, 0, 1, 0, 0, 0, 0],\n                    [0, 0, 0, 1, 0, 0, 0],\n                ]\n            )\n            self.kf.R[2:, 2:] *= 10.0\n            self.kf.P[4:, 4:] *= 1000.0  # give high uncertainty to the unobservable initial velocities\n            self.kf.P *= 10.0\n            self.kf.Q[-1, -1] *= 0.01\n            self.kf.Q[4:, 4:] *= 0.01\n            self.bbox_to_z_func = convert_bbox_to_z\n            self.x_to_bbox_func = convert_x_to_bbox\n\n        self.kf.x[:4] = self.bbox_to_z_func(bbox)\n\n        self.time_since_update = 0\n        self.id = KalmanBoxTracker.count\n        KalmanBoxTracker.count += 1\n        self.history = []\n        self.hits = 0\n        self.hit_streak = 0\n        self.age = 0\n        \"\"\"\n        NOTE: [-1,-1,-1,-1,-1] is a compromising placeholder for non-observation status, the same for the return of \n        function k_previous_obs. It is ugly and I do not like it. 
But to support generate observation array in a \n        fast and unified way, which you would see below k_observations = np.array([k_previous_obs(...]]), let's bear it for now.\n        \"\"\"\n        # Used for OCR\n        self.last_observation = np.array([-1, -1, -1, -1, -1])  # placeholder\n        # Used to output track after min_hits reached\n        self.history_observations = []\n        # Used for velocity\n        self.observations = dict()\n        self.velocity = None\n        self.delta_t = delta_t\n\n        self.emb = emb\n\n        self.frozen = False\n\n    def update(self, bbox, cls):\n        \"\"\"\n        Updates the state vector with observed bbox.\n        \"\"\"\n        if bbox is not None:\n            self.frozen = False\n            self.cls = cls\n            if self.last_observation.sum() >= 0:  # no previous observation\n                previous_box = None\n                for dt in range(self.delta_t, 0, -1):\n                    if self.age - dt in self.observations:\n                        previous_box = self.observations[self.age - dt]\n                        break\n                if previous_box is None:\n                    previous_box = self.last_observation\n                \"\"\"\n                  Estimate the track speed direction with observations \\Delta t steps away\n                \"\"\"\n                self.velocity = speed_direction(previous_box, bbox)\n            \"\"\"\n              Insert new observations. This is a ugly way to maintain both self.observations\n              and self.history_observations. Bear it for the moment.\n            \"\"\"\n            self.last_observation = bbox\n            self.observations[self.age] = bbox\n            self.history_observations.append(bbox)\n\n            self.time_since_update = 0\n            self.history = []\n            self.hits += 1\n            self.hit_streak += 1\n            if self.new_kf:\n                R = new_kf_measurement_noise(self.kf.x[2, 0], self.kf.x[3, 0])\n                self.kf.update(self.bbox_to_z_func(bbox), R=R)\n            else:\n                self.kf.update(self.bbox_to_z_func(bbox))\n        else:\n            self.kf.update(bbox)\n            self.frozen = True\n\n    def update_emb(self, emb, alpha=0.9):\n        self.emb = alpha * self.emb + (1 - alpha) * emb\n        self.emb /= np.linalg.norm(self.emb)\n\n    def get_emb(self):\n        return self.emb.cpu()\n\n    def apply_affine_correction(self, affine):\n        m = affine[:, :2]\n        t = affine[:, 2].reshape(2, 1)\n        # For OCR\n        if self.last_observation.sum() > 0:\n            ps = self.last_observation[:4].reshape(2, 2).T\n            ps = m @ ps + t\n            self.last_observation[:4] = ps.T.reshape(-1)\n\n        # Apply to each box in the range of velocity computation\n        for dt in range(self.delta_t, -1, -1):\n            if self.age - dt in self.observations:\n                ps = self.observations[self.age - dt][:4].reshape(2, 2).T\n                ps = m @ ps + t\n                self.observations[self.age - dt][:4] = ps.T.reshape(-1)\n\n        # Also need to change kf state, but might be frozen\n        self.kf.apply_affine_correction(m, t, self.new_kf)\n\n    def predict(self):\n        \"\"\"\n        Advances the state vector and returns the predicted bounding box estimate.\n        \"\"\"\n        # Don't allow negative bounding boxes\n        if self.new_kf:\n            if self.kf.x[2] + self.kf.x[6] <= 0:\n                self.kf.x[6] = 0\n      
      if self.kf.x[3] + self.kf.x[7] <= 0:\n                self.kf.x[7] = 0\n\n            # Stop velocity, will update in kf during OOS\n            if self.frozen:\n                self.kf.x[6] = self.kf.x[7] = 0\n            Q = new_kf_process_noise(self.kf.x[2, 0], self.kf.x[3, 0])\n        else:\n            if (self.kf.x[6] + self.kf.x[2]) <= 0:\n                self.kf.x[6] *= 0.0\n            Q = None\n\n        self.kf.predict(Q=Q)\n        self.age += 1\n        if self.time_since_update > 0:\n            self.hit_streak = 0\n        self.time_since_update += 1\n        self.history.append(self.x_to_bbox_func(self.kf.x))\n        return self.history[-1]\n\n    def get_state(self):\n        \"\"\"\n        Returns the current bounding box estimate.\n        \"\"\"\n        return self.x_to_bbox_func(self.kf.x)\n\n    def mahalanobis(self, bbox):\n        \"\"\"Should be run after a predict() call for accuracy.\"\"\"\n        return self.kf.md_for_measurement(self.bbox_to_z_func(bbox))\n\n\n\"\"\"\n    We support multiple ways for association cost calculation, by default\n    we use IoU. GIoU may have better performance in some situations. We note \n    that we hardly normalize the cost by all methods to (0,1) which may not be \n    the best practice.\n\"\"\"\nASSO_FUNCS = {\n    \"iou\": iou_batch,\n    \"giou\": giou_batch,\n    \"ciou\": ciou_batch,\n    \"diou\": diou_batch,\n    \"ct_dist\": ct_dist,\n}\n\n\nclass OCSort(object):\n    def __init__(\n        self,\n        model_weights,\n        device,\n        fp16,\n        det_thresh,\n        max_age=30,\n        min_hits=3,\n        iou_threshold=0.3,\n        delta_t=3,\n        asso_func=\"iou\",\n        inertia=0.2,\n        w_association_emb=0.75,\n        alpha_fixed_emb=0.95,\n        aw_param=0.5,\n        embedding_off=False,\n        cmc_off=False,\n        aw_off=False,\n        new_kf_off=False,\n        **kwargs\n    ):\n        \"\"\"\n        Sets key parameters for SORT\n        \"\"\"\n        self.max_age = max_age\n        self.min_hits = min_hits\n        self.iou_threshold = iou_threshold\n        self.trackers = []\n        self.frame_count = 0\n        self.det_thresh = det_thresh\n        self.delta_t = delta_t\n        self.asso_func = ASSO_FUNCS[asso_func]\n        self.inertia = inertia\n        self.w_association_emb = w_association_emb\n        self.alpha_fixed_emb = alpha_fixed_emb\n        self.aw_param = aw_param\n        KalmanBoxTracker.count = 0\n\n        self.embedder = ReIDDetectMultiBackend(weights=model_weights, device=device, fp16=fp16)\n        self.cmc = CMCComputer()\n        self.embedding_off = embedding_off\n        self.cmc_off = cmc_off\n        self.aw_off = aw_off\n        self.new_kf_off = new_kf_off\n\n    def update(self, dets, img_numpy, tag='blub'):\n        \"\"\"\n        Params:\n          dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]\n        Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).\n        Returns the a similar array, where the last column is the object ID.\n        NOTE: The number of objects returned may differ from the number of detections provided.\n        \"\"\"\n        xyxys = dets[:, 0:4]\n        scores = dets[:, 4]\n        clss = dets[:, 5]\n        \n        classes = clss.numpy()\n        xyxys = xyxys.numpy()\n        scores = scores.numpy()\n        \n        dets = dets[:, 0:6].numpy()\n        
remain_inds = scores > self.det_thresh\n        dets = dets[remain_inds]\n        self.height, self.width = img_numpy.shape[:2]\n\n        # Rescale\n        #scale = min(img_tensor.shape[2] / img_numpy.shape[0], img_tensor.shape[3] / img_numpy.shape[1])\n        #dets[:, :4] /= scale\n\n        # Embedding\n        if self.embedding_off or dets.shape[0] == 0:\n            dets_embs = np.ones((dets.shape[0], 1))\n        else:\n            # (Ndets x X) [512, 1024, 2048]\n            #dets_embs = self.embedder.compute_embedding(img_numpy, dets[:, :4], tag)\n            dets_embs = self._get_features(dets[:, :4], img_numpy)\n\n        # CMC\n        if not self.cmc_off:\n            transform = self.cmc.compute_affine(img_numpy, dets[:, :4], tag)\n            for trk in self.trackers:\n                trk.apply_affine_correction(transform)\n\n        trust = (dets[:, 4] - self.det_thresh) / (1 - self.det_thresh)\n        af = self.alpha_fixed_emb\n        # From [self.alpha_fixed_emb, 1], goes to 1 as detector is less confident\n        dets_alpha = af + (1 - af) * (1 - trust)\n\n        # get predicted locations from existing trackers.\n        trks = np.zeros((len(self.trackers), 5))\n        trk_embs = []\n        to_del = []\n        ret = []\n        for t, trk in enumerate(trks):\n            pos = self.trackers[t].predict()[0]\n            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]\n            if np.any(np.isnan(pos)):\n                to_del.append(t)\n            else:  \n                trk_embs.append(self.trackers[t].get_emb())\n        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))\n\n        if len(trk_embs) > 0:\n            trk_embs = np.vstack(trk_embs)\n        else:\n            trk_embs = np.array(trk_embs)\n\n        for t in reversed(to_del):\n            self.trackers.pop(t)\n\n        velocities = np.array([trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers])\n        last_boxes = np.array([trk.last_observation for trk in self.trackers])\n        k_observations = np.array([k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers])\n\n        \"\"\"\n            First round of association\n        \"\"\"\n        # (M detections X N tracks, final score)\n        if self.embedding_off or dets.shape[0] == 0 or trk_embs.shape[0] == 0:\n            stage1_emb_cost = None\n        else:\n            stage1_emb_cost = dets_embs @ trk_embs.T\n        matched, unmatched_dets, unmatched_trks = associate(\n            dets,\n            trks,\n            self.iou_threshold,\n            velocities,\n            k_observations,\n            self.inertia,\n            stage1_emb_cost,\n            self.w_association_emb,\n            self.aw_off,\n            self.aw_param,\n        )\n        for m in matched:\n            self.trackers[m[1]].update(dets[m[0], :5], dets[m[0], 5])\n            self.trackers[m[1]].update_emb(dets_embs[m[0]], alpha=dets_alpha[m[0]])\n\n        \"\"\"\n            Second round of associaton by OCR\n        \"\"\"\n        if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:\n            left_dets = dets[unmatched_dets]\n            left_dets_embs = dets_embs[unmatched_dets]\n            left_trks = last_boxes[unmatched_trks]\n            left_trks_embs = trk_embs[unmatched_trks]\n\n            iou_left = self.asso_func(left_dets, left_trks)\n            # TODO: is better without this\n            emb_cost_left = left_dets_embs @ left_trks_embs.T\n            if 
self.embedding_off:\n                emb_cost_left = np.zeros_like(emb_cost_left)\n            iou_left = np.array(iou_left)\n            if iou_left.max() > self.iou_threshold:\n                \"\"\"\n                NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may\n                get a higher performance especially on MOT17/MOT20 datasets. But we keep it\n                uniform here for simplicity\n                \"\"\"\n                rematched_indices = linear_assignment(-iou_left)\n                to_remove_det_indices = []\n                to_remove_trk_indices = []\n                for m in rematched_indices:\n                    det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]\n                    if iou_left[m[0], m[1]] < self.iou_threshold:\n                        continue\n                    self.trackers[trk_ind].update(dets[det_ind, :5], dets[det_ind, 5])\n                    self.trackers[trk_ind].update_emb(dets_embs[det_ind], alpha=dets_alpha[det_ind])\n                    to_remove_det_indices.append(det_ind)\n                    to_remove_trk_indices.append(trk_ind)\n                unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices))\n                unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))\n\n        for m in unmatched_trks:\n            self.trackers[m].update(None, None)\n\n        # create and initialise new trackers for unmatched detections\n        for i in unmatched_dets:\n            trk = KalmanBoxTracker(\n                dets[i, :5], dets[i, 5], delta_t=self.delta_t, emb=dets_embs[i], alpha=dets_alpha[i], new_kf=not self.new_kf_off\n            )\n            self.trackers.append(trk)\n        i = len(self.trackers)\n        for trk in reversed(self.trackers):\n            if trk.last_observation.sum() < 0:\n                d = trk.get_state()[0]\n            else:\n                \"\"\"\n                this is optional to use the recent observation or the kalman filter prediction,\n                we didn't notice significant difference here\n                \"\"\"\n                d = trk.last_observation[:4]\n            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):\n                # +1 as MOT benchmark requires positive\n                ret.append(np.concatenate((d, [trk.id + 1], [trk.cls], [trk.conf])).reshape(1, -1))\n            i -= 1\n            # remove dead tracklet\n            if trk.time_since_update > self.max_age:\n                self.trackers.pop(i)\n        if len(ret) > 0:\n            return np.concatenate(ret)\n        return np.empty((0, 5))\n    \n    def _xywh_to_xyxy(self, bbox_xywh):\n        x, y, w, h = bbox_xywh\n        x1 = max(int(x - w / 2), 0)\n        x2 = min(int(x + w / 2), self.width - 1)\n        y1 = max(int(y - h / 2), 0)\n        y2 = min(int(y + h / 2), self.height - 1)\n        return x1, y1, x2, y2\n    \n    def _get_features(self, bbox_xyxy, ori_img):\n        im_crops = []\n        for box in bbox_xyxy:\n            x1, y1, x2, y2 = box.astype(int)\n            im = ori_img[y1:y2, x1:x2]\n            im_crops.append(im)\n        if im_crops:\n            features = self.embedder(im_crops).cpu()\n        else:\n            features = np.array([])\n        \n        return features\n\n    def update_public(self, dets, cates, scores):\n        self.frame_count += 1\n\n        det_scores = np.ones((dets.shape[0], 1))\n        dets = 
np.concatenate((dets, det_scores), axis=1)\n\n        remain_inds = scores > self.det_thresh\n\n        cates = cates[remain_inds]\n        dets = dets[remain_inds]\n\n        trks = np.zeros((len(self.trackers), 5))\n        to_del = []\n        ret = []\n        for t, trk in enumerate(trks):\n            pos = self.trackers[t].predict()[0]\n            cat = self.trackers[t].cate\n            trk[:] = [pos[0], pos[1], pos[2], pos[3], cat]\n            if np.any(np.isnan(pos)):\n                to_del.append(t)\n        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))\n        for t in reversed(to_del):\n            self.trackers.pop(t)\n\n        velocities = np.array([trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers])\n        last_boxes = np.array([trk.last_observation for trk in self.trackers])\n        k_observations = np.array([k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers])\n\n        matched, unmatched_dets, unmatched_trks = associate_kitti(\n            dets,\n            trks,\n            cates,\n            self.iou_threshold,\n            velocities,\n            k_observations,\n            self.inertia,\n        )\n\n        for m in matched:\n            self.trackers[m[1]].update(dets[m[0], :])\n\n        if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:\n            \"\"\"\n            The re-association stage by OCR.\n            NOTE: at this stage, adding other strategy might be able to continue improve\n            the performance, such as BYTE association by ByteTrack.\n            \"\"\"\n            left_dets = dets[unmatched_dets]\n            left_trks = last_boxes[unmatched_trks]\n            left_dets_c = left_dets.copy()\n            left_trks_c = left_trks.copy()\n\n            iou_left = self.asso_func(left_dets_c, left_trks_c)\n            iou_left = np.array(iou_left)\n            det_cates_left = cates[unmatched_dets]\n            trk_cates_left = trks[unmatched_trks][:, 4]\n            num_dets = unmatched_dets.shape[0]\n            num_trks = unmatched_trks.shape[0]\n            cate_matrix = np.zeros((num_dets, num_trks))\n            for i in range(num_dets):\n                for j in range(num_trks):\n                    if det_cates_left[i] != trk_cates_left[j]:\n                        \"\"\"\n                        For some datasets, such as KITTI, there are different categories,\n                        we have to avoid associate them together.\n                        \"\"\"\n                        cate_matrix[i][j] = -1e6\n            iou_left = iou_left + cate_matrix\n            if iou_left.max() > self.iou_threshold - 0.1:\n                rematched_indices = linear_assignment(-iou_left)\n                to_remove_det_indices = []\n                to_remove_trk_indices = []\n                for m in rematched_indices:\n                    det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]\n                    if iou_left[m[0], m[1]] < self.iou_threshold - 0.1:\n                        continue\n                    self.trackers[trk_ind].update(dets[det_ind, :])\n                    to_remove_det_indices.append(det_ind)\n                    to_remove_trk_indices.append(trk_ind)\n                unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices))\n                unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))\n\n        for i in unmatched_dets:\n            trk = 
KalmanBoxTracker(dets[i, :])\n            trk.cate = cates[i]\n            self.trackers.append(trk)\n        i = len(self.trackers)\n\n        for trk in reversed(self.trackers):\n            if trk.last_observation.sum() > 0:\n                d = trk.last_observation[:4]\n            else:\n                d = trk.get_state()[0]\n            if trk.time_since_update < 1:\n                if (self.frame_count <= self.min_hits) or (trk.hit_streak >= self.min_hits):\n                    # id+1 as MOT benchmark requires positive\n                    ret.append(np.concatenate((d, [trk.id + 1], [trk.cls], [trk.conf])).reshape(1, -1))\n                if trk.hit_streak == self.min_hits:\n                    # Head Padding (HP): recover the lost steps during initializing the track\n                    for prev_i in range(self.min_hits - 1):\n                        prev_observation = trk.history_observations[-(prev_i + 2)]\n                        ret.append(\n                            (\n                                np.concatenate(\n                                    (\n                                        prev_observation[:4],\n                                        [trk.id + 1],\n                                        [trk.cls],\n                                        [trk.conf],\n                                    )\n                                )\n                            ).reshape(1, -1)\n                        )\n            i -= 1\n            if trk.time_since_update > self.max_age:\n                self.trackers.pop(i)\n\n        if len(ret) > 0:\n            return np.concatenate(ret)\n        return np.empty((0, 7))\n\n    def dump_cache(self):\n        self.cmc.dump_cache()\n        self.embedder.dump_cache()\n"
  },
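The `update()` method in the deepocsort tracker above builds a first-stage cost from IoU plus the detection/track embedding similarity (`stage1_emb_cost = dets_embs @ trk_embs.T`) before handing everything to `associate()`. The following is a minimal, self-contained sketch of that fusion idea using NumPy and SciPy's Hungarian solver; the weight `w_emb`, the helper names, and the omission of the velocity-consistency and adaptive-weighting terms are illustrative assumptions, not the repo's `associate()` implementation.

```python
# Illustrative sketch (not the repo's associate()): fuse an IoU cost with an
# embedding-similarity cost and solve the assignment with SciPy's Hungarian solver.
import numpy as np
from scipy.optimize import linear_sum_assignment


def iou_matrix(dets, trks):
    """Pairwise IoU between [x1, y1, x2, y2] boxes -> (M dets x N tracks)."""
    d = dets[:, None, :4]
    t = trks[None, :, :4]
    xx1 = np.maximum(d[..., 0], t[..., 0])
    yy1 = np.maximum(d[..., 1], t[..., 1])
    xx2 = np.minimum(d[..., 2], t[..., 2])
    yy2 = np.minimum(d[..., 3], t[..., 3])
    inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
    area_d = (d[..., 2] - d[..., 0]) * (d[..., 3] - d[..., 1])
    area_t = (t[..., 2] - t[..., 0]) * (t[..., 3] - t[..., 1])
    return inter / (area_d + area_t - inter)


def fuse_and_match(dets, trks, det_embs, trk_embs, iou_thresh=0.3, w_emb=0.5):
    """Return (det_idx, trk_idx) pairs whose IoU clears the gate after matching."""
    iou = iou_matrix(dets, trks)
    emb_cost = det_embs @ trk_embs.T          # higher = more similar (rows L2-normalized)
    rows, cols = linear_sum_assignment(-(iou + w_emb * emb_cost))
    return [(r, c) for r, c in zip(rows, cols) if iou[r, c] >= iou_thresh]
```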
  {
    "path": "DLTA_AI_app/trackers/deepocsort/reid_multibackend.py",
    "content": "import torch.nn as nn\nimport torch\nfrom pathlib import Path\nimport numpy as np\nfrom itertools import islice\nimport torchvision.transforms as transforms\nimport cv2\nimport sys\nimport torchvision.transforms as T\nfrom collections import OrderedDict, namedtuple\nimport gdown\nfrom os.path import exists as file_exists\n\n\nfrom ultralytics.yolo.utils.checks import check_requirements, check_version\nfrom ultralytics.yolo.utils import LOGGER\nfrom trackers.strongsort.deep.reid_model_factory import (show_downloadeable_models, get_model_url, get_model_name,\n                                                          download_url, load_pretrained_weights)\nfrom trackers.strongsort.deep.models import build_model\n\n\ndef check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):\n    # Check file(s) for acceptable suffix\n    if file and suffix:\n        if isinstance(suffix, str):\n            suffix = [suffix]\n        for f in file if isinstance(file, (list, tuple)) else [file]:\n            s = Path(f).suffix.lower()  # file suffix\n            if len(s):\n                assert s in suffix, f\"{msg}{f} acceptable suffix is {suffix}\"\n\n\nclass ReIDDetectMultiBackend(nn.Module):\n    # ReID models MultiBackend class for python inference on various backends\n    def __init__(self, weights='osnet_x0_25_msmt17.pt', device=torch.device('cpu'), fp16=False):\n        super().__init__()\n\n        w = weights[0] if isinstance(weights, list) else weights\n        self.pt, self.jit, self.onnx, self.xml, self.engine, self.tflite = self.model_type(w)  # get backend\n        self.fp16 = fp16\n        self.fp16 &= self.pt or self.jit or self.engine  # FP16\n\n        # Build transform functions\n        self.device = device\n        self.image_size=(256, 128)\n        self.pixel_mean=[0.485, 0.456, 0.406]\n        self.pixel_std=[0.229, 0.224, 0.225]\n        self.transforms = []\n        self.transforms += [T.Resize(self.image_size)]\n        self.transforms += [T.ToTensor()]\n        self.transforms += [T.Normalize(mean=self.pixel_mean, std=self.pixel_std)]\n        self.preprocess = T.Compose(self.transforms)\n        self.to_pil = T.ToPILImage()\n\n        model_name = get_model_name(w)\n\n        if w.suffix == '.pt':\n            model_url = get_model_url(w)\n            if not file_exists(w) and model_url is not None:\n                gdown.download(model_url, str(w), quiet=False)\n            elif file_exists(w):\n                pass\n            else:\n                print(f'No URL associated to the chosen StrongSORT weights ({w}). 
Choose between:')\n                show_downloadeable_models()\n                exit()\n\n        # Build model\n        self.model = build_model(\n            model_name,\n            num_classes=1,\n            pretrained=not (w and w.is_file()),\n            use_gpu=device\n        )\n\n        if self.pt:  # PyTorch\n            # populate model arch with weights\n            if w and w.is_file() and w.suffix == '.pt':\n                load_pretrained_weights(self.model, w)\n                \n            self.model.to(device).eval()\n            self.model.half() if self.fp16 else  self.model.float()\n        elif self.jit:\n            LOGGER.info(f'Loading {w} for TorchScript inference...')\n            self.model = torch.jit.load(w)\n            self.model.half() if self.fp16 else self.model.float()\n        elif self.onnx:  # ONNX Runtime\n            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')\n            cuda = torch.cuda.is_available() and device.type != 'cpu'\n            #check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))\n            import onnxruntime\n            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']\n            self.session = onnxruntime.InferenceSession(str(w), providers=providers)\n        elif self.engine:  # TensorRT\n            LOGGER.info(f'Loading {w} for TensorRT inference...')\n            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download\n            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0\n            if device.type == 'cpu':\n                device = torch.device('cuda:0')\n            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))\n            logger = trt.Logger(trt.Logger.INFO)\n            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:\n                self.model_ = runtime.deserialize_cuda_engine(f.read())\n            self.context = self.model_.create_execution_context()\n            self.bindings = OrderedDict()\n            self.fp16 = False  # default updated below\n            dynamic = False\n            for index in range(self.model_.num_bindings):\n                name = self.model_.get_binding_name(index)\n                dtype = trt.nptype(self.model_.get_binding_dtype(index))\n                if self.model_.binding_is_input(index):\n                    if -1 in tuple(self.model_.get_binding_shape(index)):  # dynamic\n                        dynamic = True\n                        self.context.set_binding_shape(index, tuple(self.model_.get_profile_shape(0, index)[2]))\n                    if dtype == np.float16:\n                        self.fp16 = True\n                shape = tuple(self.context.get_binding_shape(index))\n                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)\n                self.bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))\n            self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())\n            batch_size = self.bindings['images'].shape[0]  # if dynamic, this is instead max batch size\n        elif self.xml:  # OpenVINO\n            LOGGER.info(f'Loading {w} for OpenVINO inference...')\n            check_requirements(('openvino',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/\n            from openvino.runtime import Core, Layout, get_batch\n            ie = Core()\n            if not Path(w).is_file():  # if not *.xml\n    
            w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir\n            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))\n            if network.get_parameters()[0].get_layout().empty:\n                network.get_parameters()[0].set_layout(Layout(\"NCWH\"))\n            batch_dim = get_batch(network)\n            if batch_dim.is_static:\n                batch_size = batch_dim.get_length()\n            self.executable_network = ie.compile_model(network, device_name=\"CPU\")  # device_name=\"MYRIAD\" for Intel NCS2\n            self.output_layer = next(iter(self.executable_network.outputs))\n        \n        elif self.tflite:\n            LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')\n            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu\n                from tflite_runtime.interpreter import Interpreter, load_delegate\n            except ImportError:\n                import tensorflow as tf\n                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,\n            self.interpreter = tf.lite.Interpreter(model_path=w)\n            self.interpreter.allocate_tensors()\n            # Get input and output tensors.\n            self.input_details = self.interpreter.get_input_details()\n            self.output_details = self.interpreter.get_output_details()\n            \n            # Test model on random input data.\n            input_data = np.array(np.random.random_sample((1,256,128,3)), dtype=np.float32)\n            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)\n            \n            self.interpreter.invoke()\n\n            # The function `get_tensor()` returns a copy of the tensor data.\n            output_data = self.interpreter.get_tensor(self.output_details[0]['index'])\n        else:\n            print('This model framework is not supported yet!')\n            exit()\n        \n        \n    @staticmethod\n    def model_type(p='path/to/model.pt'):\n        # Return model type from model path, i.e. 
path='path/to/model.onnx' -> type=onnx\n        from trackers.reid_export import export_formats\n        sf = list(export_formats().Suffix)  # export suffixes\n        check_suffix(p, sf)  # checks\n        types = [s in Path(p).name for s in sf]\n        return types\n\n    def _preprocess(self, im_batch):\n\n        images = []\n        for element in im_batch:\n            image = self.to_pil(element)\n            image = self.preprocess(image)\n            images.append(image)\n\n        images = torch.stack(images, dim=0)\n        images = images.to(self.device)\n\n        return images\n    \n    \n    def forward(self, im_batch):\n        \n        # preprocess batch\n        im_batch = self._preprocess(im_batch)\n\n        # batch to half\n        if self.fp16 and im_batch.dtype != torch.float16:\n           im_batch = im_batch.half()\n\n        # batch processing\n        features = []\n        if self.pt:\n            features = self.model(im_batch)\n        elif self.jit:  # TorchScript\n            features = self.model(im_batch)\n        elif self.onnx:  # ONNX Runtime\n            im_batch = im_batch.cpu().numpy()  # torch to numpy\n            features = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im_batch})[0]\n        elif self.engine:  # TensorRT\n            if True and im_batch.shape != self.bindings['images'].shape:\n                i_in, i_out = (self.model_.get_binding_index(x) for x in ('images', 'output'))\n                self.context.set_binding_shape(i_in, im_batch.shape)  # reshape if dynamic\n                self.bindings['images'] = self.bindings['images']._replace(shape=im_batch.shape)\n                self.bindings['output'].data.resize_(tuple(self.context.get_binding_shape(i_out)))\n            s = self.bindings['images'].shape\n            assert im_batch.shape == s, f\"input size {im_batch.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}\"\n            self.binding_addrs['images'] = int(im_batch.data_ptr())\n            self.context.execute_v2(list(self.binding_addrs.values()))\n            features = self.bindings['output'].data\n        elif self.xml:  # OpenVINO\n            im_batch = im_batch.cpu().numpy()  # FP32\n            features = self.executable_network([im_batch])[self.output_layer]\n        else:\n            print('Framework not supported at the moment, we are working on it...')\n            exit()\n\n        if isinstance(features, (list, tuple)):\n            return self.from_numpy(features[0]) if len(features) == 1 else [self.from_numpy(x) for x in features]\n        else:\n            return self.from_numpy(features)\n\n    def from_numpy(self, x):\n        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x\n\n    def warmup(self, imgsz=[(256, 128, 3)]):\n        # Warmup model by running inference once\n        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.tflite\n        if any(warmup_types) and self.device.type != 'cpu':\n            im = [np.empty(*imgsz).astype(np.uint8)]  # input\n            for _ in range(2 if self.jit else 1):  #\n                self.forward(im)  # warmup"
  },
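`ReIDDetectMultiBackend._preprocess()` above converts each detection crop to a PIL image, resizes it to 256x128, converts it to a tensor, normalizes it with ImageNet statistics, and stacks the results into one batch. A small standalone sketch of that same pipeline is below; the random crops are placeholders for real detections.

```python
# Minimal sketch of the crop preprocessing in ReIDDetectMultiBackend._preprocess:
# resize -> tensor -> normalize -> stack into a single batch.
import numpy as np
import torch
import torchvision.transforms as T

preprocess = T.Compose([
    T.Resize((256, 128)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
to_pil = T.ToPILImage()

# Four dummy HxWx3 uint8 crops standing in for detection cut-outs.
crops = [np.random.randint(0, 255, (80, 40, 3), dtype=np.uint8) for _ in range(4)]
batch = torch.stack([preprocess(to_pil(c)) for c in crops], dim=0)
print(batch.shape)  # torch.Size([4, 3, 256, 128])
```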
  {
    "path": "DLTA_AI_app/trackers/multi_tracker_zoo.py",
    "content": "from trackers.strongsort.utils.parser import get_config\n\ndef create_tracker(tracker_type, tracker_config, reid_weights, device, half):\n    \n    cfg = get_config()\n    cfg.merge_from_file(tracker_config)\n    \n    if tracker_type == 'strongsort':\n        from trackers.strongsort.strong_sort import StrongSORT\n        strongsort = StrongSORT(\n            reid_weights,\n            device,\n            half,\n            max_dist=cfg.strongsort.max_dist,\n            max_iou_dist=cfg.strongsort.max_iou_dist,\n            max_age=cfg.strongsort.max_age,\n            max_unmatched_preds=cfg.strongsort.max_unmatched_preds,\n            n_init=cfg.strongsort.n_init,\n            nn_budget=cfg.strongsort.nn_budget,\n            mc_lambda=cfg.strongsort.mc_lambda,\n            ema_alpha=cfg.strongsort.ema_alpha,\n\n        )\n        return strongsort\n    \n    elif tracker_type == 'ocsort':\n        from trackers.ocsort.ocsort import OCSort\n        ocsort = OCSort(\n            det_thresh=cfg.ocsort.det_thresh,\n            max_age=cfg.ocsort.max_age,\n            min_hits=cfg.ocsort.min_hits,\n            iou_threshold=cfg.ocsort.iou_thresh,\n            delta_t=cfg.ocsort.delta_t,\n            asso_func=cfg.ocsort.asso_func,\n            inertia=cfg.ocsort.inertia,\n            use_byte=cfg.ocsort.use_byte,\n        )\n        return ocsort\n    \n    elif tracker_type == 'bytetrack':\n        from trackers.bytetrack.byte_tracker import BYTETracker\n        bytetracker = BYTETracker(\n            track_thresh=cfg.bytetrack.track_thresh,\n            match_thresh=cfg.bytetrack.match_thresh,\n            track_buffer=cfg.bytetrack.track_buffer,\n            frame_rate=cfg.bytetrack.frame_rate\n        )\n        return bytetracker\n    \n    elif tracker_type == 'botsort':\n        from trackers.botsort.bot_sort import BoTSORT\n        botsort = BoTSORT(\n            reid_weights,\n            device,\n            half,\n            track_high_thresh=cfg.botsort.track_high_thresh,\n            new_track_thresh=cfg.botsort.new_track_thresh,\n            track_buffer =cfg.botsort.track_buffer,\n            match_thresh=cfg.botsort.match_thresh,\n            proximity_thresh=cfg.botsort.proximity_thresh,\n            appearance_thresh=cfg.botsort.appearance_thresh,\n            cmc_method =cfg.botsort.cmc_method,\n            frame_rate=cfg.botsort.frame_rate,\n            lambda_=cfg.botsort.lambda_\n        )\n        return botsort\n    elif tracker_type == 'deepocsort':\n        from trackers.deepocsort.ocsort import OCSort\n        botsort = OCSort(\n            reid_weights,\n            device,\n            half,\n            det_thresh=cfg.deepocsort.det_thresh,\n            max_age=cfg.deepocsort.max_age,\n            min_hits=cfg.deepocsort.min_hits,\n            iou_threshold=cfg.deepocsort.iou_thresh,\n            delta_t=cfg.deepocsort.delta_t,\n            asso_func=cfg.deepocsort.asso_func,\n            inertia=cfg.deepocsort.inertia,\n        )\n        return botsort\n    else:\n        print('No such tracker')\n        exit()"
  },
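A hypothetical usage sketch for the `create_tracker()` factory above, assuming it is run from inside `DLTA_AI_app/` so the relative config path resolves. OC-SORT does not use the ReID weights, so `None` is passed for them here; StrongSORT, BoT-SORT, or DeepOCSORT would need a real weights path instead.

```python
# Sketch only: instantiate the OC-SORT tracker through the factory, assuming the
# repo layout shown in this dump and a working directory of DLTA_AI_app/.
import torch
from trackers.multi_tracker_zoo import create_tracker

tracker = create_tracker(
    tracker_type='ocsort',
    tracker_config='trackers/ocsort/configs/ocsort.yaml',
    reid_weights=None,          # not used by the 'ocsort' branch of the factory
    device=torch.device('cpu'),
    half=False,
)
```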
  {
    "path": "DLTA_AI_app/trackers/ocsort/association.py",
    "content": "import os\nimport numpy as np\n\n\ndef iou_batch(bboxes1, bboxes2):\n    \"\"\"\n    From SORT: Computes IOU between two bboxes in the form [x1,y1,x2,y2]\n    \"\"\"\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n    \n    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])\n    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])\n    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])\n    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])\n    w = np.maximum(0., xx2 - xx1)\n    h = np.maximum(0., yy2 - yy1)\n    wh = w * h\n    o = wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])                                      \n        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh)                                              \n    return(o)  \n\n\ndef giou_batch(bboxes1, bboxes2):\n    \"\"\"\n    :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2)\n    :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2)\n    :return:\n    \"\"\"\n    # for details should go to https://arxiv.org/pdf/1902.09630.pdf\n    # ensure predict's bbox form\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])\n    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])\n    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])\n    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])\n    w = np.maximum(0., xx2 - xx1)\n    h = np.maximum(0., yy2 - yy1)\n    wh = w * h\n    iou = wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])                                      \n        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh)  \n\n    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])\n    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])\n    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])\n    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])\n    wc = xxc2 - xxc1 \n    hc = yyc2 - yyc1 \n    assert((wc > 0).all() and (hc > 0).all())\n    area_enclose = wc * hc \n    giou = iou - (area_enclose - wh) / area_enclose\n    giou = (giou + 1.)/2.0 # resize from (-1,1) to (0,1)\n    return giou\n\n\ndef diou_batch(bboxes1, bboxes2):\n    \"\"\"\n    :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2)\n    :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2)\n    :return:\n    \"\"\"\n    # for details should go to https://arxiv.org/pdf/1902.09630.pdf\n    # ensure predict's bbox form\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    # calculate the intersection box\n    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])\n    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])\n    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])\n    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])\n    w = np.maximum(0., xx2 - xx1)\n    h = np.maximum(0., yy2 - yy1)\n    wh = w * h\n    iou = wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])                                      \n        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh) \n\n    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0\n    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0\n    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0\n    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0\n\n    inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2\n\n    xxc1 = np.minimum(bboxes1[..., 0], 
bboxes2[..., 0])\n    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])\n    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])\n    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])\n\n    outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2\n    diou = iou - inner_diag / outer_diag\n\n    return (diou + 1) / 2.0 # resize from (-1,1) to (0,1)\n\ndef ciou_batch(bboxes1, bboxes2):\n    \"\"\"\n    :param bbox_p: predict of bbox(N,4)(x1,y1,x2,y2)\n    :param bbox_g: groundtruth of bbox(N,4)(x1,y1,x2,y2)\n    :return:\n    \"\"\"\n    # for details should go to https://arxiv.org/pdf/1902.09630.pdf\n    # ensure predict's bbox form\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    # calculate the intersection box\n    xx1 = np.maximum(bboxes1[..., 0], bboxes2[..., 0])\n    yy1 = np.maximum(bboxes1[..., 1], bboxes2[..., 1])\n    xx2 = np.minimum(bboxes1[..., 2], bboxes2[..., 2])\n    yy2 = np.minimum(bboxes1[..., 3], bboxes2[..., 3])\n    w = np.maximum(0., xx2 - xx1)\n    h = np.maximum(0., yy2 - yy1)\n    wh = w * h\n    iou = wh / ((bboxes1[..., 2] - bboxes1[..., 0]) * (bboxes1[..., 3] - bboxes1[..., 1])                                      \n        + (bboxes2[..., 2] - bboxes2[..., 0]) * (bboxes2[..., 3] - bboxes2[..., 1]) - wh) \n\n    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0\n    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0\n    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0\n    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0\n\n    inner_diag = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2\n\n    xxc1 = np.minimum(bboxes1[..., 0], bboxes2[..., 0])\n    yyc1 = np.minimum(bboxes1[..., 1], bboxes2[..., 1])\n    xxc2 = np.maximum(bboxes1[..., 2], bboxes2[..., 2])\n    yyc2 = np.maximum(bboxes1[..., 3], bboxes2[..., 3])\n\n    outer_diag = (xxc2 - xxc1) ** 2 + (yyc2 - yyc1) ** 2\n    \n    w1 = bboxes1[..., 2] - bboxes1[..., 0]\n    h1 = bboxes1[..., 3] - bboxes1[..., 1]\n    w2 = bboxes2[..., 2] - bboxes2[..., 0]\n    h2 = bboxes2[..., 3] - bboxes2[..., 1]\n\n    # prevent dividing over zero. 
add one pixel shift\n    h2 = h2 + 1.\n    h1 = h1 + 1.\n    arctan = np.arctan(w2/h2) - np.arctan(w1/h1)\n    v = (4 / (np.pi ** 2)) * (arctan ** 2)\n    S = 1 - iou \n    alpha = v / (S+v)\n    ciou = iou - inner_diag / outer_diag - alpha * v\n    \n    return (ciou + 1) / 2.0 # resize from (-1,1) to (0,1)\n\n\ndef ct_dist(bboxes1, bboxes2):\n    \"\"\"\n        Measure the center distance between two sets of bounding boxes,\n        this is a coarse implementation, we don't recommend using it only\n        for association, which can be unstable and sensitive to frame rate\n        and object speed.\n    \"\"\"\n    bboxes2 = np.expand_dims(bboxes2, 0)\n    bboxes1 = np.expand_dims(bboxes1, 1)\n\n    centerx1 = (bboxes1[..., 0] + bboxes1[..., 2]) / 2.0\n    centery1 = (bboxes1[..., 1] + bboxes1[..., 3]) / 2.0\n    centerx2 = (bboxes2[..., 0] + bboxes2[..., 2]) / 2.0\n    centery2 = (bboxes2[..., 1] + bboxes2[..., 3]) / 2.0\n\n    ct_dist2 = (centerx1 - centerx2) ** 2 + (centery1 - centery2) ** 2\n\n    ct_dist = np.sqrt(ct_dist2)\n\n    # The linear rescaling is a naive version and needs more study\n    ct_dist = ct_dist / ct_dist.max()\n    return ct_dist.max() - ct_dist # resize to (0,1)\n\n\n\ndef speed_direction_batch(dets, tracks):\n    tracks = tracks[..., np.newaxis]\n    CX1, CY1 = (dets[:,0] + dets[:,2])/2.0, (dets[:,1]+dets[:,3])/2.0\n    CX2, CY2 = (tracks[:,0] + tracks[:,2]) /2.0, (tracks[:,1]+tracks[:,3])/2.0\n    dx = CX1 - CX2 \n    dy = CY1 - CY2 \n    norm = np.sqrt(dx**2 + dy**2) + 1e-6\n    dx = dx / norm \n    dy = dy / norm\n    return dy, dx # size: num_track x num_det\n\n\ndef linear_assignment(cost_matrix):\n    try:\n        import lap\n        _, x, y = lap.lapjv(cost_matrix, extend_cost=True)\n        return np.array([[y[i],i] for i in x if i >= 0]) #\n    except ImportError:\n        from scipy.optimize import linear_sum_assignment\n        x, y = linear_sum_assignment(cost_matrix)\n        return np.array(list(zip(x, y)))\n\n\ndef associate_detections_to_trackers(detections,trackers, iou_threshold = 0.3):\n    \"\"\"\n    Assigns detections to tracked object (both represented as bounding boxes)\n    Returns 3 lists of matches, unmatched_detections and unmatched_trackers\n    \"\"\"\n    if(len(trackers)==0):\n        return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)\n\n    iou_matrix = iou_batch(detections, trackers)\n\n    if min(iou_matrix.shape) > 0:\n        a = (iou_matrix > iou_threshold).astype(np.int32)\n        if a.sum(1).max() == 1 and a.sum(0).max() == 1:\n            matched_indices = np.stack(np.where(a), axis=1)\n        else:\n            matched_indices = linear_assignment(-iou_matrix)\n    else:\n        matched_indices = np.empty(shape=(0,2))\n\n    unmatched_detections = []\n    for d, det in enumerate(detections):\n        if(d not in matched_indices[:,0]):\n            unmatched_detections.append(d)\n    unmatched_trackers = []\n    for t, trk in enumerate(trackers):\n        if(t not in matched_indices[:,1]):\n            unmatched_trackers.append(t)\n\n    #filter out matched with low IOU\n    matches = []\n    for m in matched_indices:\n        if(iou_matrix[m[0], m[1]]<iou_threshold):\n            unmatched_detections.append(m[0])\n            unmatched_trackers.append(m[1])\n        else:\n            matches.append(m.reshape(1,2))\n    if(len(matches)==0):\n        matches = np.empty((0,2),dtype=int)\n    else:\n        matches = np.concatenate(matches,axis=0)\n\n    return matches, 
np.array(unmatched_detections), np.array(unmatched_trackers)\n\n\ndef associate(detections, trackers, iou_threshold, velocities, previous_obs, vdc_weight):    \n    if(len(trackers)==0):\n        return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)\n\n    Y, X = speed_direction_batch(detections, previous_obs)\n    inertia_Y, inertia_X = velocities[:,0], velocities[:,1]\n    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)\n    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)\n    diff_angle_cos = inertia_X * X + inertia_Y * Y\n    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)\n    diff_angle = np.arccos(diff_angle_cos)\n    diff_angle = (np.pi /2.0 - np.abs(diff_angle)) / np.pi\n\n    valid_mask = np.ones(previous_obs.shape[0])\n    valid_mask[np.where(previous_obs[:,4]<0)] = 0\n    \n    iou_matrix = iou_batch(detections, trackers)\n    scores = np.repeat(detections[:,-1][:, np.newaxis], trackers.shape[0], axis=1)\n    # iou_matrix = iou_matrix * scores # a trick sometiems works, we don't encourage this\n    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)\n\n    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight\n    angle_diff_cost = angle_diff_cost.T\n    angle_diff_cost = angle_diff_cost * scores\n\n    if min(iou_matrix.shape) > 0:\n        a = (iou_matrix > iou_threshold).astype(np.int32)\n        if a.sum(1).max() == 1 and a.sum(0).max() == 1:\n            matched_indices = np.stack(np.where(a), axis=1)\n        else:\n            matched_indices = linear_assignment(-(iou_matrix+angle_diff_cost))\n    else:\n        matched_indices = np.empty(shape=(0,2))\n\n    unmatched_detections = []\n    for d, det in enumerate(detections):\n        if(d not in matched_indices[:,0]):\n            unmatched_detections.append(d)\n    unmatched_trackers = []\n    for t, trk in enumerate(trackers):\n        if(t not in matched_indices[:,1]):\n            unmatched_trackers.append(t)\n\n    # filter out matched with low IOU\n    matches = []\n    for m in matched_indices:\n        if(iou_matrix[m[0], m[1]]<iou_threshold):\n            unmatched_detections.append(m[0])\n            unmatched_trackers.append(m[1])\n        else:\n            matches.append(m.reshape(1,2))\n    if(len(matches)==0):\n        matches = np.empty((0,2),dtype=int)\n    else:\n        matches = np.concatenate(matches,axis=0)\n\n    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)\n\n\ndef associate_kitti(detections, trackers, det_cates, iou_threshold, \n        velocities, previous_obs, vdc_weight):\n    if(len(trackers)==0):\n        return np.empty((0,2),dtype=int), np.arange(len(detections)), np.empty((0,5),dtype=int)\n\n    \"\"\"\n        Cost from the velocity direction consistency\n    \"\"\"\n    Y, X = speed_direction_batch(detections, previous_obs)\n    inertia_Y, inertia_X = velocities[:,0], velocities[:,1]\n    inertia_Y = np.repeat(inertia_Y[:, np.newaxis], Y.shape[1], axis=1)\n    inertia_X = np.repeat(inertia_X[:, np.newaxis], X.shape[1], axis=1)\n    diff_angle_cos = inertia_X * X + inertia_Y * Y\n    diff_angle_cos = np.clip(diff_angle_cos, a_min=-1, a_max=1)\n    diff_angle = np.arccos(diff_angle_cos)\n    diff_angle = (np.pi /2.0 - np.abs(diff_angle)) / np.pi\n\n    valid_mask = np.ones(previous_obs.shape[0])\n    valid_mask[np.where(previous_obs[:,4]<0)]=0  \n    valid_mask = np.repeat(valid_mask[:, np.newaxis], X.shape[1], axis=1)\n\n    scores = 
np.repeat(detections[:,-1][:, np.newaxis], trackers.shape[0], axis=1)\n    angle_diff_cost = (valid_mask * diff_angle) * vdc_weight\n    angle_diff_cost = angle_diff_cost.T\n    angle_diff_cost = angle_diff_cost * scores\n\n    \"\"\"\n        Cost from IoU\n    \"\"\"\n    iou_matrix = iou_batch(detections, trackers)\n    \n\n    \"\"\"\n        With multiple categories, generate the cost for catgory mismatch\n    \"\"\"\n    num_dets = detections.shape[0]\n    num_trk = trackers.shape[0]\n    cate_matrix = np.zeros((num_dets, num_trk))\n    for i in range(num_dets):\n            for j in range(num_trk):\n                if det_cates[i] != trackers[j, 4]:\n                        cate_matrix[i][j] = -1e6\n    \n    cost_matrix = - iou_matrix -angle_diff_cost - cate_matrix\n\n    if min(iou_matrix.shape) > 0:\n        a = (iou_matrix > iou_threshold).astype(np.int32)\n        if a.sum(1).max() == 1 and a.sum(0).max() == 1:\n            matched_indices = np.stack(np.where(a), axis=1)\n        else:\n            matched_indices = linear_assignment(cost_matrix)\n    else:\n        matched_indices = np.empty(shape=(0,2))\n\n    unmatched_detections = []\n    for d, det in enumerate(detections):\n        if(d not in matched_indices[:,0]):\n            unmatched_detections.append(d)\n    unmatched_trackers = []\n    for t, trk in enumerate(trackers):\n        if(t not in matched_indices[:,1]):\n            unmatched_trackers.append(t)\n\n    #filter out matched with low IOU\n    matches = []\n    for m in matched_indices:\n        if(iou_matrix[m[0], m[1]]<iou_threshold):\n            unmatched_detections.append(m[0])\n            unmatched_trackers.append(m[1])\n        else:\n            matches.append(m.reshape(1,2))\n    if(len(matches)==0):\n        matches = np.empty((0,2),dtype=int)\n    else:\n        matches = np.concatenate(matches,axis=0)\n\n    return matches, np.array(unmatched_detections), np.array(unmatched_trackers)"
  },
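A quick hand-check of the `iou_batch()` helper defined above: two 100x100 boxes offset by 50 px share a 50x100 = 5000 px intersection over a 15000 px union, so the IoU should be 1/3. The import assumes `DLTA_AI_app/` is on the Python path.

```python
# Sanity check for trackers/ocsort/association.iou_batch on a hand-computed case.
import numpy as np
from trackers.ocsort.association import iou_batch

dets = np.array([[0.0, 0.0, 100.0, 100.0]])
trks = np.array([[50.0, 0.0, 150.0, 100.0]])
print(iou_batch(dets, trks))  # [[0.3333...]] -> intersection 5000 / union 15000
```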
  {
    "path": "DLTA_AI_app/trackers/ocsort/configs/ocsort.yaml",
    "content": "# Trial number:      137\n# HOTA, MOTA, IDF1:  [55.567]\nocsort:\n  asso_func: giou\n  conf_thres: 0.5122620708221085\n  delta_t: 1\n  det_thresh: 0\n  inertia: 0.3941737016672115\n  iou_thresh: 0.22136877277096445\n  max_age: 50\n  min_hits: 1\n  use_byte: false\n"
  },
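For reference, a minimal sketch of reading this config with plain PyYAML and pulling out individual OC-SORT parameters; the repo itself loads it through `get_config()`/`merge_from_file()` as shown in `multi_tracker_zoo.py`. The relative path assumes the working directory is `DLTA_AI_app/`.

```python
# Sketch: load the OC-SORT YAML directly and read a couple of its tuned values.
import yaml

with open('trackers/ocsort/configs/ocsort.yaml') as f:
    cfg = yaml.safe_load(f)

print(cfg['ocsort']['iou_thresh'])  # 0.22136877277096445
print(cfg['ocsort']['asso_func'])   # 'giou'
```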
  {
    "path": "DLTA_AI_app/trackers/ocsort/kalmanfilter.py",
    "content": "# -*- coding: utf-8 -*-\n# pylint: disable=invalid-name, too-many-arguments, too-many-branches,\n# pylint: disable=too-many-locals, too-many-instance-attributes, too-many-lines\n\n\"\"\"\nThis module implements the linear Kalman filter in both an object\noriented and procedural form. The KalmanFilter class implements\nthe filter by storing the various matrices in instance variables,\nminimizing the amount of bookkeeping you have to do.\nAll Kalman filters operate with a predict->update cycle. The\npredict step, implemented with the method or function predict(),\nuses the state transition matrix F to predict the state in the next\ntime period (epoch). The state is stored as a gaussian (x, P), where\nx is the state (column) vector, and P is its covariance. Covariance\nmatrix Q specifies the process covariance. In Bayesian terms, this\nprediction is called the *prior*, which you can think of colloquially\nas the estimate prior to incorporating the measurement.\nThe update step, implemented with the method or function `update()`,\nincorporates the measurement z with covariance R, into the state\nestimate (x, P). The class stores the system uncertainty in S,\nthe innovation (residual between prediction and measurement in\nmeasurement space) in y, and the Kalman gain in k. The procedural\nform returns these variables to you. In Bayesian terms this computes\nthe *posterior* - the estimate after the information from the\nmeasurement is incorporated.\nWhether you use the OO form or procedural form is up to you. If\nmatrices such as H, R, and F are changing each epoch, you'll probably\nopt to use the procedural form. If they are unchanging, the OO\nform is perhaps easier to use since you won't need to keep track\nof these matrices. This is especially useful if you are implementing\nbanks of filters or comparing various KF designs for performance;\na trivial coding bug could lead to using the wrong sets of matrices.\nThis module also offers an implementation of the RTS smoother, and\nother helper functions, such as log likelihood computations.\nThe Saver class allows you to easily save the state of the\nKalmanFilter class after every update\nThis module expects NumPy arrays for all values that expect\narrays, although in a few cases, particularly method parameters,\nit will accept types that convert to NumPy arrays, such as lists\nof lists. These exceptions are documented in the method or function.\nExamples\n--------\nThe following example constructs a constant velocity kinematic\nfilter, filters noisy data, and plots the results. It also demonstrates\nusing the Saver class to save the state of the filter at each epoch.\n.. 
code-block:: Python\n    import matplotlib.pyplot as plt\n    import numpy as np\n    from filterpy.kalman import KalmanFilter\n    from filterpy.common import Q_discrete_white_noise, Saver\n    r_std, q_std = 2., 0.003\n    cv = KalmanFilter(dim_x=2, dim_z=1)\n    cv.x = np.array([[0., 1.]]) # position, velocity\n    cv.F = np.array([[1, dt],[ [0, 1]])\n    cv.R = np.array([[r_std^^2]])\n    f.H = np.array([[1., 0.]])\n    f.P = np.diag([.1^^2, .03^^2)\n    f.Q = Q_discrete_white_noise(2, dt, q_std**2)\n    saver = Saver(cv)\n    for z in range(100):\n        cv.predict()\n        cv.update([z + randn() * r_std])\n        saver.save() # save the filter's state\n    saver.to_array()\n    plt.plot(saver.x[:, 0])\n    # plot all of the priors\n    plt.plot(saver.x_prior[:, 0])\n    # plot mahalanobis distance\n    plt.figure()\n    plt.plot(saver.mahalanobis)\nThis code implements the same filter using the procedural form\n    x = np.array([[0., 1.]]) # position, velocity\n    F = np.array([[1, dt],[ [0, 1]])\n    R = np.array([[r_std^^2]])\n    H = np.array([[1., 0.]])\n    P = np.diag([.1^^2, .03^^2)\n    Q = Q_discrete_white_noise(2, dt, q_std**2)\n    for z in range(100):\n        x, P = predict(x, P, F=F, Q=Q)\n        x, P = update(x, P, z=[z + randn() * r_std], R=R, H=H)\n        xs.append(x[0, 0])\n    plt.plot(xs)\nFor more examples see the test subdirectory, or refer to the\nbook cited below. In it I both teach Kalman filtering from basic\nprinciples, and teach the use of this library in great detail.\nFilterPy library.\nhttp://github.com/rlabbe/filterpy\nDocumentation at:\nhttps://filterpy.readthedocs.org\nSupporting book at:\nhttps://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python\nThis is licensed under an MIT license. See the readme.MD file\nfor more information.\nCopyright 2014-2018 Roger R Labbe Jr.\n\"\"\"\n\nfrom __future__ import absolute_import, division\n\nfrom copy import deepcopy\nfrom math import log, exp, sqrt\nimport sys\nimport numpy as np\nfrom numpy import dot, zeros, eye, isscalar, shape\nimport numpy.linalg as linalg\nfrom filterpy.stats import logpdf\nfrom filterpy.common import pretty_str, reshape_z\n\n\nclass KalmanFilterNew(object):\n    \"\"\" Implements a Kalman filter. You are responsible for setting the\n    various state variables to reasonable values; the defaults  will\n    not give you a functional filter.\n    For now the best documentation is my free book Kalman and Bayesian\n    Filters in Python [2]_. The test files in this directory also give you a\n    basic idea of use, albeit without much description.\n    In brief, you will first construct this object, specifying the size of\n    the state vector with dim_x and the size of the measurement vector that\n    you will be using with dim_z. These are mostly used to perform size checks\n    when you assign values to the various matrices. For example, if you\n    specified dim_z=2 and then try to assign a 3x3 matrix to R (the\n    measurement noise matrix you will get an assert exception because R\n    should be 2x2. (If for whatever reason you need to alter the size of\n    things midstream just use the underscore version of the matrices to\n    assign directly: your_filter._R = a_3x3_matrix.)\n    After construction the filter will have default matrices created for you,\n    but you must specify the values for each. It’s usually easiest to just\n    overwrite them rather than assign to each element yourself. This will be\n    clearer in the example below. 
All are of type numpy.array.\n    Examples\n    --------\n    Here is a filter that tracks position and velocity using a sensor that only\n    reads position.\n    First construct the object with the required dimensionality. Here the state\n    (`dim_x`) has 2 coefficients (position and velocity), and the measurement\n    (`dim_z`) has one. In FilterPy `x` is the state, `z` is the measurement.\n    .. code::\n        from filterpy.kalman import KalmanFilter\n        f = KalmanFilter (dim_x=2, dim_z=1)\n    Assign the initial value for the state (position and velocity). You can do this\n    with a two dimensional array like so:\n        .. code::\n            f.x = np.array([[2.],    # position\n                            [0.]])   # velocity\n    or just use a one dimensional array, which I prefer doing.\n    .. code::\n        f.x = np.array([2., 0.])\n    Define the state transition matrix:\n        .. code::\n            f.F = np.array([[1.,1.],\n                            [0.,1.]])\n    Define the measurement function. Here we need to convert a position-velocity\n    vector into just a position vector, so we use:\n        .. code::\n        f.H = np.array([[1., 0.]])\n    Define the state's covariance matrix P. \n    .. code::\n        f.P = np.array([[1000.,    0.],\n                        [   0., 1000.] ])\n    Now assign the measurement noise. Here the dimension is 1x1, so I can\n    use a scalar\n    .. code::\n        f.R = 5\n    I could have done this instead:\n    .. code::\n        f.R = np.array([[5.]])\n    Note that this must be a 2 dimensional array.\n    Finally, I will assign the process noise. Here I will take advantage of\n    another FilterPy library function:\n    .. code::\n        from filterpy.common import Q_discrete_white_noise\n        f.Q = Q_discrete_white_noise(dim=2, dt=0.1, var=0.13)\n    Now just perform the standard predict/update loop:\n    .. code::\n        while some_condition_is_true:\n            z = get_sensor_reading()\n            f.predict()\n            f.update(z)\n            do_something_with_estimate (f.x)\n    **Procedural Form**\n    This module also contains stand alone functions to perform Kalman filtering.\n    Use these if you are not a fan of objects.\n    **Example**\n    .. code::\n        while True:\n            z, R = read_sensor()\n            x, P = predict(x, P, F, Q)\n            x, P = update(x, P, z, R, H)\n    See my book Kalman and Bayesian Filters in Python [2]_.\n    You will have to set the following attributes after constructing this\n    object for the filter to perform properly. Please note that there are\n    various checks in place to ensure that you have made everything the\n    'correct' size. However, it is possible to provide incorrectly sized\n    arrays such that the linear algebra can not perform an operation.\n    It can also fail silently - you can end up with matrices of a size that\n    allows the linear algebra to work, but are the wrong shape for the problem\n    you are trying to solve.\n    Parameters\n    ----------\n    dim_x : int\n        Number of state variables for the Kalman filter. For example, if\n        you are tracking the position and velocity of an object in two\n        dimensions, dim_x would be 4.\n        This is used to set the default size of P, Q, and u\n    dim_z : int\n        Number of of measurement inputs. 
For example, if the sensor\n        provides you with position in (x,y), dim_z would be 2.\n    dim_u : int (optional)\n        size of the control input, if it is being used.\n        Default value of 0 indicates it is not used.\n    compute_log_likelihood : bool (default = True)\n        Computes log likelihood by default, but this can be a slow\n        computation, so if you never use it you can turn this computation\n        off.\n    Attributes\n    ----------\n    x : numpy.array(dim_x, 1)\n        Current state estimate. Any call to update() or predict() updates\n        this variable.\n    P : numpy.array(dim_x, dim_x)\n        Current state covariance matrix. Any call to update() or predict()\n        updates this variable.\n    x_prior : numpy.array(dim_x, 1)\n        Prior (predicted) state estimate. The *_prior and *_post attributes\n        are for convenience; they store the  prior and posterior of the\n        current epoch. Read Only.\n    P_prior : numpy.array(dim_x, dim_x)\n        Prior (predicted) state covariance matrix. Read Only.\n    x_post : numpy.array(dim_x, 1)\n        Posterior (updated) state estimate. Read Only.\n    P_post : numpy.array(dim_x, dim_x)\n        Posterior (updated) state covariance matrix. Read Only.\n    z : numpy.array\n        Last measurement used in update(). Read only.\n    R : numpy.array(dim_z, dim_z)\n        Measurement noise covariance matrix. Also known as the\n        observation covariance.\n    Q : numpy.array(dim_x, dim_x)\n        Process noise covariance matrix. Also known as the transition\n        covariance.\n    F : numpy.array()\n        State Transition matrix. Also known as `A` in some formulation.\n    H : numpy.array(dim_z, dim_x)\n        Measurement function. Also known as the observation matrix, or as `C`.\n    y : numpy.array\n        Residual of the update step. Read only.\n    K : numpy.array(dim_x, dim_z)\n        Kalman gain of the update step. Read only.\n    S :  numpy.array\n        System uncertainty (P projected to measurement space). Read only.\n    SI :  numpy.array\n        Inverse system uncertainty. Read only.\n    log_likelihood : float\n        log-likelihood of the last measurement. Read only.\n    likelihood : float\n        likelihood of last measurement. Read only.\n        Computed from the log-likelihood. The log-likelihood can be very\n        small,  meaning a large negative value such as -28000. Taking the\n        exp() of that results in 0.0, which can break typical algorithms\n        which multiply by this value, so by default we always return a\n        number >= sys.float_info.min.\n    mahalanobis : float\n        mahalanobis distance of the innovation. Read only.\n    inv : function, default numpy.linalg.inv\n        If you prefer another inverse function, such as the Moore-Penrose\n        pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv\n        This is only used to invert self.S. If you know it is diagonal, you\n        might choose to set it to filterpy.common.inv_diagonal, which is\n        several times faster than numpy.linalg.inv for diagonal matrices.\n    alpha : float\n        Fading memory setting. 1.0 gives the normal Kalman filter, and\n        values slightly larger than 1.0 (such as 1.02) give a fading\n        memory effect - previous measurements have less influence on the\n        filter's estimates. This formulation of the Fading memory filter\n        (there are many) is due to Dan Simon [1]_.\n    References\n    ----------\n    .. 
[1] Dan Simon. \"Optimal State Estimation.\" John Wiley & Sons.\n       p. 208-212. (2006)\n    .. [2] Roger Labbe. \"Kalman and Bayesian Filters in Python\"\n       https://github.com/rlabbe/Kalman-and-Bayesian-Filters-in-Python\n    \"\"\"\n\n    def __init__(self, dim_x, dim_z, dim_u=0):\n        if dim_x < 1:\n            raise ValueError('dim_x must be 1 or greater')\n        if dim_z < 1:\n            raise ValueError('dim_z must be 1 or greater')\n        if dim_u < 0:\n            raise ValueError('dim_u must be 0 or greater')\n\n        self.dim_x = dim_x\n        self.dim_z = dim_z\n        self.dim_u = dim_u\n\n        self.x = zeros((dim_x, 1))        # state\n        self.P = eye(dim_x)               # uncertainty covariance\n        self.Q = eye(dim_x)               # process uncertainty\n        self.B = None                     # control transition matrix\n        self.F = eye(dim_x)               # state transition matrix\n        self.H = zeros((dim_z, dim_x))    # measurement function\n        self.R = eye(dim_z)               # measurement uncertainty\n        self._alpha_sq = 1.               # fading memory control\n        self.M = np.zeros((dim_x, dim_z)) # process-measurement cross correlation\n        self.z = np.array([[None]*self.dim_z]).T\n\n        # gain and residual are computed during the innovation step. We\n        # save them so that in case you want to inspect them for various\n        # purposes\n        self.K = np.zeros((dim_x, dim_z)) # kalman gain\n        self.y = zeros((dim_z, 1))\n        self.S = np.zeros((dim_z, dim_z)) # system uncertainty\n        self.SI = np.zeros((dim_z, dim_z)) # inverse system uncertainty\n\n        # identity matrix. Do not alter this.\n        self._I = np.eye(dim_x)\n\n        # these will always be a copy of x,P after predict() is called\n        self.x_prior = self.x.copy()\n        self.P_prior = self.P.copy()\n\n        # these will always be a copy of x,P after update() is called\n        self.x_post = self.x.copy()             \n        self.P_post = self.P.copy()\n\n        # Only computed only if requested via property\n        self._log_likelihood = log(sys.float_info.min)\n        self._likelihood = sys.float_info.min\n        self._mahalanobis = None\n\n        # keep all observations \n        self.history_obs = []\n\n        self.inv = np.linalg.inv\n\n        self.attr_saved = None\n        self.observed = False \n\n\n    def predict(self, u=None, B=None, F=None, Q=None):\n        \"\"\"\n        Predict next state (prior) using the Kalman filter state propagation\n        equations.\n        Parameters\n        ----------\n        u : np.array, default 0\n            Optional control vector.\n        B : np.array(dim_x, dim_u), or None\n            Optional control transition matrix; a value of None\n            will cause the filter to use `self.B`.\n        F : np.array(dim_x, dim_x), or None\n            Optional state transition matrix; a value of None\n            will cause the filter to use `self.F`.\n        Q : np.array(dim_x, dim_x), scalar, or None\n            Optional process noise matrix; a value of None will cause the\n            filter to use `self.Q`.\n        \"\"\"\n\n        if B is None:\n            B = self.B\n        if F is None:\n            F = self.F\n        if Q is None:\n            Q = self.Q\n        elif isscalar(Q):\n            Q = eye(self.dim_x) * Q\n\n\n        # x = Fx + Bu\n        if B is not None and u is not None:\n            self.x = dot(F, self.x) + 
dot(B, u)\n        else:\n            self.x = dot(F, self.x)\n\n        # P = FPF' + Q\n        self.P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q\n\n        # save prior\n        self.x_prior = self.x.copy()\n        self.P_prior = self.P.copy()\n\n\n\n    def freeze(self):\n        \"\"\"\n            Save the parameters before non-observation forward\n        \"\"\"\n        self.attr_saved = deepcopy(self.__dict__)\n\n\n    def unfreeze(self):\n        if self.attr_saved is not None:\n            new_history = deepcopy(self.history_obs)\n            self.__dict__ = self.attr_saved\n            # self.history_obs = new_history \n            self.history_obs = self.history_obs[:-1]\n            occur = [int(d is None) for d in new_history]\n            indices = np.where(np.array(occur)==0)[0]\n            index1 = indices[-2]\n            index2 = indices[-1]\n            box1 = new_history[index1]\n            x1, y1, s1, r1 = box1 \n            w1 = np.sqrt(s1 * r1)\n            h1 = np.sqrt(s1 / r1)\n            box2 = new_history[index2]\n            x2, y2, s2, r2 = box2 \n            w2 = np.sqrt(s2 * r2)\n            h2 = np.sqrt(s2 / r2)\n            time_gap = index2 - index1\n            dx = (x2-x1)/time_gap\n            dy = (y2-y1)/time_gap \n            dw = (w2-w1)/time_gap \n            dh = (h2-h1)/time_gap\n            for i in range(index2 - index1):\n                \"\"\"\n                    The default virtual trajectory generation is by linear\n                    motion (constant speed hypothesis), you could modify this \n                    part to implement your own. \n                \"\"\"\n                x = x1 + (i+1) * dx \n                y = y1 + (i+1) * dy \n                w = w1 + (i+1) * dw \n                h = h1 + (i+1) * dh\n                s = w * h \n                r = w / float(h)\n                new_box = np.array([x, y, s, r]).reshape((4, 1))\n                \"\"\"\n                    I still use predict-update loop here to refresh the parameters,\n                    but this can be faster by directly modifying the internal parameters\n                    as suggested in the paper. I keep this naive but slow way for \n                    easy read and understanding\n                \"\"\"\n                self.update(new_box)\n                if not i == (index2-index1-1):\n                    self.predict()\n\n\n    def update(self, z, R=None, H=None):\n        \"\"\"\n        Add a new measurement (z) to the Kalman filter.\n        If z is None, nothing is computed. However, x_post and P_post are\n        updated with the prior (x_prior, P_prior), and self.z is set to None.\n        Parameters\n        ----------\n        z : (dim_z, 1): array_like\n            measurement for this update. 
z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n            If you pass in a value of H, z must be a column vector the\n            of the correct size.\n        R : np.array, scalar, or None\n            Optionally provide R to override the measurement noise for this\n            one call, otherwise  self.R will be used.\n        H : np.array, or None\n            Optionally provide H to override the measurement function for this\n            one call, otherwise self.H will be used.\n        \"\"\"\n\n        # set to None to force recompute\n        self._log_likelihood = None\n        self._likelihood = None\n        self._mahalanobis = None\n\n        # append the observation\n        self.history_obs.append(z)\n        \n        if z is None:\n            if self.observed:\n                \"\"\"\n                    Got no observation so freeze the current parameters for future\n                    potential online smoothing.\n                \"\"\"\n                self.freeze()\n            self.observed = False \n            self.z = np.array([[None]*self.dim_z]).T\n            self.x_post = self.x.copy()\n            self.P_post = self.P.copy()\n            self.y = zeros((self.dim_z, 1))\n            return\n        \n        # self.observed = True\n        if not self.observed:\n            \"\"\"\n                Get observation, use online smoothing to re-update parameters\n            \"\"\"\n            self.unfreeze()\n        self.observed = True\n\n        if R is None:\n            R = self.R\n        elif isscalar(R):\n            R = eye(self.dim_z) * R\n\n        if H is None:\n            z = reshape_z(z, self.dim_z, self.x.ndim)\n            H = self.H\n\n        # y = z - Hx\n        # error (residual) between measurement and prediction\n        self.y = z - dot(H, self.x)\n\n        # common subexpression for speed\n        PHT = dot(self.P, H.T)\n\n        # S = HPH' + R\n        # project system uncertainty into measurement space\n        self.S = dot(H, PHT) + R\n        self.SI = self.inv(self.S)\n        # K = PH'inv(S)\n        # map system uncertainty into kalman gain\n        self.K = dot(PHT, self.SI)\n\n        # x = x + Ky\n        # predict new x with residual scaled by the kalman gain\n        self.x = self.x + dot(self.K, self.y)\n\n        # P = (I-KH)P(I-KH)' + KRK'\n        # This is more numerically stable\n        # and works for non-optimal K vs the equation\n        # P = (I-KH)P usually seen in the literature.\n\n        I_KH = self._I - dot(self.K, H)\n        self.P = dot(dot(I_KH, self.P), I_KH.T) + dot(dot(self.K, R), self.K.T)\n\n        # save measurement and posterior state\n        self.z = deepcopy(z)\n        self.x_post = self.x.copy()\n        self.P_post = self.P.copy()\n\n    def predict_steadystate(self, u=0, B=None):\n        \"\"\"\n        Predict state (prior) using the Kalman filter state propagation\n        equations. Only x is updated, P is left unchanged. See\n        update_steadstate() for a longer explanation of when to use this\n        method.\n        Parameters\n        ----------\n        u : np.array\n            Optional control vector. 
If non-zero, it is multiplied by B\n            to create the control input into the system.\n        B : np.array(dim_x, dim_u), or None\n            Optional control transition matrix; a value of None\n            will cause the filter to use `self.B`.\n        \"\"\"\n\n        if B is None:\n            B = self.B\n\n        # x = Fx + Bu\n        if B is not None:\n            self.x = dot(self.F, self.x) + dot(B, u)\n        else:\n            self.x = dot(self.F, self.x)\n\n        # save prior\n        self.x_prior = self.x.copy()\n        self.P_prior = self.P.copy()\n\n    def update_steadystate(self, z):\n        \"\"\"\n        Add a new measurement (z) to the Kalman filter without recomputing\n        the Kalman gain K, the state covariance P, or the system\n        uncertainty S.\n        You can use this for LTI systems since the Kalman gain and covariance\n        converge to a fixed value. Precompute these and assign them explicitly,\n        or run the Kalman filter using the normal predict()/update(0 cycle\n        until they converge.\n        The main advantage of this call is speed. We do significantly less\n        computation, notably avoiding a costly matrix inversion.\n        Use in conjunction with predict_steadystate(), otherwise P will grow\n        without bound.\n        Parameters\n        ----------\n        z : (dim_z, 1): array_like\n            measurement for this update. z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n        Examples\n        --------\n        >>> cv = kinematic_kf(dim=3, order=2) # 3D const velocity filter\n        >>> # let filter converge on representative data, then save k and P\n        >>> for i in range(100):\n        >>>     cv.predict()\n        >>>     cv.update([i, i, i])\n        >>> saved_k = np.copy(cv.K)\n        >>> saved_P = np.copy(cv.P)\n        later on:\n        >>> cv = kinematic_kf(dim=3, order=2) # 3D const velocity filter\n        >>> cv.K = np.copy(saved_K)\n        >>> cv.P = np.copy(saved_P)\n        >>> for i in range(100):\n        >>>     cv.predict_steadystate()\n        >>>     cv.update_steadystate([i, i, i])\n        \"\"\"\n\n        # set to None to force recompute\n        self._log_likelihood = None\n        self._likelihood = None\n        self._mahalanobis = None\n\n        if z is None:\n            self.z = np.array([[None]*self.dim_z]).T\n            self.x_post = self.x.copy()\n            self.P_post = self.P.copy()\n            self.y = zeros((self.dim_z, 1))\n            return\n\n        z = reshape_z(z, self.dim_z, self.x.ndim)\n\n        # y = z - Hx\n        # error (residual) between measurement and prediction\n        self.y = z - dot(self.H, self.x)\n\n        # x = x + Ky\n        # predict new x with residual scaled by the kalman gain\n        self.x = self.x + dot(self.K, self.y)\n\n        self.z = deepcopy(z)\n        self.x_post = self.x.copy()\n        self.P_post = self.P.copy()\n\n        # set to None to force recompute\n        self._log_likelihood = None\n        self._likelihood = None\n        self._mahalanobis = None\n\n    def update_correlated(self, z, R=None, H=None):\n        \"\"\" Add a new measurement (z) to the Kalman filter assuming that\n        process noise and measurement noise are correlated as defined in\n        the `self.M` matrix.\n        A partial derivation can be found in [1]\n        If z is None, nothing is changed.\n        Parameters\n        ----------\n        z : (dim_z, 1): 
array_like\n            measurement for this update. z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n        R : np.array, scalar, or None\n            Optionally provide R to override the measurement noise for this\n            one call, otherwise  self.R will be used.\n        H : np.array,  or None\n            Optionally provide H to override the measurement function for this\n            one call, otherwise  self.H will be used.\n        References\n        ----------\n        .. [1] Bulut, Y. (2011). Applied Kalman filter theory (Doctoral dissertation, Northeastern University).\n               http://people.duke.edu/~hpgavin/SystemID/References/Balut-KalmanFilter-PhD-NEU-2011.pdf\n        \"\"\"\n\n        # set to None to force recompute\n        self._log_likelihood = None\n        self._likelihood = None\n        self._mahalanobis = None\n\n        if z is None:\n            self.z = np.array([[None]*self.dim_z]).T\n            self.x_post = self.x.copy()\n            self.P_post = self.P.copy()\n            self.y = zeros((self.dim_z, 1))\n            return\n\n        if R is None:\n            R = self.R\n        elif isscalar(R):\n            R = eye(self.dim_z) * R\n\n        # rename for readability and a tiny extra bit of speed\n        if H is None:\n            z = reshape_z(z, self.dim_z, self.x.ndim)\n            H = self.H\n\n        # handle special case: if z is in form [[z]] but x is not a column\n        # vector dimensions will not match\n        if self.x.ndim == 1 and shape(z) == (1, 1):\n            z = z[0]\n\n        if shape(z) == (): # is it scalar, e.g. z=3 or z=np.array(3)\n            z = np.asarray([z])\n\n        # y = z - Hx\n        # error (residual) between measurement and prediction\n        self.y = z - dot(H, self.x)\n\n        # common subexpression for speed\n        PHT = dot(self.P, H.T)\n\n        # project system uncertainty into measurement space\n        self.S = dot(H, PHT) + dot(H, self.M) + dot(self.M.T, H.T) + R\n        self.SI = self.inv(self.S)\n\n        # K = PH'inv(S)\n        # map system uncertainty into kalman gain\n        self.K = dot(PHT + self.M, self.SI)\n\n        # x = x + Ky\n        # predict new x with residual scaled by the kalman gain\n        self.x = self.x + dot(self.K, self.y)\n        self.P = self.P - dot(self.K, dot(H, self.P) + self.M.T)\n\n        self.z = deepcopy(z)\n        self.x_post = self.x.copy()\n        self.P_post = self.P.copy()\n\n    def batch_filter(self, zs, Fs=None, Qs=None, Hs=None,\n                     Rs=None, Bs=None, us=None, update_first=False,\n                     saver=None):\n        \"\"\" Batch processes a sequences of measurements.\n        Parameters\n        ----------\n        zs : list-like\n            list of measurements at each time step `self.dt`. Missing\n            measurements must be represented by `None`.\n        Fs : None, list-like, default=None\n            optional value or list of values to use for the state transition\n            matrix F.\n            If Fs is None then self.F is used for all epochs.\n            Otherwise it must contain a list-like list of F's, one for\n            each epoch.  
This allows you to have varying F per epoch.\n        Qs : None, np.array or list-like, default=None\n            optional value or list of values to use for the process error\n            covariance Q.\n            If Qs is None then self.Q is used for all epochs.\n            Otherwise it must contain a list-like list of Q's, one for\n            each epoch.  This allows you to have varying Q per epoch.\n        Hs : None, np.array or list-like, default=None\n            optional list of values to use for the measurement matrix H.\n            If Hs is None then self.H is used for all epochs.\n            If Hs contains a single matrix, then it is used as H for all\n            epochs.\n            Otherwise it must contain a list-like list of H's, one for\n            each epoch.  This allows you to have varying H per epoch.\n        Rs : None, np.array or list-like, default=None\n            optional list of values to use for the measurement error\n            covariance R.\n            If Rs is None then self.R is used for all epochs.\n            Otherwise it must contain a list-like list of R's, one for\n            each epoch.  This allows you to have varying R per epoch.\n        Bs : None, np.array or list-like, default=None\n            optional list of values to use for the control transition matrix B.\n            If Bs is None then self.B is used for all epochs.\n            Otherwise it must contain a list-like list of B's, one for\n            each epoch.  This allows you to have varying B per epoch.\n        us : None, np.array or list-like, default=None\n            optional list of values to use for the control input vector;\n            If us is None then None is used for all epochs (equivalent to 0,\n            or no control input).\n            Otherwise it must contain a list-like list of u's, one for\n            each epoch.\n       update_first : bool, optional, default=False\n            controls whether the order of operations is update followed by\n            predict, or predict followed by update. Default is predict->update.\n        saver : filterpy.common.Saver, optional\n            filterpy.common.Saver object. If provided, saver.save() will be\n            called after every epoch\n        Returns\n        -------\n        means : np.array((n,dim_x,1))\n            array of the state for each time step after the update. Each entry\n            is an np.array. In other words `means[k,:]` is the state at step\n            `k`.\n        covariance : np.array((n,dim_x,dim_x))\n            array of the covariances for each time step after the update.\n            In other words `covariance[k,:,:]` is the covariance at step `k`.\n        means_predictions : np.array((n,dim_x,1))\n            array of the state for each time step after the predictions. Each\n            entry is an np.array. In other words `means[k,:]` is the state at\n            step `k`.\n        covariance_predictions : np.array((n,dim_x,dim_x))\n            array of the covariances for each time step after the prediction.\n            In other words `covariance[k,:,:]` is the covariance at step `k`.\n        Examples\n        --------\n        .. code-block:: Python\n            # this example demonstrates tracking a measurement where the time\n            # between measurement varies, as stored in dts. This requires\n            # that F be recomputed for each epoch. 
The output is then smoothed\n            # with an RTS smoother.\n            zs = [t + random.randn()*4 for t in range (40)]\n            Fs = [np.array([[1., dt], [0, 1]] for dt in dts]\n            (mu, cov, _, _) = kf.batch_filter(zs, Fs=Fs)\n            (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs)\n        \"\"\"\n\n        #pylint: disable=too-many-statements\n        n = np.size(zs, 0)\n        if Fs is None:\n            Fs = [self.F] * n\n        if Qs is None:\n            Qs = [self.Q] * n\n        if Hs is None:\n            Hs = [self.H] * n\n        if Rs is None:\n            Rs = [self.R] * n\n        if Bs is None:\n            Bs = [self.B] * n\n        if us is None:\n            us = [0] * n\n\n        # mean estimates from Kalman Filter\n        if self.x.ndim == 1:\n            means = zeros((n, self.dim_x))\n            means_p = zeros((n, self.dim_x))\n        else:\n            means = zeros((n, self.dim_x, 1))\n            means_p = zeros((n, self.dim_x, 1))\n\n        # state covariances from Kalman Filter\n        covariances = zeros((n, self.dim_x, self.dim_x))\n        covariances_p = zeros((n, self.dim_x, self.dim_x))\n\n        if update_first:\n            for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):\n\n                self.update(z, R=R, H=H)\n                means[i, :] = self.x\n                covariances[i, :, :] = self.P\n\n                self.predict(u=u, B=B, F=F, Q=Q)\n                means_p[i, :] = self.x\n                covariances_p[i, :, :] = self.P\n\n                if saver is not None:\n                    saver.save()\n        else:\n            for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):\n\n                self.predict(u=u, B=B, F=F, Q=Q)\n                means_p[i, :] = self.x\n                covariances_p[i, :, :] = self.P\n\n                self.update(z, R=R, H=H)\n                means[i, :] = self.x\n                covariances[i, :, :] = self.P\n\n                if saver is not None:\n                    saver.save()\n\n        return (means, covariances, means_p, covariances_p)\n\n    def rts_smoother(self, Xs, Ps, Fs=None, Qs=None, inv=np.linalg.inv):\n        \"\"\"\n        Runs the Rauch-Tung-Striebel Kalman smoother on a set of\n        means and covariances computed by a Kalman filter. The usual input\n        would come from the output of `KalmanFilter.batch_filter()`.\n        Parameters\n        ----------\n        Xs : numpy.array\n           array of the means (state variable x) of the output of a Kalman\n           filter.\n        Ps : numpy.array\n            array of the covariances of the output of a kalman filter.\n        Fs : list-like collection of numpy.array, optional\n            State transition matrix of the Kalman filter at each time step.\n            Optional, if not provided the filter's self.F will be used\n        Qs : list-like collection of numpy.array, optional\n            Process noise of the Kalman filter at each time step. 
Optional,\n            if not provided the filter's self.Q will be used\n        inv : function, default numpy.linalg.inv\n            If you prefer another inverse function, such as the Moore-Penrose\n            pseudo inverse, set it to that instead: kf.inv = np.linalg.pinv\n        Returns\n        -------\n        x : numpy.ndarray\n           smoothed means\n        P : numpy.ndarray\n           smoothed state covariances\n        K : numpy.ndarray\n            smoother gain at each step\n        Pp : numpy.ndarray\n           Predicted state covariances\n        Examples\n        --------\n        .. code-block:: Python\n            zs = [t + random.randn()*4 for t in range (40)]\n            (mu, cov, _, _) = kalman.batch_filter(zs)\n            (x, P, K, Pp) = rts_smoother(mu, cov, kf.F, kf.Q)\n        \"\"\"\n\n        if len(Xs) != len(Ps):\n            raise ValueError('length of Xs and Ps must be the same')\n\n        n = Xs.shape[0]\n        dim_x = Xs.shape[1]\n\n        if Fs is None:\n            Fs = [self.F] * n\n        if Qs is None:\n            Qs = [self.Q] * n\n\n        # smoother gain\n        K = zeros((n, dim_x, dim_x))\n\n        x, P, Pp = Xs.copy(), Ps.copy(), Ps.copy()\n        for k in range(n-2, -1, -1):\n            Pp[k] = dot(dot(Fs[k+1], P[k]), Fs[k+1].T) + Qs[k+1]\n\n            #pylint: disable=bad-whitespace\n            K[k]  = dot(dot(P[k], Fs[k+1].T), inv(Pp[k]))\n            x[k] += dot(K[k], x[k+1] - dot(Fs[k+1], x[k]))\n            P[k] += dot(dot(K[k], P[k+1] - Pp[k]), K[k].T)\n\n        return (x, P, K, Pp)\n\n    def get_prediction(self, u=None, B=None, F=None, Q=None):\n        \"\"\"\n        Predict next state (prior) using the Kalman filter state propagation\n        equations and returns it without modifying the object.\n        Parameters\n        ----------\n        u : np.array, default 0\n            Optional control vector.\n        B : np.array(dim_x, dim_u), or None\n            Optional control transition matrix; a value of None\n            will cause the filter to use `self.B`.\n        F : np.array(dim_x, dim_x), or None\n            Optional state transition matrix; a value of None\n            will cause the filter to use `self.F`.\n        Q : np.array(dim_x, dim_x), scalar, or None\n            Optional process noise matrix; a value of None will cause the\n            filter to use `self.Q`.\n        Returns\n        -------\n        (x, P) : tuple\n            State vector and covariance array of the prediction.\n        \"\"\"\n\n        if B is None:\n            B = self.B\n        if F is None:\n            F = self.F\n        if Q is None:\n            Q = self.Q\n        elif isscalar(Q):\n            Q = eye(self.dim_x) * Q\n\n        # x = Fx + Bu\n        if B is not None and u is not None:\n            x = dot(F, self.x) + dot(B, u)\n        else:\n            x = dot(F, self.x)\n\n        # P = FPF' + Q\n        P = self._alpha_sq * dot(dot(F, self.P), F.T) + Q\n\n        return x, P\n\n    def get_update(self, z=None):\n        \"\"\"\n        Computes the new estimate based on measurement `z` and returns it\n        without altering the state of the filter.\n        Parameters\n        ----------\n        z : (dim_z, 1): array_like\n            measurement for this update. 
z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n        Returns\n        -------\n        (x, P) : tuple\n            State vector and covariance array of the update.\n       \"\"\"\n\n        if z is None:\n            return self.x, self.P\n        z = reshape_z(z, self.dim_z, self.x.ndim)\n\n        R = self.R\n        H = self.H\n        P = self.P\n        x = self.x\n\n        # error (residual) between measurement and prediction\n        y = z - dot(H, x)\n\n        # common subexpression for speed\n        PHT = dot(P, H.T)\n\n        # project system uncertainty into measurement space\n        S = dot(H, PHT) + R\n\n        # map system uncertainty into kalman gain\n        K = dot(PHT, self.inv(S))\n\n        # predict new x with residual scaled by the kalman gain\n        x = x + dot(K, y)\n\n        # P = (I-KH)P(I-KH)' + KRK'\n        I_KH = self._I - dot(K, H)\n        P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T)\n\n        return x, P\n\n    def residual_of(self, z):\n        \"\"\"\n        Returns the residual for the given measurement (z). Does not alter\n        the state of the filter.\n        \"\"\"\n        z = reshape_z(z, self.dim_z, self.x.ndim)\n        return z - dot(self.H, self.x_prior)\n\n    def measurement_of_state(self, x):\n        \"\"\"\n        Helper function that converts a state into a measurement.\n        Parameters\n        ----------\n        x : np.array\n            kalman state vector\n        Returns\n        -------\n        z : (dim_z, 1): array_like\n            measurement for this update. z can be a scalar if dim_z is 1,\n            otherwise it must be convertible to a column vector.\n        \"\"\"\n\n        return dot(self.H, x)\n\n    @property\n    def log_likelihood(self):\n        \"\"\"\n        log-likelihood of the last measurement.\n        \"\"\"\n        if self._log_likelihood is None:\n            self._log_likelihood = logpdf(x=self.y, cov=self.S)\n        return self._log_likelihood\n\n    @property\n    def likelihood(self):\n        \"\"\"\n        Computed from the log-likelihood. The log-likelihood can be very\n        small,  meaning a large negative value such as -28000. Taking the\n        exp() of that results in 0.0, which can break typical algorithms\n        which multiply by this value, so by default we always return a\n        number >= sys.float_info.min.\n        \"\"\"\n        if self._likelihood is None:\n            self._likelihood = exp(self.log_likelihood)\n            if self._likelihood == 0:\n                self._likelihood = sys.float_info.min\n        return self._likelihood\n\n    @property\n    def mahalanobis(self):\n        \"\"\"\"\n        Mahalanobis distance of measurement. E.g. 3 means measurement\n        was 3 standard deviations away from the predicted value.\n        Returns\n        -------\n        mahalanobis : float\n        \"\"\"\n        if self._mahalanobis is None:\n            self._mahalanobis = sqrt(float(dot(dot(self.y.T, self.SI), self.y)))\n        return self._mahalanobis\n\n    @property\n    def alpha(self):\n        \"\"\"\n        Fading memory setting. 1.0 gives the normal Kalman filter, and\n        values slightly larger than 1.0 (such as 1.02) give a fading\n        memory effect - previous measurements have less influence on the\n        filter's estimates. 
This formulation of the Fading memory filter\n        (there are many) is due to Dan Simon [1]_.\n        \"\"\"\n        return self._alpha_sq**.5\n\n    def log_likelihood_of(self, z):\n        \"\"\"\n        log likelihood of the measurement `z`. This should only be called\n        after a call to update(). Calling after predict() will yield an\n        incorrect result.\"\"\"\n\n        if z is None:\n            return log(sys.float_info.min)\n        return logpdf(z, dot(self.H, self.x), self.S)\n\n    @alpha.setter\n    def alpha(self, value):\n        if not np.isscalar(value) or value < 1:\n            raise ValueError('alpha must be a float greater than 1')\n\n        self._alpha_sq = value**2\n\n    def __repr__(self):\n        return '\\n'.join([\n            'KalmanFilter object',\n            pretty_str('dim_x', self.dim_x),\n            pretty_str('dim_z', self.dim_z),\n            pretty_str('dim_u', self.dim_u),\n            pretty_str('x', self.x),\n            pretty_str('P', self.P),\n            pretty_str('x_prior', self.x_prior),\n            pretty_str('P_prior', self.P_prior),\n            pretty_str('x_post', self.x_post),\n            pretty_str('P_post', self.P_post),\n            pretty_str('F', self.F),\n            pretty_str('Q', self.Q),\n            pretty_str('R', self.R),\n            pretty_str('H', self.H),\n            pretty_str('K', self.K),\n            pretty_str('y', self.y),\n            pretty_str('S', self.S),\n            pretty_str('SI', self.SI),\n            pretty_str('M', self.M),\n            pretty_str('B', self.B),\n            pretty_str('z', self.z),\n            pretty_str('log-likelihood', self.log_likelihood),\n            pretty_str('likelihood', self.likelihood),\n            pretty_str('mahalanobis', self.mahalanobis),\n            pretty_str('alpha', self.alpha),\n            pretty_str('inv', self.inv)\n            ])\n\n    def test_matrix_dimensions(self, z=None, H=None, R=None, F=None, Q=None):\n        \"\"\"\n        Performs a series of asserts to check that the size of everything\n        is what it should be. This can help you debug problems in your design.\n        If you pass in H, R, F, Q those will be used instead of this object's\n        value for those matrices.\n        Testing `z` (the measurement) is problamatic. x is a vector, and can be\n        implemented as either a 1D array or as a nx1 column vector. Thus Hx\n        can be of different shapes. Then, if Hx is a single value, it can\n        be either a 1D array or 2D vector. If either is true, z can reasonably\n        be a scalar (either '3' or np.array('3') are scalars under this\n        definition), a 1D, 1 element array, or a 2D, 1 element array. 
You are\n        allowed to pass in any combination that works.\n        \"\"\"\n\n        if H is None:\n            H = self.H\n        if R is None:\n            R = self.R\n        if F is None:\n            F = self.F\n        if Q is None:\n            Q = self.Q\n        x = self.x\n        P = self.P\n\n        assert x.ndim == 1 or x.ndim == 2, \\\n                \"x must have one or two dimensions, but has {}\".format(x.ndim)\n\n        if x.ndim == 1:\n            assert x.shape[0] == self.dim_x, \\\n                   \"Shape of x must be ({},{}), but is {}\".format(\n                       self.dim_x, 1, x.shape)\n        else:\n            assert x.shape == (self.dim_x, 1), \\\n                   \"Shape of x must be ({},{}), but is {}\".format(\n                       self.dim_x, 1, x.shape)\n\n        assert P.shape == (self.dim_x, self.dim_x), \\\n               \"Shape of P must be ({},{}), but is {}\".format(\n                   self.dim_x, self.dim_x, P.shape)\n\n        assert Q.shape == (self.dim_x, self.dim_x), \\\n               \"Shape of Q must be ({},{}), but is {}\".format(\n                   self.dim_x, self.dim_x, P.shape)\n\n        assert F.shape == (self.dim_x, self.dim_x), \\\n               \"Shape of F must be ({},{}), but is {}\".format(\n                   self.dim_x, self.dim_x, F.shape)\n\n        assert np.ndim(H) == 2, \\\n               \"Shape of H must be (dim_z, {}), but is {}\".format(\n                   P.shape[0], shape(H))\n\n        assert H.shape[1] == P.shape[0], \\\n               \"Shape of H must be (dim_z, {}), but is {}\".format(\n                   P.shape[0], H.shape)\n\n        # shape of R must be the same as HPH'\n        hph_shape = (H.shape[0], H.shape[0])\n        r_shape = shape(R)\n\n        if H.shape[0] == 1:\n            # r can be scalar, 1D, or 2D in this case\n            assert r_shape in [(), (1,), (1, 1)], \\\n            \"R must be scalar or one element array, but is shaped {}\".format(\n                r_shape)\n        else:\n            assert r_shape == hph_shape, \\\n            \"shape of R should be {} but it is {}\".format(hph_shape, r_shape)\n\n\n        if z is not None:\n            z_shape = shape(z)\n        else:\n            z_shape = (self.dim_z, 1)\n\n        # H@x must have shape of z\n        Hx = dot(H, x)\n\n        if z_shape == (): # scalar or np.array(scalar)\n            assert Hx.ndim == 1 or shape(Hx) == (1, 1), \\\n            \"shape of z should be {}, not {} for the given H\".format(\n                shape(Hx), z_shape)\n\n        elif shape(Hx) == (1,):\n            assert z_shape[0] == 1, 'Shape of z must be {} for the given H'.format(shape(Hx))\n\n        else:\n            assert (z_shape == shape(Hx) or\n                    (len(z_shape) == 1 and shape(Hx) == (z_shape[0], 1))), \\\n                    \"shape of z should be {}, not {} for the given H\".format(\n                        shape(Hx), z_shape)\n\n        if np.ndim(Hx) > 1 and shape(Hx) != (1, 1):\n            assert shape(Hx) == z_shape, \\\n               'shape of z should be {} for the given H, but it is {}'.format(\n                   shape(Hx), z_shape)\n\n\ndef update(x, P, z, R, H=None, return_all=False):\n    \"\"\"\n    Add a new measurement (z) to the Kalman filter. If z is None, nothing\n    is changed.\n    This can handle either the multidimensional or unidimensional case. 
If\n    all parameters are floats instead of arrays the filter will still work,\n    and return floats for x, P as the result.\n    update(1, 2, 1, 1, 1)  # univariate\n    update(x, P, 1\n    Parameters\n    ----------\n    x : numpy.array(dim_x, 1), or float\n        State estimate vector\n    P : numpy.array(dim_x, dim_x), or float\n        Covariance matrix\n    z : (dim_z, 1): array_like\n        measurement for this update. z can be a scalar if dim_z is 1,\n        otherwise it must be convertible to a column vector.\n    R : numpy.array(dim_z, dim_z), or float\n        Measurement noise matrix\n    H : numpy.array(dim_x, dim_x), or float, optional\n        Measurement function. If not provided, a value of 1 is assumed.\n    return_all : bool, default False\n        If true, y, K, S, and log_likelihood are returned, otherwise\n        only x and P are returned.\n    Returns\n    -------\n    x : numpy.array\n        Posterior state estimate vector\n    P : numpy.array\n        Posterior covariance matrix\n    y : numpy.array or scalar\n        Residua. Difference between measurement and state in measurement space\n    K : numpy.array\n        Kalman gain\n    S : numpy.array\n        System uncertainty in measurement space\n    log_likelihood : float\n        log likelihood of the measurement\n    \"\"\"\n\n    #pylint: disable=bare-except\n\n    if z is None:\n        if return_all:\n            return x, P, None, None, None, None\n        return x, P\n\n    if H is None:\n        H = np.array([1])\n\n    if np.isscalar(H):\n        H = np.array([H])\n\n    Hx = np.atleast_1d(dot(H, x))\n    z = reshape_z(z, Hx.shape[0], x.ndim)\n\n    # error (residual) between measurement and prediction\n    y = z - Hx\n\n    # project system uncertainty into measurement space\n    S = dot(dot(H, P), H.T) + R\n\n\n    # map system uncertainty into kalman gain\n    try:\n        K = dot(dot(P, H.T), linalg.inv(S))\n    except:\n        # can't invert a 1D array, annoyingly\n        K = dot(dot(P, H.T), 1./S)\n\n\n    # predict new x with residual scaled by the kalman gain\n    x = x + dot(K, y)\n\n    # P = (I-KH)P(I-KH)' + KRK'\n    KH = dot(K, H)\n\n    try:\n        I_KH = np.eye(KH.shape[0]) - KH\n    except:\n        I_KH = np.array([1 - KH])\n    P = dot(dot(I_KH, P), I_KH.T) + dot(dot(K, R), K.T)\n\n\n    if return_all:\n        # compute log likelihood\n        log_likelihood = logpdf(z, dot(H, x), S)\n        return x, P, y, K, S, log_likelihood\n    return x, P\n\n\ndef update_steadystate(x, z, K, H=None):\n    \"\"\"\n    Add a new measurement (z) to the Kalman filter. If z is None, nothing\n    is changed.\n    Parameters\n    ----------\n    x : numpy.array(dim_x, 1), or float\n        State estimate vector\n    z : (dim_z, 1): array_like\n        measurement for this update. z can be a scalar if dim_z is 1,\n        otherwise it must be convertible to a column vector.\n    K : numpy.array, or float\n        Kalman gain matrix\n    H : numpy.array(dim_x, dim_x), or float, optional\n        Measurement function. If not provided, a value of 1 is assumed.\n    Returns\n    -------\n    x : numpy.array\n        Posterior state estimate vector\n    Examples\n    --------\n    This can handle either the multidimensional or unidimensional case. 
If\n    all parameters are floats instead of arrays the filter will still work,\n    and return floats for x, P as the result.\n    >>> update_steadystate(1, 2, 1)  # univariate\n    >>> update_steadystate(x, P, z, H)\n    \"\"\"\n\n\n    if z is None:\n        return x\n\n    if H is None:\n        H = np.array([1])\n\n    if np.isscalar(H):\n        H = np.array([H])\n\n    Hx = np.atleast_1d(dot(H, x))\n    z = reshape_z(z, Hx.shape[0], x.ndim)\n\n    # error (residual) between measurement and prediction\n    y = z - Hx\n\n    # estimate new x with residual scaled by the kalman gain\n    return x + dot(K, y)\n\n\ndef predict(x, P, F=1, Q=0, u=0, B=1, alpha=1.):\n    \"\"\"\n    Predict next state (prior) using the Kalman filter state propagation\n    equations.\n    Parameters\n    ----------\n    x : numpy.array\n        State estimate vector\n    P : numpy.array\n        Covariance matrix\n    F : numpy.array()\n        State Transition matrix\n    Q : numpy.array, Optional\n        Process noise matrix\n    u : numpy.array, Optional, default 0.\n        Control vector. If non-zero, it is multiplied by B\n        to create the control input into the system.\n    B : numpy.array, optional, default 0.\n        Control transition matrix.\n    alpha : float, Optional, default=1.0\n        Fading memory setting. 1.0 gives the normal Kalman filter, and\n        values slightly larger than 1.0 (such as 1.02) give a fading\n        memory effect - previous measurements have less influence on the\n        filter's estimates. This formulation of the Fading memory filter\n        (there are many) is due to Dan Simon\n    Returns\n    -------\n    x : numpy.array\n        Prior state estimate vector\n    P : numpy.array\n        Prior covariance matrix\n    \"\"\"\n\n    if np.isscalar(F):\n        F = np.array(F)\n    x = dot(F, x) + dot(B, u)\n    P = (alpha * alpha) * dot(dot(F, P), F.T) + Q\n\n    return x, P\n\n\ndef predict_steadystate(x, F=1, u=0, B=1):\n    \"\"\"\n    Predict next state (prior) using the Kalman filter state propagation\n    equations. This steady state form only computes x, assuming that the\n    covariance is constant.\n    Parameters\n    ----------\n    x : numpy.array\n        State estimate vector\n    P : numpy.array\n        Covariance matrix\n    F : numpy.array()\n        State Transition matrix\n    u : numpy.array, Optional, default 0.\n        Control vector. If non-zero, it is multiplied by B\n        to create the control input into the system.\n    B : numpy.array, optional, default 0.\n        Control transition matrix.\n    Returns\n    -------\n    x : numpy.array\n        Prior state estimate vector\n    \"\"\"\n\n    if np.isscalar(F):\n        F = np.array(F)\n    x = dot(F, x) + dot(B, u)\n\n    return x\n\n\n\ndef batch_filter(x, P, zs, Fs, Qs, Hs, Rs, Bs=None, us=None,\n                 update_first=False, saver=None):\n    \"\"\"\n    Batch processes a sequences of measurements.\n    Parameters\n    ----------\n    zs : list-like\n        list of measurements at each time step. 
Missing measurements must be\n        represented by None.\n    Fs : list-like\n        list of values to use for the state transition matrix matrix.\n    Qs : list-like\n        list of values to use for the process error\n        covariance.\n    Hs : list-like\n        list of values to use for the measurement matrix.\n    Rs : list-like\n        list of values to use for the measurement error\n        covariance.\n    Bs : list-like, optional\n        list of values to use for the control transition matrix;\n        a value of None in any position will cause the filter\n        to use `self.B` for that time step.\n    us : list-like, optional\n        list of values to use for the control input vector;\n        a value of None in any position will cause the filter to use\n        0 for that time step.\n    update_first : bool, optional\n        controls whether the order of operations is update followed by\n        predict, or predict followed by update. Default is predict->update.\n        saver : filterpy.common.Saver, optional\n            filterpy.common.Saver object. If provided, saver.save() will be\n            called after every epoch\n    Returns\n    -------\n    means : np.array((n,dim_x,1))\n        array of the state for each time step after the update. Each entry\n        is an np.array. In other words `means[k,:]` is the state at step\n        `k`.\n    covariance : np.array((n,dim_x,dim_x))\n        array of the covariances for each time step after the update.\n        In other words `covariance[k,:,:]` is the covariance at step `k`.\n    means_predictions : np.array((n,dim_x,1))\n        array of the state for each time step after the predictions. Each\n        entry is an np.array. In other words `means[k,:]` is the state at\n        step `k`.\n    covariance_predictions : np.array((n,dim_x,dim_x))\n        array of the covariances for each time step after the prediction.\n        In other words `covariance[k,:,:]` is the covariance at step `k`.\n    Examples\n    --------\n    .. code-block:: Python\n        zs = [t + random.randn()*4 for t in range (40)]\n        Fs = [kf.F for t in range (40)]\n        Hs = [kf.H for t in range (40)]\n        (mu, cov, _, _) = kf.batch_filter(zs, Rs=R_list, Fs=Fs, Hs=Hs, Qs=None,\n                                          Bs=None, us=None, update_first=False)\n        (xs, Ps, Ks, Pps) = kf.rts_smoother(mu, cov, Fs=Fs, Qs=None)\n    \"\"\"\n\n    n = np.size(zs, 0)\n    dim_x = x.shape[0]\n\n    # mean estimates from Kalman Filter\n    if x.ndim == 1:\n        means = zeros((n, dim_x))\n        means_p = zeros((n, dim_x))\n    else:\n        means = zeros((n, dim_x, 1))\n        means_p = zeros((n, dim_x, 1))\n\n    # state covariances from Kalman Filter\n    covariances = zeros((n, dim_x, dim_x))\n    covariances_p = zeros((n, dim_x, dim_x))\n\n    if us is None:\n        us = [0.] * n\n        Bs = [0.] 
* n\n\n    if update_first:\n        for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):\n\n            x, P = update(x, P, z, R=R, H=H)\n            means[i, :] = x\n            covariances[i, :, :] = P\n\n            x, P = predict(x, P, u=u, B=B, F=F, Q=Q)\n            means_p[i, :] = x\n            covariances_p[i, :, :] = P\n            if saver is not None:\n                saver.save()\n    else:\n        for i, (z, F, Q, H, R, B, u) in enumerate(zip(zs, Fs, Qs, Hs, Rs, Bs, us)):\n\n            x, P = predict(x, P, u=u, B=B, F=F, Q=Q)\n            means_p[i, :] = x\n            covariances_p[i, :, :] = P\n\n            x, P = update(x, P, z, R=R, H=H)\n            means[i, :] = x\n            covariances[i, :, :] = P\n            if saver is not None:\n                saver.save()\n\n    return (means, covariances, means_p, covariances_p)\n\n\n\ndef rts_smoother(Xs, Ps, Fs, Qs):\n    \"\"\"\n    Runs the Rauch-Tung-Striebel Kalman smoother on a set of\n    means and covariances computed by a Kalman filter. The usual input\n    would come from the output of `KalmanFilter.batch_filter()`.\n    Parameters\n    ----------\n    Xs : numpy.array\n       array of the means (state variable x) of the output of a Kalman\n       filter.\n    Ps : numpy.array\n        array of the covariances of the output of a kalman filter.\n    Fs : list-like collection of numpy.array\n        State transition matrix of the Kalman filter at each time step.\n    Qs : list-like collection of numpy.array, optional\n        Process noise of the Kalman filter at each time step.\n    Returns\n    -------\n    x : numpy.ndarray\n       smoothed means\n    P : numpy.ndarray\n       smoothed state covariances\n    K : numpy.ndarray\n        smoother gain at each step\n    pP : numpy.ndarray\n       predicted state covariances\n    Examples\n    --------\n    .. code-block:: Python\n        zs = [t + random.randn()*4 for t in range (40)]\n        (mu, cov, _, _) = kalman.batch_filter(zs)\n        (x, P, K, pP) = rts_smoother(mu, cov, kf.F, kf.Q)\n    \"\"\"\n\n    if len(Xs) != len(Ps):\n        raise ValueError('length of Xs and Ps must be the same')\n\n    n = Xs.shape[0]\n    dim_x = Xs.shape[1]\n\n    # smoother gain\n    K = zeros((n, dim_x, dim_x))\n    x, P, pP = Xs.copy(), Ps.copy(), Ps.copy()\n\n    for k in range(n-2, -1, -1):\n        pP[k] = dot(dot(Fs[k], P[k]), Fs[k].T) + Qs[k]\n\n        #pylint: disable=bad-whitespace\n        K[k]  = dot(dot(P[k], Fs[k].T), linalg.inv(pP[k]))\n        x[k] += dot(K[k], x[k+1] - dot(Fs[k], x[k]))\n        P[k] += dot(dot(K[k], P[k+1] - pP[k]), K[k].T)\n\n    return (x, P, K, pP)"
  },
  {
    "path": "DLTA_AI_app/trackers/ocsort/ocsort.py",
    "content": "\"\"\"\n    This script is adopted from the SORT script by Alex Bewley alex@bewley.ai\n\"\"\"\nfrom __future__ import print_function\n\nimport numpy as np\nfrom .association import *\nfrom ultralytics.yolo.utils.ops import xywh2xyxy\n\n\ndef k_previous_obs(observations, cur_age, k):\n    if len(observations) == 0:\n        return [-1, -1, -1, -1, -1]\n    for i in range(k):\n        dt = k - i\n        if cur_age - dt in observations:\n            return observations[cur_age-dt]\n    max_age = max(observations.keys())\n    return observations[max_age]\n\n\ndef convert_bbox_to_z(bbox):\n    \"\"\"\n    Takes a bounding box in the form [x1,y1,x2,y2] and returns z in the form\n      [x,y,s,r] where x,y is the centre of the box and s is the scale/area and r is\n      the aspect ratio\n    \"\"\"\n    w = bbox[2] - bbox[0]\n    h = bbox[3] - bbox[1]\n    x = bbox[0] + w/2.\n    y = bbox[1] + h/2.\n    s = w * h  # scale is just area\n    r = w / float(h+1e-6)\n    return np.array([x, y, s, r]).reshape((4, 1))\n\n\ndef convert_x_to_bbox(x, score=None):\n    \"\"\"\n    Takes a bounding box in the centre form [x,y,s,r] and returns it in the form\n      [x1,y1,x2,y2] where x1,y1 is the top left and x2,y2 is the bottom right\n    \"\"\"\n    w = np.sqrt(x[2] * x[3])\n    h = x[2] / w\n    if(score == None):\n      return np.array([x[0]-w/2., x[1]-h/2., x[0]+w/2., x[1]+h/2.]).reshape((1, 4))\n    else:\n      return np.array([x[0]-w/2., x[1]-h/2., x[0]+w/2., x[1]+h/2., score]).reshape((1, 5))\n\n\ndef speed_direction(bbox1, bbox2):\n    cx1, cy1 = (bbox1[0]+bbox1[2]) / 2.0, (bbox1[1]+bbox1[3])/2.0\n    cx2, cy2 = (bbox2[0]+bbox2[2]) / 2.0, (bbox2[1]+bbox2[3])/2.0\n    speed = np.array([cy2-cy1, cx2-cx1])\n    norm = np.sqrt((cy2-cy1)**2 + (cx2-cx1)**2) + 1e-6\n    return speed / norm\n\n\nclass KalmanBoxTracker(object):\n    \"\"\"\n    This class represents the internal state of individual tracked objects observed as bbox.\n    \"\"\"\n    count = 0\n\n    def __init__(self, bbox, cls, delta_t=3, orig=False):\n        \"\"\"\n        Initialises a tracker using initial bounding box.\n\n        \"\"\"\n        # define constant velocity model\n        if not orig:\n          from .kalmanfilter import KalmanFilterNew as KalmanFilter\n          self.kf = KalmanFilter(dim_x=7, dim_z=4)\n        else:\n          from filterpy.kalman import KalmanFilter\n          self.kf = KalmanFilter(dim_x=7, dim_z=4)\n        self.kf.F = np.array([[1, 0, 0, 0, 1, 0, 0], [0, 1, 0, 0, 0, 1, 0], [0, 0, 1, 0, 0, 0, 1], [\n                            0, 0, 0, 1, 0, 0, 0],  [0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 0, 0, 1]])\n        self.kf.H = np.array([[1, 0, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0],\n                            [0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0]])\n\n        self.kf.R[2:, 2:] *= 10.\n        self.kf.P[4:, 4:] *= 1000.  # give high uncertainty to the unobservable initial velocities\n        self.kf.P *= 10.\n        self.kf.Q[-1, -1] *= 0.01\n        self.kf.Q[4:, 4:] *= 0.01\n\n        self.kf.x[:4] = convert_bbox_to_z(bbox)\n        self.time_since_update = 0\n        self.id = KalmanBoxTracker.count\n        KalmanBoxTracker.count += 1\n        self.history = []\n        self.hits = 0\n        self.hit_streak = 0\n        self.age = 0\n        self.conf = bbox[-1]\n        self.cls = cls\n        \"\"\"\n        NOTE: [-1,-1,-1,-1,-1] is a compromising placeholder for non-observation status, the same for the return of \n        function k_previous_obs. 
It is ugly and I do not like it. But to support generating the observation array in a \n        fast and unified way (see k_observations = np.array([k_previous_obs(...)]) below), let's bear it for now.\n        \"\"\"\n        self.last_observation = np.array([-1, -1, -1, -1, -1])  # placeholder\n        self.observations = dict()\n        self.history_observations = []\n        self.velocity = None\n        self.delta_t = delta_t\n\n    def update(self, bbox, cls):\n        \"\"\"\n        Updates the state vector with observed bbox.\n        \"\"\"\n        \n        if bbox is not None:\n            self.conf = bbox[-1]\n            self.cls = cls\n            if self.last_observation.sum() >= 0:  # a previous observation exists (the all -1 placeholder sums to -5)\n                previous_box = None\n                for i in range(self.delta_t):\n                    dt = self.delta_t - i\n                    if self.age - dt in self.observations:\n                        previous_box = self.observations[self.age-dt]\n                        break\n                if previous_box is None:\n                    previous_box = self.last_observation\n                \"\"\"\n                  Estimate the track speed direction with observations \\Delta t steps away\n                \"\"\"\n                self.velocity = speed_direction(previous_box, bbox)\n            \n            \"\"\"\n              Insert new observations. This is an ugly way to maintain both self.observations\n              and self.history_observations. Bear it for the moment.\n            \"\"\"\n            self.last_observation = bbox\n            self.observations[self.age] = bbox\n            self.history_observations.append(bbox)\n\n            self.time_since_update = 0\n            self.history = []\n            self.hits += 1\n            self.hit_streak += 1\n            self.kf.update(convert_bbox_to_z(bbox))\n        else:\n            self.kf.update(bbox)\n\n    def predict(self):\n        \"\"\"\n        Advances the state vector and returns the predicted bounding box estimate.\n        \"\"\"\n        if((self.kf.x[6]+self.kf.x[2]) <= 0):\n            self.kf.x[6] *= 0.0\n\n        self.kf.predict()\n        self.age += 1\n        if(self.time_since_update > 0):\n            self.hit_streak = 0\n        self.time_since_update += 1\n        self.history.append(convert_x_to_bbox(self.kf.x))\n        return self.history[-1]\n\n    def get_state(self):\n        \"\"\"\n        Returns the current bounding box estimate.\n        \"\"\"\n        return convert_x_to_bbox(self.kf.x)\n\n\n\"\"\"\n    We support multiple ways for association cost calculation, by default\n    we use IoU. GIoU may have better performance in some situations. 
We note \n    that we hardly normalize the cost by all methods to (0,1) which may not be \n    the best practice.\n\"\"\"\nASSO_FUNCS = {  \"iou\": iou_batch,\n                \"giou\": giou_batch,\n                \"ciou\": ciou_batch,\n                \"diou\": diou_batch,\n                \"ct_dist\": ct_dist}\n\n\nclass OCSort(object):\n    def __init__(self, det_thresh, max_age=30, min_hits=3, \n        iou_threshold=0.3, delta_t=3, asso_func=\"iou\", inertia=0.2, use_byte=False):\n        \"\"\"\n        Sets key parameters for SORT\n        \"\"\"\n        self.max_age = max_age\n        self.min_hits = min_hits\n        self.iou_threshold = iou_threshold\n        self.trackers = []\n        self.frame_count = 0\n        self.det_thresh = det_thresh\n        self.delta_t = delta_t\n        self.asso_func = ASSO_FUNCS[asso_func]\n        self.inertia = inertia\n        self.use_byte = use_byte\n        KalmanBoxTracker.count = 0\n\n    def update(self, dets, _):\n        \"\"\"\n        Params:\n          dets - a numpy array of detections in the format [[x1,y1,x2,y2,score],[x1,y1,x2,y2,score],...]\n        Requires: this method must be called once for each frame even with empty detections (use np.empty((0, 5)) for frames without detections).\n        Returns the a similar array, where the last column is the object ID.\n        NOTE: The number of objects returned may differ from the number of detections provided.\n        \"\"\"\n\n        self.frame_count += 1\n        \n        xyxys = dets[:, 0:4]\n        confs = dets[:, 4]\n        clss = dets[:, 5]\n        \n        classes = clss.numpy()\n        xyxys = xyxys.numpy()\n        confs = confs.numpy()\n\n        output_results = np.column_stack((xyxys, confs, classes))\n        \n        inds_low = confs > 0.1\n        inds_high = confs < self.det_thresh\n        inds_second = np.logical_and(inds_low, inds_high)  # self.det_thresh > score > 0.1, for second matching\n        dets_second = output_results[inds_second]  # detections for second matching\n        remain_inds = confs > self.det_thresh\n        dets = output_results[remain_inds]\n\n        # get predicted locations from existing trackers.\n        trks = np.zeros((len(self.trackers), 5))\n        to_del = []\n        ret = []\n        for t, trk in enumerate(trks):\n            pos = self.trackers[t].predict()[0]\n            trk[:] = [pos[0], pos[1], pos[2], pos[3], 0]\n            if np.any(np.isnan(pos)):\n                to_del.append(t)\n        trks = np.ma.compress_rows(np.ma.masked_invalid(trks))\n        for t in reversed(to_del):\n            self.trackers.pop(t)\n\n        velocities = np.array(\n            [trk.velocity if trk.velocity is not None else np.array((0, 0)) for trk in self.trackers])\n        last_boxes = np.array([trk.last_observation for trk in self.trackers])\n        k_observations = np.array(\n            [k_previous_obs(trk.observations, trk.age, self.delta_t) for trk in self.trackers])\n\n        \"\"\"\n            First round of association\n        \"\"\"\n        matched, unmatched_dets, unmatched_trks = associate(\n            dets, trks, self.iou_threshold, velocities, k_observations, self.inertia)\n        for m in matched:\n            self.trackers[m[1]].update(dets[m[0], :5], dets[m[0], 5])\n\n        \"\"\"\n            Second round of associaton by OCR\n        \"\"\"\n        # BYTE association\n        if self.use_byte and len(dets_second) > 0 and unmatched_trks.shape[0] > 0:\n            u_trks = trks[unmatched_trks]\n     
       iou_left = self.asso_func(dets_second, u_trks)          # iou between low score detections and unmatched tracks\n            iou_left = np.array(iou_left)\n            if iou_left.max() > self.iou_threshold:\n                \"\"\"\n                    NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may\n                    get a higher performance especially on MOT17/MOT20 datasets. But we keep it\n                    uniform here for simplicity\n                \"\"\"\n                matched_indices = linear_assignment(-iou_left)\n                to_remove_trk_indices = []\n                for m in matched_indices:\n                    det_ind, trk_ind = m[0], unmatched_trks[m[1]]\n                    if iou_left[m[0], m[1]] < self.iou_threshold:\n                        continue\n                    self.trackers[trk_ind].update(dets_second[det_ind, :5], dets_second[det_ind, 5])\n                    to_remove_trk_indices.append(trk_ind)\n                unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))\n\n        if unmatched_dets.shape[0] > 0 and unmatched_trks.shape[0] > 0:\n            left_dets = dets[unmatched_dets]\n            left_trks = last_boxes[unmatched_trks]\n            iou_left = self.asso_func(left_dets, left_trks)\n            iou_left = np.array(iou_left)\n            if iou_left.max() > self.iou_threshold:\n                \"\"\"\n                    NOTE: by using a lower threshold, e.g., self.iou_threshold - 0.1, you may\n                    get a higher performance especially on MOT17/MOT20 datasets. But we keep it\n                    uniform here for simplicity\n                \"\"\"\n                rematched_indices = linear_assignment(-iou_left)\n                to_remove_det_indices = []\n                to_remove_trk_indices = []\n                for m in rematched_indices:\n                    det_ind, trk_ind = unmatched_dets[m[0]], unmatched_trks[m[1]]\n                    if iou_left[m[0], m[1]] < self.iou_threshold:\n                        continue\n                    self.trackers[trk_ind].update(dets[det_ind, :5], dets[det_ind, 5])\n                    to_remove_det_indices.append(det_ind)\n                    to_remove_trk_indices.append(trk_ind)\n                unmatched_dets = np.setdiff1d(unmatched_dets, np.array(to_remove_det_indices))\n                unmatched_trks = np.setdiff1d(unmatched_trks, np.array(to_remove_trk_indices))\n\n        for m in unmatched_trks:\n            self.trackers[m].update(None, None)\n\n        # create and initialise new trackers for unmatched detections\n        for i in unmatched_dets:\n            trk = KalmanBoxTracker(dets[i, :5], dets[i, 5], delta_t=self.delta_t)\n            self.trackers.append(trk)\n        i = len(self.trackers)\n        for trk in reversed(self.trackers):\n            if trk.last_observation.sum() < 0:\n                d = trk.get_state()[0]\n            else:\n                \"\"\"\n                    this is optional to use the recent observation or the kalman filter prediction,\n                    we didn't notice significant difference here\n                \"\"\"\n                d = trk.last_observation[:4]\n            if (trk.time_since_update < 1) and (trk.hit_streak >= self.min_hits or self.frame_count <= self.min_hits):\n                # +1 as MOT benchmark requires positive\n                ret.append(np.concatenate((d, [trk.id+1], [trk.cls], [trk.conf])).reshape(1, -1))\n            i -= 1\n            # 
remove dead tracklet\n            if(trk.time_since_update > self.max_age):\n                self.trackers.pop(i)\n        if(len(ret) > 0):\n            return np.concatenate(ret)\n        return np.empty((0, 5))\n"
  },
  {
    "path": "DLTA_AI_app/trackers/reid_export.py",
    "content": "import argparse\n\nimport os\n\n# limit the number of cpus used by high performance libraries\nos.environ[\"OMP_NUM_THREADS\"] = \"1\"\nos.environ[\"OPENBLAS_NUM_THREADS\"] = \"1\"\nos.environ[\"MKL_NUM_THREADS\"] = \"1\"\nos.environ[\"VECLIB_MAXIMUM_THREADS\"] = \"1\"\nos.environ[\"NUMEXPR_NUM_THREADS\"] = \"1\"\n\nimport sys\nimport numpy as np\nfrom pathlib import Path\nimport torch\nimport time\nimport platform\nimport pandas as pd\nimport subprocess\nimport torch.backends.cudnn as cudnn\nfrom torch.utils.mobile_optimizer import optimize_for_mobile\n\nFILE = Path(__file__).resolve()\nROOT = FILE.parents[0].parents[0]  # yolov5 strongsort root directory\nWEIGHTS = ROOT / 'weights'\n\nif str(ROOT) not in sys.path:\n    sys.path.append(str(ROOT))  # add ROOT to PATH\nif str(ROOT / 'yolov5') not in sys.path:\n    sys.path.append(str(ROOT / 'yolov5'))  # add yolov5 ROOT to PATH\n\nROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative\n\nimport logging\nfrom ultralytics.yolo.utils.torch_utils import select_device\nfrom ultralytics.yolo.utils import LOGGER, colorstr, ops\nfrom ultralytics.yolo.utils.checks import check_requirements, check_version\nfrom trackers.strongsort.deep.models import build_model\nfrom trackers.strongsort.deep.reid_model_factory import get_model_name, load_pretrained_weights\n\n\ndef file_size(path):\n    # Return file/dir size (MB)\n    path = Path(path)\n    if path.is_file():\n        return path.stat().st_size / 1E6\n    elif path.is_dir():\n        return sum(f.stat().st_size for f in path.glob('**/*') if f.is_file()) / 1E6\n    else:\n        return 0.0\n\n\ndef export_formats():\n    # YOLOv5 export formats\n    x = [\n        ['PyTorch', '-', '.pt', True, True],\n        ['TorchScript', 'torchscript', '.torchscript', True, True],\n        ['ONNX', 'onnx', '.onnx', True, True],\n        ['OpenVINO', 'openvino', '_openvino_model', True, False],\n        ['TensorRT', 'engine', '.engine', False, True],\n        ['TensorFlow Lite', 'tflite', '.tflite', True, False],\n    ]\n    return pd.DataFrame(x, columns=['Format', 'Argument', 'Suffix', 'CPU', 'GPU'])\n\n\ndef export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:')):\n    # YOLOv5 TorchScript model export\n    try:\n        LOGGER.info(f'\\n{prefix} starting export with torch {torch.__version__}...')\n        f = file.with_suffix('.torchscript')\n\n        ts = torch.jit.trace(model, im, strict=False)\n        if optimize:  # https://pytorch.org/tutorials/recipes/mobile_interpreter.html\n            optimize_for_mobile(ts)._save_for_lite_interpreter(str(f))\n        else:\n            ts.save(str(f))\n\n        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')\n        return f\n    except Exception as e:\n        LOGGER.info(f'{prefix} export failure: {e}')\n\n\ndef export_onnx(model, im, file, opset, dynamic, fp16, simplify, prefix=colorstr('ONNX:')):\n    # ONNX export\n    try:\n        check_requirements(('onnx',))\n        import onnx\n\n        f = file.with_suffix('.onnx')\n        LOGGER.info(f'\\n{prefix} starting export with onnx {onnx.__version__}...')\n\n        if dynamic:\n            dynamic = {'images': {0: 'batch'}, 'output': {0: 'batch'}}  # input --> shape(1,3,640,640), output --> shape(1,25200,85)\n\n        torch.onnx.export(\n            model.half() if fp16 else model.cpu(),\n            im.half() if fp16 else im.cpu(),\n            f,\n            verbose=False,\n            opset_version=opset,\n            
do_constant_folding=True,\n            input_names=['images'],\n            output_names=['output'],\n            dynamic_axes=dynamic or None\n        )\n        # Checks\n        model_onnx = onnx.load(f)  # load onnx model\n        onnx.checker.check_model(model_onnx)  # check onnx model\n        onnx.save(model_onnx, f)\n\n        # Simplify\n        if simplify:\n            try:\n                cuda = torch.cuda.is_available()\n                check_requirements(('onnxruntime-gpu' if cuda else 'onnxruntime', 'onnx-simplifier>=0.4.1'))\n                import onnxsim\n\n                LOGGER.info(f'simplifying with onnx-simplifier {onnxsim.__version__}...')\n                model_onnx, check = onnxsim.simplify(model_onnx)\n                assert check, 'assert check failed'\n                onnx.save(model_onnx, f)\n            except Exception as e:\n                LOGGER.info(f'simplifier failure: {e}')\n        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')\n        return f\n    except Exception as e:\n        LOGGER.info(f'export failure: {e}')\n\n\ndef export_openvino(file, half, prefix=colorstr('OpenVINO:')):\n    # YOLOv5 OpenVINO export\n    check_requirements(('openvino-dev',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/\n    import openvino.inference_engine as ie\n    try:\n        LOGGER.info(f'\\n{prefix} starting export with openvino {ie.__version__}...')\n        f = str(file).replace('.pt', f'_openvino_model{os.sep}')\n\n        cmd = f\"mo --input_model {file.with_suffix('.onnx')} --output_dir {f} --data_type {'FP16' if half else 'FP32'}\"\n        subprocess.check_output(cmd.split())  # export\n    except Exception as e:\n        LOGGER.info(f'export failure: {e}')\n    LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')\n    return f\n\n\ndef export_tflite(file, half, prefix=colorstr('TFLite:')):\n    # YOLOv5 TFLite export\n    try:\n        check_requirements(\n            ('openvino2tensorflow', 'tensorflow', 'tensorflow_datasets'))  # requires openvino-dev: https://pypi.org/project/openvino-dev/\n        import openvino.inference_engine as ie\n        LOGGER.info(f'\\n{prefix} starting export with openvino {ie.__version__}...')\n        output = Path(str(file).replace(f'_openvino_model{os.sep}', f'_tflite_model{os.sep}'))\n        modelxml = list(Path(file).glob('*.xml'))[0]\n        cmd = f\"openvino2tensorflow \\\n            --model_path {modelxml} \\\n            --model_output_path {output} \\\n            --output_pb \\\n            --output_saved_model \\\n            --output_no_quant_float32_tflite \\\n            --output_dynamic_range_quant_tflite\"\n        subprocess.check_output(cmd.split())  # export\n\n        LOGGER.info(f'{prefix} export success, results saved in {output} ({file_size(output):.1f} MB)')\n        return output\n    except Exception as e:\n        LOGGER.info(f'\\n{prefix} export failure: {e}')\n\n\ndef export_engine(model, im, file, half, dynamic, simplify, workspace=4, verbose=False, prefix=colorstr('TensorRT:')):\n    # YOLOv5 TensorRT export https://developer.nvidia.com/tensorrt\n    try:\n        assert im.device.type != 'cpu', 'export running on CPU but must be on GPU, i.e. 
`python export.py --device 0`'\n        try:\n            import tensorrt as trt\n        except Exception:\n            if platform.system() == 'Linux':\n                check_requirements(('nvidia-tensorrt',), cmds=('-U --index-url https://pypi.ngc.nvidia.com',))\n            import tensorrt as trt\n\n        if trt.__version__[0] == '7':  # TensorRT 7 handling https://github.com/ultralytics/yolov5/issues/6012\n            grid = model.model[-1].anchor_grid\n            model.model[-1].anchor_grid = [a[..., :1, :1, :] for a in grid]\n            export_onnx(model, im, file, 12, dynamic, half, simplify)  # opset 12\n            model.model[-1].anchor_grid = grid\n        else:  # TensorRT >= 8\n            check_version(trt.__version__, '8.0.0', hard=True)  # require tensorrt>=8.0.0\n            export_onnx(model, im, file, 12, dynamic, half, simplify)  # opset 13\n        onnx = file.with_suffix('.onnx')\n\n        LOGGER.info(f'\\n{prefix} starting export with TensorRT {trt.__version__}...')\n        assert onnx.exists(), f'failed to export ONNX file: {onnx}'\n        f = file.with_suffix('.engine')  # TensorRT engine file\n        logger = trt.Logger(trt.Logger.INFO)\n        if verbose:\n            logger.min_severity = trt.Logger.Severity.VERBOSE\n\n        builder = trt.Builder(logger)\n        config = builder.create_builder_config()\n        config.max_workspace_size = workspace * 1 << 30\n        # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # fix TRT 8.4 deprecation notice\n\n        flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))\n        network = builder.create_network(flag)\n        parser = trt.OnnxParser(network, logger)\n        if not parser.parse_from_file(str(onnx)):\n            raise RuntimeError(f'failed to load ONNX file: {onnx}')\n\n        inputs = [network.get_input(i) for i in range(network.num_inputs)]\n        outputs = [network.get_output(i) for i in range(network.num_outputs)]\n        LOGGER.info(f'{prefix} Network Description:')\n        for inp in inputs:\n            LOGGER.info(f'{prefix}\\tinput \"{inp.name}\" with shape {inp.shape} and dtype {inp.dtype}')\n        for out in outputs:\n            LOGGER.info(f'{prefix}\\toutput \"{out.name}\" with shape {out.shape} and dtype {out.dtype}')\n\n        if dynamic:\n            if im.shape[0] <= 1:\n                LOGGER.warning(f\"{prefix}WARNING: --dynamic model requires maximum --batch-size argument\")\n            profile = builder.create_optimization_profile()\n            for inp in inputs:\n                if half:\n                    inp.dtype = trt.float16\n                profile.set_shape(inp.name, (1, *im.shape[1:]), (max(1, im.shape[0] // 2), *im.shape[1:]), im.shape)\n            config.add_optimization_profile(profile)\n\n        LOGGER.info(f'{prefix} building FP{16 if builder.platform_has_fast_fp16 and half else 32} engine in {f}')\n        if builder.platform_has_fast_fp16 and half:\n            config.set_flag(trt.BuilderFlag.FP16)\n            config.default_device_type = trt.DeviceType.GPU\n        with builder.build_engine(network, config) as engine, open(f, 'wb') as t:\n            t.write(engine.serialize())\n        LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')\n        return f\n    except Exception as e:\n        LOGGER.info(f'\\n{prefix} export failure: {e}')\n\n\nif __name__ == \"__main__\":\n\n    parser = argparse.ArgumentParser(description=\"ReID export\")\n    
parser.add_argument('--batch-size', type=int, default=1, help='batch size')\n    parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[256, 128], help='image (h, w)')\n    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')\n    parser.add_argument('--optimize', action='store_true', help='TorchScript: optimize for mobile')\n    parser.add_argument('--dynamic', action='store_true', help='ONNX/TF/TensorRT: dynamic axes')\n    parser.add_argument('--simplify', action='store_true', help='ONNX: simplify model')\n    parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version')\n    parser.add_argument('--workspace', type=int, default=4, help='TensorRT: workspace size (GB)')\n    parser.add_argument('--verbose', action='store_true', help='TensorRT: verbose log')\n    parser.add_argument('--weights', nargs='+', type=str, default=WEIGHTS / 'osnet_x0_25_msmt17.pt', help='model.pt path(s)')\n    parser.add_argument('--half', action='store_true', help='FP16 half-precision export')\n    parser.add_argument('--include',\n                        nargs='+',\n                        default=['torchscript'],\n                        help='torchscript, onnx, openvino, engine')\n    args = parser.parse_args()\n\n    t = time.time()\n\n    include = [x.lower() for x in args.include]  # to lowercase\n    fmts = tuple(export_formats()['Argument'][1:])  # --include arguments\n    flags = [x in include for x in fmts]\n    assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {fmts}'\n    jit, onnx, openvino, engine, tflite = flags  # export booleans\n\n    args.device = select_device(args.device)\n    if args.half:\n        assert args.device.type != 'cpu', '--half only compatible with GPU export, i.e. use --device 0'\n        # assert not args.dynamic, '--half not compatible with --dynamic, i.e. use either --half or --dynamic but not both'\n\n    if type(args.weights) is list:\n        args.weights = Path(args.weights[0])\n\n    model = build_model(\n        get_model_name(args.weights),\n        num_classes=1,\n        pretrained=not (args.weights and args.weights.is_file() and args.weights.suffix == '.pt'),\n        use_gpu=args.device\n    ).to(args.device)\n    load_pretrained_weights(model, args.weights)\n    model.eval()\n\n    if args.optimize:\n        assert args.device.type == 'cpu', '--optimize not compatible with cuda devices, i.e. 
use --device cpu'\n\n    im = torch.zeros(args.batch_size, 3, args.imgsz[0], args.imgsz[1]).to(args.device)  # image size(batch,3,256,128) BCHW\n    for _ in range(2):\n        y = model(im)  # dry runs\n    if args.half:\n        im, model = im.half(), model.half()  # to FP16\n    shape = tuple((y[0] if isinstance(y, tuple) else y).shape)  # model output shape\n    LOGGER.info(f\"\\n{colorstr('PyTorch:')} starting from {args.weights} with output shape {shape} ({file_size(args.weights):.1f} MB)\")\n\n    # Exports\n    f = [''] * len(fmts)  # exported filenames\n    if jit:\n        f[0] = export_torchscript(model, im, args.weights, args.optimize)\n    if engine:  # TensorRT required before ONNX\n        f[1] = export_engine(model, im, args.weights, args.half, args.dynamic, args.simplify, args.workspace, args.verbose)\n    if onnx:  # OpenVINO requires ONNX\n        f[2] = export_onnx(model, im, args.weights, args.opset, args.dynamic, args.half, args.simplify)  # opset 12\n    if openvino:\n        f[3] = export_openvino(args.weights, args.half)\n    if tflite:\n        export_tflite(f[3], False)  # convert the OpenVINO model exported above\n\n    # Finish\n    f = [str(x) for x in f if x]  # filter out '' and None\n    if any(f):\n        LOGGER.info(f'\\nExport complete ({time.time() - t:.1f}s)'\n                    f\"\\nResults saved to {colorstr('bold', args.weights.parent.resolve())}\"\n                    f\"\\nVisualize:       https://netron.app\")\n\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/.gitignore",
    "content": "# Folders\n__pycache__/\nbuild/\n*.egg-info\n\n\n# Files\n*.weights\n*.t7\n*.mp4\n*.avi\n*.so\n*.txt\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/__init__.py",
    "content": ""
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/configs/strongsort.yaml",
    "content": "strongsort:\n  ecc: true\n  ema_alpha: 0.8962157769329083\n  max_age: 40\n  max_dist: 0.1594374041012136\n  max_iou_dist: 0.5431835667667874\n  max_unmatched_preds: 0\n  mc_lambda: 0.995\n  n_init: 3\n  nn_budget: 100\n  conf_thres: 0.5122620708221085\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/checkpoint/.gitkeep",
    "content": ""
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/__init__.py",
    "content": "from __future__ import absolute_import\nimport torch\n\nfrom .pcb import *\nfrom .mlfn import *\nfrom .hacnn import *\nfrom .osnet import *\nfrom .senet import *\nfrom .mudeep import *\nfrom .nasnet import *\nfrom .resnet import *\nfrom .densenet import *\nfrom .xception import *\nfrom .osnet_ain import *\nfrom .resnetmid import *\nfrom .shufflenet import *\nfrom .squeezenet import *\nfrom .inceptionv4 import *\nfrom .mobilenetv2 import *\nfrom .resnet_ibn_a import *\nfrom .resnet_ibn_b import *\nfrom .shufflenetv2 import *\nfrom .inceptionresnetv2 import *\n\n__model_factory = {\n    # image classification models\n    'resnet18': resnet18,\n    'resnet34': resnet34,\n    'resnet50': resnet50,\n    'resnet101': resnet101,\n    'resnet152': resnet152,\n    'resnext50_32x4d': resnext50_32x4d,\n    'resnext101_32x8d': resnext101_32x8d,\n    'resnet50_fc512': resnet50_fc512,\n    'se_resnet50': se_resnet50,\n    'se_resnet50_fc512': se_resnet50_fc512,\n    'se_resnet101': se_resnet101,\n    'se_resnext50_32x4d': se_resnext50_32x4d,\n    'se_resnext101_32x4d': se_resnext101_32x4d,\n    'densenet121': densenet121,\n    'densenet169': densenet169,\n    'densenet201': densenet201,\n    'densenet161': densenet161,\n    'densenet121_fc512': densenet121_fc512,\n    'inceptionresnetv2': inceptionresnetv2,\n    'inceptionv4': inceptionv4,\n    'xception': xception,\n    'resnet50_ibn_a': resnet50_ibn_a,\n    'resnet50_ibn_b': resnet50_ibn_b,\n    # lightweight models\n    'nasnsetmobile': nasnetamobile,\n    'mobilenetv2_x1_0': mobilenetv2_x1_0,\n    'mobilenetv2_x1_4': mobilenetv2_x1_4,\n    'shufflenet': shufflenet,\n    'squeezenet1_0': squeezenet1_0,\n    'squeezenet1_0_fc512': squeezenet1_0_fc512,\n    'squeezenet1_1': squeezenet1_1,\n    'shufflenet_v2_x0_5': shufflenet_v2_x0_5,\n    'shufflenet_v2_x1_0': shufflenet_v2_x1_0,\n    'shufflenet_v2_x1_5': shufflenet_v2_x1_5,\n    'shufflenet_v2_x2_0': shufflenet_v2_x2_0,\n    # reid-specific models\n    'mudeep': MuDeep,\n    'resnet50mid': resnet50mid,\n    'hacnn': HACNN,\n    'pcb_p6': pcb_p6,\n    'pcb_p4': pcb_p4,\n    'mlfn': mlfn,\n    'osnet_x1_0': osnet_x1_0,\n    'osnet_x0_75': osnet_x0_75,\n    'osnet_x0_5': osnet_x0_5,\n    'osnet_x0_25': osnet_x0_25,\n    'osnet_ibn_x1_0': osnet_ibn_x1_0,\n    'osnet_ain_x1_0': osnet_ain_x1_0,\n    'osnet_ain_x0_75': osnet_ain_x0_75,\n    'osnet_ain_x0_5': osnet_ain_x0_5,\n    'osnet_ain_x0_25': osnet_ain_x0_25\n}\n\n\ndef show_avai_models():\n    \"\"\"Displays available models.\n\n    Examples::\n        >>> from torchreid import models\n        >>> models.show_avai_models()\n    \"\"\"\n    print(list(__model_factory.keys()))\n\n\ndef build_model(\n    name, num_classes, loss='softmax', pretrained=True, use_gpu=True\n):\n    \"\"\"A function wrapper for building a model.\n\n    Args:\n        name (str): model name.\n        num_classes (int): number of training identities.\n        loss (str, optional): loss function to optimize the model. Currently\n            supports \"softmax\" and \"triplet\". Default is \"softmax\".\n        pretrained (bool, optional): whether to load ImageNet-pretrained weights.\n            Default is True.\n        use_gpu (bool, optional): whether to use gpu. 
Default is True.\n\n    Returns:\n        nn.Module\n\n    Examples::\n        >>> from torchreid import models\n        >>> model = models.build_model('resnet50', 751, loss='softmax')\n    \"\"\"\n    avai_models = list(__model_factory.keys())\n    if name not in avai_models:\n        raise KeyError(\n            'Unknown model: {}. Must be one of {}'.format(name, avai_models)\n        )\n    return __model_factory[name](\n        num_classes=num_classes,\n        loss=loss,\n        pretrained=pretrained,\n        use_gpu=use_gpu\n    )\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/densenet.py",
    "content": "\"\"\"\nCode source: https://github.com/pytorch/vision\n\"\"\"\nfrom __future__ import division, absolute_import\nimport re\nfrom collections import OrderedDict\nimport torch\nimport torch.nn as nn\nfrom torch.nn import functional as F\nfrom torch.utils import model_zoo\n\n__all__ = [\n    'densenet121', 'densenet169', 'densenet201', 'densenet161',\n    'densenet121_fc512'\n]\n\nmodel_urls = {\n    'densenet121':\n    'https://download.pytorch.org/models/densenet121-a639ec97.pth',\n    'densenet169':\n    'https://download.pytorch.org/models/densenet169-b2777c0a.pth',\n    'densenet201':\n    'https://download.pytorch.org/models/densenet201-c1103571.pth',\n    'densenet161':\n    'https://download.pytorch.org/models/densenet161-8d451a50.pth',\n}\n\n\nclass _DenseLayer(nn.Sequential):\n\n    def __init__(self, num_input_features, growth_rate, bn_size, drop_rate):\n        super(_DenseLayer, self).__init__()\n        self.add_module('norm1', nn.BatchNorm2d(num_input_features)),\n        self.add_module('relu1', nn.ReLU(inplace=True)),\n        self.add_module(\n            'conv1',\n            nn.Conv2d(\n                num_input_features,\n                bn_size * growth_rate,\n                kernel_size=1,\n                stride=1,\n                bias=False\n            )\n        ),\n        self.add_module('norm2', nn.BatchNorm2d(bn_size * growth_rate)),\n        self.add_module('relu2', nn.ReLU(inplace=True)),\n        self.add_module(\n            'conv2',\n            nn.Conv2d(\n                bn_size * growth_rate,\n                growth_rate,\n                kernel_size=3,\n                stride=1,\n                padding=1,\n                bias=False\n            )\n        ),\n        self.drop_rate = drop_rate\n\n    def forward(self, x):\n        new_features = super(_DenseLayer, self).forward(x)\n        if self.drop_rate > 0:\n            new_features = F.dropout(\n                new_features, p=self.drop_rate, training=self.training\n            )\n        return torch.cat([x, new_features], 1)\n\n\nclass _DenseBlock(nn.Sequential):\n\n    def __init__(\n        self, num_layers, num_input_features, bn_size, growth_rate, drop_rate\n    ):\n        super(_DenseBlock, self).__init__()\n        for i in range(num_layers):\n            layer = _DenseLayer(\n                num_input_features + i*growth_rate, growth_rate, bn_size,\n                drop_rate\n            )\n            self.add_module('denselayer%d' % (i+1), layer)\n\n\nclass _Transition(nn.Sequential):\n\n    def __init__(self, num_input_features, num_output_features):\n        super(_Transition, self).__init__()\n        self.add_module('norm', nn.BatchNorm2d(num_input_features))\n        self.add_module('relu', nn.ReLU(inplace=True))\n        self.add_module(\n            'conv',\n            nn.Conv2d(\n                num_input_features,\n                num_output_features,\n                kernel_size=1,\n                stride=1,\n                bias=False\n            )\n        )\n        self.add_module('pool', nn.AvgPool2d(kernel_size=2, stride=2))\n\n\nclass DenseNet(nn.Module):\n    \"\"\"Densely connected network.\n    \n    Reference:\n        Huang et al. Densely Connected Convolutional Networks. 
CVPR 2017.\n\n    Public keys:\n        - ``densenet121``: DenseNet121.\n        - ``densenet169``: DenseNet169.\n        - ``densenet201``: DenseNet201.\n        - ``densenet161``: DenseNet161.\n        - ``densenet121_fc512``: DenseNet121 + FC.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        loss,\n        growth_rate=32,\n        block_config=(6, 12, 24, 16),\n        num_init_features=64,\n        bn_size=4,\n        drop_rate=0,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    ):\n\n        super(DenseNet, self).__init__()\n        self.loss = loss\n\n        # First convolution\n        self.features = nn.Sequential(\n            OrderedDict(\n                [\n                    (\n                        'conv0',\n                        nn.Conv2d(\n                            3,\n                            num_init_features,\n                            kernel_size=7,\n                            stride=2,\n                            padding=3,\n                            bias=False\n                        )\n                    ),\n                    ('norm0', nn.BatchNorm2d(num_init_features)),\n                    ('relu0', nn.ReLU(inplace=True)),\n                    (\n                        'pool0',\n                        nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n                    ),\n                ]\n            )\n        )\n\n        # Each denseblock\n        num_features = num_init_features\n        for i, num_layers in enumerate(block_config):\n            block = _DenseBlock(\n                num_layers=num_layers,\n                num_input_features=num_features,\n                bn_size=bn_size,\n                growth_rate=growth_rate,\n                drop_rate=drop_rate\n            )\n            self.features.add_module('denseblock%d' % (i+1), block)\n            num_features = num_features + num_layers*growth_rate\n            if i != len(block_config) - 1:\n                trans = _Transition(\n                    num_input_features=num_features,\n                    num_output_features=num_features // 2\n                )\n                self.features.add_module('transition%d' % (i+1), trans)\n                num_features = num_features // 2\n\n        # Final batch norm\n        self.features.add_module('norm5', nn.BatchNorm2d(num_features))\n\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.feature_dim = num_features\n        self.fc = self._construct_fc_layer(fc_dims, num_features, dropout_p)\n\n        # Linear layer\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        self._init_params()\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"Constructs fully connected layer.\n\n        Args:\n            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed\n            input_dim (int): input dimension\n            dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n    
        if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def forward(self, x):\n        f = self.features(x)\n        f = F.relu(f, inplace=True)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if self.fc is not None:\n            v = self.fc(v)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n\n    # '.'s are no longer allowed in module names, but pervious _DenseLayer\n    # has keys 'norm.1', 'relu.1', 'conv.1', 'norm.2', 'relu.2', 'conv.2'.\n    # They are also in the checkpoints in model_urls. 
This pattern is used\n    # to find such keys.\n    pattern = re.compile(\n        r'^(.*denselayer\\d+\\.(?:norm|relu|conv))\\.((?:[12])\\.(?:weight|bias|running_mean|running_var))$'\n    )\n    for key in list(pretrain_dict.keys()):\n        res = pattern.match(key)\n        if res:\n            new_key = res.group(1) + res.group(2)\n            pretrain_dict[new_key] = pretrain_dict[key]\n            del pretrain_dict[key]\n\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\n\"\"\"\nDense network configurations:\n--\ndensenet121: num_init_features=64, growth_rate=32, block_config=(6, 12, 24, 16)\ndensenet169: num_init_features=64, growth_rate=32, block_config=(6, 12, 32, 32)\ndensenet201: num_init_features=64, growth_rate=32, block_config=(6, 12, 48, 32)\ndensenet161: num_init_features=96, growth_rate=48, block_config=(6, 12, 36, 24)\n\"\"\"\n\n\ndef densenet121(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = DenseNet(\n        num_classes=num_classes,\n        loss=loss,\n        num_init_features=64,\n        growth_rate=32,\n        block_config=(6, 12, 24, 16),\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['densenet121'])\n    return model\n\n\ndef densenet169(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = DenseNet(\n        num_classes=num_classes,\n        loss=loss,\n        num_init_features=64,\n        growth_rate=32,\n        block_config=(6, 12, 32, 32),\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['densenet169'])\n    return model\n\n\ndef densenet201(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = DenseNet(\n        num_classes=num_classes,\n        loss=loss,\n        num_init_features=64,\n        growth_rate=32,\n        block_config=(6, 12, 48, 32),\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['densenet201'])\n    return model\n\n\ndef densenet161(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = DenseNet(\n        num_classes=num_classes,\n        loss=loss,\n        num_init_features=96,\n        growth_rate=48,\n        block_config=(6, 12, 36, 24),\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['densenet161'])\n    return model\n\n\ndef densenet121_fc512(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = DenseNet(\n        num_classes=num_classes,\n        loss=loss,\n        num_init_features=64,\n        growth_rate=32,\n        block_config=(6, 12, 24, 16),\n        fc_dims=[512],\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['densenet121'])\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/hacnn.py",
    "content": "from __future__ import division, absolute_import\nimport torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\n__all__ = ['HACNN']\n\n\nclass ConvBlock(nn.Module):\n    \"\"\"Basic convolutional block.\n    \n    convolution + batch normalization + relu.\n\n    Args:\n        in_c (int): number of input channels.\n        out_c (int): number of output channels.\n        k (int or tuple): kernel size.\n        s (int or tuple): stride.\n        p (int or tuple): padding.\n    \"\"\"\n\n    def __init__(self, in_c, out_c, k, s=1, p=0):\n        super(ConvBlock, self).__init__()\n        self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)\n        self.bn = nn.BatchNorm2d(out_c)\n\n    def forward(self, x):\n        return F.relu(self.bn(self.conv(x)))\n\n\nclass InceptionA(nn.Module):\n\n    def __init__(self, in_channels, out_channels):\n        super(InceptionA, self).__init__()\n        mid_channels = out_channels // 4\n\n        self.stream1 = nn.Sequential(\n            ConvBlock(in_channels, mid_channels, 1),\n            ConvBlock(mid_channels, mid_channels, 3, p=1),\n        )\n        self.stream2 = nn.Sequential(\n            ConvBlock(in_channels, mid_channels, 1),\n            ConvBlock(mid_channels, mid_channels, 3, p=1),\n        )\n        self.stream3 = nn.Sequential(\n            ConvBlock(in_channels, mid_channels, 1),\n            ConvBlock(mid_channels, mid_channels, 3, p=1),\n        )\n        self.stream4 = nn.Sequential(\n            nn.AvgPool2d(3, stride=1, padding=1),\n            ConvBlock(in_channels, mid_channels, 1),\n        )\n\n    def forward(self, x):\n        s1 = self.stream1(x)\n        s2 = self.stream2(x)\n        s3 = self.stream3(x)\n        s4 = self.stream4(x)\n        y = torch.cat([s1, s2, s3, s4], dim=1)\n        return y\n\n\nclass InceptionB(nn.Module):\n\n    def __init__(self, in_channels, out_channels):\n        super(InceptionB, self).__init__()\n        mid_channels = out_channels // 4\n\n        self.stream1 = nn.Sequential(\n            ConvBlock(in_channels, mid_channels, 1),\n            ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),\n        )\n        self.stream2 = nn.Sequential(\n            ConvBlock(in_channels, mid_channels, 1),\n            ConvBlock(mid_channels, mid_channels, 3, p=1),\n            ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),\n        )\n        self.stream3 = nn.Sequential(\n            nn.MaxPool2d(3, stride=2, padding=1),\n            ConvBlock(in_channels, mid_channels * 2, 1),\n        )\n\n    def forward(self, x):\n        s1 = self.stream1(x)\n        s2 = self.stream2(x)\n        s3 = self.stream3(x)\n        y = torch.cat([s1, s2, s3], dim=1)\n        return y\n\n\nclass SpatialAttn(nn.Module):\n    \"\"\"Spatial Attention (Sec. 3.1.I.1)\"\"\"\n\n    def __init__(self):\n        super(SpatialAttn, self).__init__()\n        self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)\n        self.conv2 = ConvBlock(1, 1, 1)\n\n    def forward(self, x):\n        # global cross-channel averaging\n        x = x.mean(1, keepdim=True)\n        # 3-by-3 conv\n        x = self.conv1(x)\n        # bilinear resizing\n        x = F.upsample(\n            x, (x.size(2) * 2, x.size(3) * 2),\n            mode='bilinear',\n            align_corners=True\n        )\n        # scaling conv\n        x = self.conv2(x)\n        return x\n\n\nclass ChannelAttn(nn.Module):\n    \"\"\"Channel Attention (Sec. 
3.1.I.2)\"\"\"\n\n    def __init__(self, in_channels, reduction_rate=16):\n        super(ChannelAttn, self).__init__()\n        assert in_channels % reduction_rate == 0\n        self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)\n        self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)\n\n    def forward(self, x):\n        # squeeze operation (global average pooling)\n        x = F.avg_pool2d(x, x.size()[2:])\n        # excitation operation (2 conv layers)\n        x = self.conv1(x)\n        x = self.conv2(x)\n        return x\n\n\nclass SoftAttn(nn.Module):\n    \"\"\"Soft Attention (Sec. 3.1.I)\n    \n    Aim: Spatial Attention + Channel Attention\n    \n    Output: attention maps with shape identical to input.\n    \"\"\"\n\n    def __init__(self, in_channels):\n        super(SoftAttn, self).__init__()\n        self.spatial_attn = SpatialAttn()\n        self.channel_attn = ChannelAttn(in_channels)\n        self.conv = ConvBlock(in_channels, in_channels, 1)\n\n    def forward(self, x):\n        y_spatial = self.spatial_attn(x)\n        y_channel = self.channel_attn(x)\n        y = y_spatial * y_channel\n        y = torch.sigmoid(self.conv(y))\n        return y\n\n\nclass HardAttn(nn.Module):\n    \"\"\"Hard Attention (Sec. 3.1.II)\"\"\"\n\n    def __init__(self, in_channels):\n        super(HardAttn, self).__init__()\n        self.fc = nn.Linear(in_channels, 4 * 2)\n        self.init_params()\n\n    def init_params(self):\n        self.fc.weight.data.zero_()\n        self.fc.bias.data.copy_(\n            torch.tensor(\n                [0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float\n            )\n        )\n\n    def forward(self, x):\n        # squeeze operation (global average pooling)\n        x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))\n        # predict transformation parameters\n        theta = torch.tanh(self.fc(x))\n        theta = theta.view(-1, 4, 2)\n        return theta\n\n\nclass HarmAttn(nn.Module):\n    \"\"\"Harmonious Attention (Sec. 3.1)\"\"\"\n\n    def __init__(self, in_channels):\n        super(HarmAttn, self).__init__()\n        self.soft_attn = SoftAttn(in_channels)\n        self.hard_attn = HardAttn(in_channels)\n\n    def forward(self, x):\n        y_soft_attn = self.soft_attn(x)\n        theta = self.hard_attn(x)\n        return y_soft_attn, theta\n\n\nclass HACNN(nn.Module):\n    \"\"\"Harmonious Attention Convolutional Neural Network.\n\n    Reference:\n        Li et al. Harmonious Attention Network for Person Re-identification. CVPR 2018.\n\n    Public keys:\n        - ``hacnn``: HACNN.\n    \"\"\"\n\n    # Args:\n    #    num_classes (int): number of classes to predict\n    #    nchannels (list): number of channels AFTER concatenation\n    #    feat_dim (int): feature dimension for a single stream\n    #    learn_region (bool): whether to learn region features (i.e. 
local branch)\n\n    def __init__(\n        self,\n        num_classes,\n        loss='softmax',\n        nchannels=[128, 256, 384],\n        feat_dim=512,\n        learn_region=True,\n        use_gpu=True,\n        **kwargs\n    ):\n        super(HACNN, self).__init__()\n        self.loss = loss\n        self.learn_region = learn_region\n        self.use_gpu = use_gpu\n\n        self.conv = ConvBlock(3, 32, 3, s=2, p=1)\n\n        # Construct Inception + HarmAttn blocks\n        # ============== Block 1 ==============\n        self.inception1 = nn.Sequential(\n            InceptionA(32, nchannels[0]),\n            InceptionB(nchannels[0], nchannels[0]),\n        )\n        self.ha1 = HarmAttn(nchannels[0])\n\n        # ============== Block 2 ==============\n        self.inception2 = nn.Sequential(\n            InceptionA(nchannels[0], nchannels[1]),\n            InceptionB(nchannels[1], nchannels[1]),\n        )\n        self.ha2 = HarmAttn(nchannels[1])\n\n        # ============== Block 3 ==============\n        self.inception3 = nn.Sequential(\n            InceptionA(nchannels[1], nchannels[2]),\n            InceptionB(nchannels[2], nchannels[2]),\n        )\n        self.ha3 = HarmAttn(nchannels[2])\n\n        self.fc_global = nn.Sequential(\n            nn.Linear(nchannels[2], feat_dim),\n            nn.BatchNorm1d(feat_dim),\n            nn.ReLU(),\n        )\n        self.classifier_global = nn.Linear(feat_dim, num_classes)\n\n        if self.learn_region:\n            self.init_scale_factors()\n            self.local_conv1 = InceptionB(32, nchannels[0])\n            self.local_conv2 = InceptionB(nchannels[0], nchannels[1])\n            self.local_conv3 = InceptionB(nchannels[1], nchannels[2])\n            self.fc_local = nn.Sequential(\n                nn.Linear(nchannels[2] * 4, feat_dim),\n                nn.BatchNorm1d(feat_dim),\n                nn.ReLU(),\n            )\n            self.classifier_local = nn.Linear(feat_dim, num_classes)\n            self.feat_dim = feat_dim * 2\n        else:\n            self.feat_dim = feat_dim\n\n    def init_scale_factors(self):\n        # initialize scale factors (s_w, s_h) for four regions\n        self.scale_factors = []\n        self.scale_factors.append(\n            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)\n        )\n        self.scale_factors.append(\n            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)\n        )\n        self.scale_factors.append(\n            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)\n        )\n        self.scale_factors.append(\n            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)\n        )\n\n    def stn(self, x, theta):\n        \"\"\"Performs spatial transform\n        \n        x: (batch, channel, height, width)\n        theta: (batch, 2, 3)\n        \"\"\"\n        grid = F.affine_grid(theta, x.size())\n        x = F.grid_sample(x, grid)\n        return x\n\n    def transform_theta(self, theta_i, region_idx):\n        \"\"\"Transforms theta to include (s_w, s_h), resulting in (batch, 2, 3)\"\"\"\n        scale_factors = self.scale_factors[region_idx]\n        theta = torch.zeros(theta_i.size(0), 2, 3)\n        theta[:, :, :2] = scale_factors\n        theta[:, :, -1] = theta_i\n        if self.use_gpu:\n            theta = theta.cuda()\n        return theta\n\n    def forward(self, x):\n        assert x.size(2) == 160 and x.size(3) == 64, \\\n            'Input size does not match, expected (160, 64) but got ({}, {})'.format(x.size(2), x.size(3))\n        x 
= self.conv(x)\n\n        # ============== Block 1 ==============\n        # global branch\n        x1 = self.inception1(x)\n        x1_attn, x1_theta = self.ha1(x1)\n        x1_out = x1 * x1_attn\n        # local branch\n        if self.learn_region:\n            x1_local_list = []\n            for region_idx in range(4):\n                x1_theta_i = x1_theta[:, region_idx, :]\n                x1_theta_i = self.transform_theta(x1_theta_i, region_idx)\n                x1_trans_i = self.stn(x, x1_theta_i)\n                x1_trans_i = F.upsample(\n                    x1_trans_i, (24, 28), mode='bilinear', align_corners=True\n                )\n                x1_local_i = self.local_conv1(x1_trans_i)\n                x1_local_list.append(x1_local_i)\n\n        # ============== Block 2 ==============\n        # Block 2\n        # global branch\n        x2 = self.inception2(x1_out)\n        x2_attn, x2_theta = self.ha2(x2)\n        x2_out = x2 * x2_attn\n        # local branch\n        if self.learn_region:\n            x2_local_list = []\n            for region_idx in range(4):\n                x2_theta_i = x2_theta[:, region_idx, :]\n                x2_theta_i = self.transform_theta(x2_theta_i, region_idx)\n                x2_trans_i = self.stn(x1_out, x2_theta_i)\n                x2_trans_i = F.upsample(\n                    x2_trans_i, (12, 14), mode='bilinear', align_corners=True\n                )\n                x2_local_i = x2_trans_i + x1_local_list[region_idx]\n                x2_local_i = self.local_conv2(x2_local_i)\n                x2_local_list.append(x2_local_i)\n\n        # ============== Block 3 ==============\n        # Block 3\n        # global branch\n        x3 = self.inception3(x2_out)\n        x3_attn, x3_theta = self.ha3(x3)\n        x3_out = x3 * x3_attn\n        # local branch\n        if self.learn_region:\n            x3_local_list = []\n            for region_idx in range(4):\n                x3_theta_i = x3_theta[:, region_idx, :]\n                x3_theta_i = self.transform_theta(x3_theta_i, region_idx)\n                x3_trans_i = self.stn(x2_out, x3_theta_i)\n                x3_trans_i = F.upsample(\n                    x3_trans_i, (6, 7), mode='bilinear', align_corners=True\n                )\n                x3_local_i = x3_trans_i + x2_local_list[region_idx]\n                x3_local_i = self.local_conv3(x3_local_i)\n                x3_local_list.append(x3_local_i)\n\n        # ============== Feature generation ==============\n        # global branch\n        x_global = F.avg_pool2d(x3_out,\n                                x3_out.size()[2:]\n                                ).view(x3_out.size(0), x3_out.size(1))\n        x_global = self.fc_global(x_global)\n        # local branch\n        if self.learn_region:\n            x_local_list = []\n            for region_idx in range(4):\n                x_local_i = x3_local_list[region_idx]\n                x_local_i = F.avg_pool2d(x_local_i,\n                                         x_local_i.size()[2:]\n                                         ).view(x_local_i.size(0), -1)\n                x_local_list.append(x_local_i)\n            x_local = torch.cat(x_local_list, 1)\n            x_local = self.fc_local(x_local)\n\n        if not self.training:\n            # l2 normalization before concatenation\n            if self.learn_region:\n                x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)\n                x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)\n                return 
torch.cat([x_global, x_local], 1)\n            else:\n                return x_global\n\n        prelogits_global = self.classifier_global(x_global)\n        if self.learn_region:\n            prelogits_local = self.classifier_local(x_local)\n\n        if self.loss == 'softmax':\n            if self.learn_region:\n                return (prelogits_global, prelogits_local)\n            else:\n                return prelogits_global\n\n        elif self.loss == 'triplet':\n            if self.learn_region:\n                return (prelogits_global, prelogits_local), (x_global, x_local)\n            else:\n                return prelogits_global, x_global\n\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/inceptionresnetv2.py",
    "content": "\"\"\"\nCode imported from https://github.com/Cadene/pretrained-models.pytorch\n\"\"\"\nfrom __future__ import division, absolute_import\nimport torch\nimport torch.nn as nn\nimport torch.utils.model_zoo as model_zoo\n\n__all__ = ['inceptionresnetv2']\n\npretrained_settings = {\n    'inceptionresnetv2': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 299, 299],\n            'input_range': [0, 1],\n            'mean': [0.5, 0.5, 0.5],\n            'std': [0.5, 0.5, 0.5],\n            'num_classes': 1000\n        },\n        'imagenet+background': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/inceptionresnetv2-520b38e4.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 299, 299],\n            'input_range': [0, 1],\n            'mean': [0.5, 0.5, 0.5],\n            'std': [0.5, 0.5, 0.5],\n            'num_classes': 1001\n        }\n    }\n}\n\n\nclass BasicConv2d(nn.Module):\n\n    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):\n        super(BasicConv2d, self).__init__()\n        self.conv = nn.Conv2d(\n            in_planes,\n            out_planes,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            bias=False\n        ) # verify bias false\n        self.bn = nn.BatchNorm2d(\n            out_planes,\n            eps=0.001, # value found in tensorflow\n            momentum=0.1, # default pytorch value\n            affine=True\n        )\n        self.relu = nn.ReLU(inplace=False)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        x = self.relu(x)\n        return x\n\n\nclass Mixed_5b(nn.Module):\n\n    def __init__(self):\n        super(Mixed_5b, self).__init__()\n\n        self.branch0 = BasicConv2d(192, 96, kernel_size=1, stride=1)\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(192, 48, kernel_size=1, stride=1),\n            BasicConv2d(48, 64, kernel_size=5, stride=1, padding=2)\n        )\n\n        self.branch2 = nn.Sequential(\n            BasicConv2d(192, 64, kernel_size=1, stride=1),\n            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),\n            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)\n        )\n\n        self.branch3 = nn.Sequential(\n            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),\n            BasicConv2d(192, 64, kernel_size=1, stride=1)\n        )\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        x2 = self.branch2(x)\n        x3 = self.branch3(x)\n        out = torch.cat((x0, x1, x2, x3), 1)\n        return out\n\n\nclass Block35(nn.Module):\n\n    def __init__(self, scale=1.0):\n        super(Block35, self).__init__()\n\n        self.scale = scale\n\n        self.branch0 = BasicConv2d(320, 32, kernel_size=1, stride=1)\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(320, 32, kernel_size=1, stride=1),\n            BasicConv2d(32, 32, kernel_size=3, stride=1, padding=1)\n        )\n\n        self.branch2 = nn.Sequential(\n            BasicConv2d(320, 32, kernel_size=1, stride=1),\n            BasicConv2d(32, 48, kernel_size=3, stride=1, padding=1),\n            BasicConv2d(48, 64, kernel_size=3, stride=1, padding=1)\n        )\n\n        self.conv2d = nn.Conv2d(128, 320, kernel_size=1, 
stride=1)\n        self.relu = nn.ReLU(inplace=False)\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        x2 = self.branch2(x)\n        out = torch.cat((x0, x1, x2), 1)\n        out = self.conv2d(out)\n        out = out * self.scale + x\n        out = self.relu(out)\n        return out\n\n\nclass Mixed_6a(nn.Module):\n\n    def __init__(self):\n        super(Mixed_6a, self).__init__()\n\n        self.branch0 = BasicConv2d(320, 384, kernel_size=3, stride=2)\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(320, 256, kernel_size=1, stride=1),\n            BasicConv2d(256, 256, kernel_size=3, stride=1, padding=1),\n            BasicConv2d(256, 384, kernel_size=3, stride=2)\n        )\n\n        self.branch2 = nn.MaxPool2d(3, stride=2)\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        x2 = self.branch2(x)\n        out = torch.cat((x0, x1, x2), 1)\n        return out\n\n\nclass Block17(nn.Module):\n\n    def __init__(self, scale=1.0):\n        super(Block17, self).__init__()\n\n        self.scale = scale\n\n        self.branch0 = BasicConv2d(1088, 192, kernel_size=1, stride=1)\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(1088, 128, kernel_size=1, stride=1),\n            BasicConv2d(\n                128, 160, kernel_size=(1, 7), stride=1, padding=(0, 3)\n            ),\n            BasicConv2d(\n                160, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)\n            )\n        )\n\n        self.conv2d = nn.Conv2d(384, 1088, kernel_size=1, stride=1)\n        self.relu = nn.ReLU(inplace=False)\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        out = torch.cat((x0, x1), 1)\n        out = self.conv2d(out)\n        out = out * self.scale + x\n        out = self.relu(out)\n        return out\n\n\nclass Mixed_7a(nn.Module):\n\n    def __init__(self):\n        super(Mixed_7a, self).__init__()\n\n        self.branch0 = nn.Sequential(\n            BasicConv2d(1088, 256, kernel_size=1, stride=1),\n            BasicConv2d(256, 384, kernel_size=3, stride=2)\n        )\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(1088, 256, kernel_size=1, stride=1),\n            BasicConv2d(256, 288, kernel_size=3, stride=2)\n        )\n\n        self.branch2 = nn.Sequential(\n            BasicConv2d(1088, 256, kernel_size=1, stride=1),\n            BasicConv2d(256, 288, kernel_size=3, stride=1, padding=1),\n            BasicConv2d(288, 320, kernel_size=3, stride=2)\n        )\n\n        self.branch3 = nn.MaxPool2d(3, stride=2)\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        x2 = self.branch2(x)\n        x3 = self.branch3(x)\n        out = torch.cat((x0, x1, x2, x3), 1)\n        return out\n\n\nclass Block8(nn.Module):\n\n    def __init__(self, scale=1.0, noReLU=False):\n        super(Block8, self).__init__()\n\n        self.scale = scale\n        self.noReLU = noReLU\n\n        self.branch0 = BasicConv2d(2080, 192, kernel_size=1, stride=1)\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(2080, 192, kernel_size=1, stride=1),\n            BasicConv2d(\n                192, 224, kernel_size=(1, 3), stride=1, padding=(0, 1)\n            ),\n            BasicConv2d(\n                224, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)\n            )\n        )\n\n        self.conv2d = nn.Conv2d(448, 2080, kernel_size=1, stride=1)\n        if not 
self.noReLU:\n            self.relu = nn.ReLU(inplace=False)\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        out = torch.cat((x0, x1), 1)\n        out = self.conv2d(out)\n        out = out * self.scale + x\n        if not self.noReLU:\n            out = self.relu(out)\n        return out\n\n\n# ----------------\n# Model Definition\n# ----------------\nclass InceptionResNetV2(nn.Module):\n    \"\"\"Inception-ResNet-V2.\n\n    Reference:\n        Szegedy et al. Inception-v4, Inception-ResNet and the Impact of Residual\n        Connections on Learning. AAAI 2017.\n\n    Public keys:\n        - ``inceptionresnetv2``: Inception-ResNet-V2.\n    \"\"\"\n\n    def __init__(self, num_classes, loss='softmax', **kwargs):\n        super(InceptionResNetV2, self).__init__()\n        self.loss = loss\n\n        # Modules\n        self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)\n        self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)\n        self.conv2d_2b = BasicConv2d(\n            32, 64, kernel_size=3, stride=1, padding=1\n        )\n        self.maxpool_3a = nn.MaxPool2d(3, stride=2)\n        self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)\n        self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)\n        self.maxpool_5a = nn.MaxPool2d(3, stride=2)\n        self.mixed_5b = Mixed_5b()\n        self.repeat = nn.Sequential(\n            Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),\n            Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),\n            Block35(scale=0.17), Block35(scale=0.17), Block35(scale=0.17),\n            Block35(scale=0.17)\n        )\n        self.mixed_6a = Mixed_6a()\n        self.repeat_1 = nn.Sequential(\n            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),\n            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),\n            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),\n            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),\n            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),\n            Block17(scale=0.10), Block17(scale=0.10), Block17(scale=0.10),\n            Block17(scale=0.10), Block17(scale=0.10)\n        )\n        self.mixed_7a = Mixed_7a()\n        self.repeat_2 = nn.Sequential(\n            Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20),\n            Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20),\n            Block8(scale=0.20), Block8(scale=0.20), Block8(scale=0.20)\n        )\n\n        self.block8 = Block8(noReLU=True)\n        self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.classifier = nn.Linear(1536, num_classes)\n\n    def load_imagenet_weights(self):\n        settings = pretrained_settings['inceptionresnetv2']['imagenet']\n        pretrain_dict = model_zoo.load_url(settings['url'])\n        model_dict = self.state_dict()\n        pretrain_dict = {\n            k: v\n            for k, v in pretrain_dict.items()\n            if k in model_dict and model_dict[k].size() == v.size()\n        }\n        model_dict.update(pretrain_dict)\n        self.load_state_dict(model_dict)\n\n    def featuremaps(self, x):\n        x = self.conv2d_1a(x)\n        x = self.conv2d_2a(x)\n        x = self.conv2d_2b(x)\n        x = self.maxpool_3a(x)\n        x = self.conv2d_3b(x)\n        x = self.conv2d_4a(x)\n        x = 
self.maxpool_5a(x)\n        x = self.mixed_5b(x)\n        x = self.repeat(x)\n        x = self.mixed_6a(x)\n        x = self.repeat_1(x)\n        x = self.mixed_7a(x)\n        x = self.repeat_2(x)\n        x = self.block8(x)\n        x = self.conv2d_7b(x)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef inceptionresnetv2(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = InceptionResNetV2(num_classes=num_classes, loss=loss, **kwargs)\n    if pretrained:\n        model.load_imagenet_weights()\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/inceptionv4.py",
    "content": "from __future__ import division, absolute_import\nimport torch\nimport torch.nn as nn\nimport torch.utils.model_zoo as model_zoo\n\n__all__ = ['inceptionv4']\n\"\"\"\nCode imported from https://github.com/Cadene/pretrained-models.pytorch\n\"\"\"\n\npretrained_settings = {\n    'inceptionv4': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 299, 299],\n            'input_range': [0, 1],\n            'mean': [0.5, 0.5, 0.5],\n            'std': [0.5, 0.5, 0.5],\n            'num_classes': 1000\n        },\n        'imagenet+background': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/inceptionv4-8e4777a0.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 299, 299],\n            'input_range': [0, 1],\n            'mean': [0.5, 0.5, 0.5],\n            'std': [0.5, 0.5, 0.5],\n            'num_classes': 1001\n        }\n    }\n}\n\n\nclass BasicConv2d(nn.Module):\n\n    def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):\n        super(BasicConv2d, self).__init__()\n        self.conv = nn.Conv2d(\n            in_planes,\n            out_planes,\n            kernel_size=kernel_size,\n            stride=stride,\n            padding=padding,\n            bias=False\n        ) # verify bias false\n        self.bn = nn.BatchNorm2d(\n            out_planes,\n            eps=0.001, # value found in tensorflow\n            momentum=0.1, # default pytorch value\n            affine=True\n        )\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        x = self.relu(x)\n        return x\n\n\nclass Mixed_3a(nn.Module):\n\n    def __init__(self):\n        super(Mixed_3a, self).__init__()\n        self.maxpool = nn.MaxPool2d(3, stride=2)\n        self.conv = BasicConv2d(64, 96, kernel_size=3, stride=2)\n\n    def forward(self, x):\n        x0 = self.maxpool(x)\n        x1 = self.conv(x)\n        out = torch.cat((x0, x1), 1)\n        return out\n\n\nclass Mixed_4a(nn.Module):\n\n    def __init__(self):\n        super(Mixed_4a, self).__init__()\n\n        self.branch0 = nn.Sequential(\n            BasicConv2d(160, 64, kernel_size=1, stride=1),\n            BasicConv2d(64, 96, kernel_size=3, stride=1)\n        )\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(160, 64, kernel_size=1, stride=1),\n            BasicConv2d(64, 64, kernel_size=(1, 7), stride=1, padding=(0, 3)),\n            BasicConv2d(64, 64, kernel_size=(7, 1), stride=1, padding=(3, 0)),\n            BasicConv2d(64, 96, kernel_size=(3, 3), stride=1)\n        )\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        out = torch.cat((x0, x1), 1)\n        return out\n\n\nclass Mixed_5a(nn.Module):\n\n    def __init__(self):\n        super(Mixed_5a, self).__init__()\n        self.conv = BasicConv2d(192, 192, kernel_size=3, stride=2)\n        self.maxpool = nn.MaxPool2d(3, stride=2)\n\n    def forward(self, x):\n        x0 = self.conv(x)\n        x1 = self.maxpool(x)\n        out = torch.cat((x0, x1), 1)\n        return out\n\n\nclass Inception_A(nn.Module):\n\n    def __init__(self):\n        super(Inception_A, self).__init__()\n        self.branch0 = BasicConv2d(384, 96, kernel_size=1, stride=1)\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(384, 64, 
kernel_size=1, stride=1),\n            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1)\n        )\n\n        self.branch2 = nn.Sequential(\n            BasicConv2d(384, 64, kernel_size=1, stride=1),\n            BasicConv2d(64, 96, kernel_size=3, stride=1, padding=1),\n            BasicConv2d(96, 96, kernel_size=3, stride=1, padding=1)\n        )\n\n        self.branch3 = nn.Sequential(\n            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),\n            BasicConv2d(384, 96, kernel_size=1, stride=1)\n        )\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        x2 = self.branch2(x)\n        x3 = self.branch3(x)\n        out = torch.cat((x0, x1, x2, x3), 1)\n        return out\n\n\nclass Reduction_A(nn.Module):\n\n    def __init__(self):\n        super(Reduction_A, self).__init__()\n        self.branch0 = BasicConv2d(384, 384, kernel_size=3, stride=2)\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(384, 192, kernel_size=1, stride=1),\n            BasicConv2d(192, 224, kernel_size=3, stride=1, padding=1),\n            BasicConv2d(224, 256, kernel_size=3, stride=2)\n        )\n\n        self.branch2 = nn.MaxPool2d(3, stride=2)\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        x2 = self.branch2(x)\n        out = torch.cat((x0, x1, x2), 1)\n        return out\n\n\nclass Inception_B(nn.Module):\n\n    def __init__(self):\n        super(Inception_B, self).__init__()\n        self.branch0 = BasicConv2d(1024, 384, kernel_size=1, stride=1)\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(1024, 192, kernel_size=1, stride=1),\n            BasicConv2d(\n                192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)\n            ),\n            BasicConv2d(\n                224, 256, kernel_size=(7, 1), stride=1, padding=(3, 0)\n            )\n        )\n\n        self.branch2 = nn.Sequential(\n            BasicConv2d(1024, 192, kernel_size=1, stride=1),\n            BasicConv2d(\n                192, 192, kernel_size=(7, 1), stride=1, padding=(3, 0)\n            ),\n            BasicConv2d(\n                192, 224, kernel_size=(1, 7), stride=1, padding=(0, 3)\n            ),\n            BasicConv2d(\n                224, 224, kernel_size=(7, 1), stride=1, padding=(3, 0)\n            ),\n            BasicConv2d(\n                224, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)\n            )\n        )\n\n        self.branch3 = nn.Sequential(\n            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),\n            BasicConv2d(1024, 128, kernel_size=1, stride=1)\n        )\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        x2 = self.branch2(x)\n        x3 = self.branch3(x)\n        out = torch.cat((x0, x1, x2, x3), 1)\n        return out\n\n\nclass Reduction_B(nn.Module):\n\n    def __init__(self):\n        super(Reduction_B, self).__init__()\n\n        self.branch0 = nn.Sequential(\n            BasicConv2d(1024, 192, kernel_size=1, stride=1),\n            BasicConv2d(192, 192, kernel_size=3, stride=2)\n        )\n\n        self.branch1 = nn.Sequential(\n            BasicConv2d(1024, 256, kernel_size=1, stride=1),\n            BasicConv2d(\n                256, 256, kernel_size=(1, 7), stride=1, padding=(0, 3)\n            ),\n            BasicConv2d(\n                256, 320, kernel_size=(7, 1), stride=1, padding=(3, 0)\n            ), BasicConv2d(320, 320, 
kernel_size=3, stride=2)\n        )\n\n        self.branch2 = nn.MaxPool2d(3, stride=2)\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n        x1 = self.branch1(x)\n        x2 = self.branch2(x)\n        out = torch.cat((x0, x1, x2), 1)\n        return out\n\n\nclass Inception_C(nn.Module):\n\n    def __init__(self):\n        super(Inception_C, self).__init__()\n\n        self.branch0 = BasicConv2d(1536, 256, kernel_size=1, stride=1)\n\n        self.branch1_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)\n        self.branch1_1a = BasicConv2d(\n            384, 256, kernel_size=(1, 3), stride=1, padding=(0, 1)\n        )\n        self.branch1_1b = BasicConv2d(\n            384, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)\n        )\n\n        self.branch2_0 = BasicConv2d(1536, 384, kernel_size=1, stride=1)\n        self.branch2_1 = BasicConv2d(\n            384, 448, kernel_size=(3, 1), stride=1, padding=(1, 0)\n        )\n        self.branch2_2 = BasicConv2d(\n            448, 512, kernel_size=(1, 3), stride=1, padding=(0, 1)\n        )\n        self.branch2_3a = BasicConv2d(\n            512, 256, kernel_size=(1, 3), stride=1, padding=(0, 1)\n        )\n        self.branch2_3b = BasicConv2d(\n            512, 256, kernel_size=(3, 1), stride=1, padding=(1, 0)\n        )\n\n        self.branch3 = nn.Sequential(\n            nn.AvgPool2d(3, stride=1, padding=1, count_include_pad=False),\n            BasicConv2d(1536, 256, kernel_size=1, stride=1)\n        )\n\n    def forward(self, x):\n        x0 = self.branch0(x)\n\n        x1_0 = self.branch1_0(x)\n        x1_1a = self.branch1_1a(x1_0)\n        x1_1b = self.branch1_1b(x1_0)\n        x1 = torch.cat((x1_1a, x1_1b), 1)\n\n        x2_0 = self.branch2_0(x)\n        x2_1 = self.branch2_1(x2_0)\n        x2_2 = self.branch2_2(x2_1)\n        x2_3a = self.branch2_3a(x2_2)\n        x2_3b = self.branch2_3b(x2_2)\n        x2 = torch.cat((x2_3a, x2_3b), 1)\n\n        x3 = self.branch3(x)\n\n        out = torch.cat((x0, x1, x2, x3), 1)\n        return out\n\n\nclass InceptionV4(nn.Module):\n    \"\"\"Inception-v4.\n\n    Reference:\n        Szegedy et al. Inception-v4, Inception-ResNet and the Impact of Residual\n        Connections on Learning. 
AAAI 2017.\n\n    Public keys:\n        - ``inceptionv4``: InceptionV4.\n    \"\"\"\n\n    def __init__(self, num_classes, loss, **kwargs):\n        super(InceptionV4, self).__init__()\n        self.loss = loss\n\n        self.features = nn.Sequential(\n            BasicConv2d(3, 32, kernel_size=3, stride=2),\n            BasicConv2d(32, 32, kernel_size=3, stride=1),\n            BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1),\n            Mixed_3a(),\n            Mixed_4a(),\n            Mixed_5a(),\n            Inception_A(),\n            Inception_A(),\n            Inception_A(),\n            Inception_A(),\n            Reduction_A(), # Mixed_6a\n            Inception_B(),\n            Inception_B(),\n            Inception_B(),\n            Inception_B(),\n            Inception_B(),\n            Inception_B(),\n            Inception_B(),\n            Reduction_B(), # Mixed_7a\n            Inception_C(),\n            Inception_C(),\n            Inception_C()\n        )\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.classifier = nn.Linear(1536, num_classes)\n\n    def forward(self, x):\n        f = self.features(x)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef inceptionv4(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = InceptionV4(num_classes, loss, **kwargs)\n    if pretrained:\n        model_url = pretrained_settings['inceptionv4']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/mlfn.py",
    "content": "from __future__ import division, absolute_import\nimport torch\nimport torch.utils.model_zoo as model_zoo\nfrom torch import nn\nfrom torch.nn import functional as F\n\n__all__ = ['mlfn']\n\nmodel_urls = {\n    # training epoch = 5, top1 = 51.6\n    'imagenet':\n    'https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk',\n}\n\n\nclass MLFNBlock(nn.Module):\n\n    def __init__(\n        self, in_channels, out_channels, stride, fsm_channels, groups=32\n    ):\n        super(MLFNBlock, self).__init__()\n        self.groups = groups\n        mid_channels = out_channels // 2\n\n        # Factor Modules\n        self.fm_conv1 = nn.Conv2d(in_channels, mid_channels, 1, bias=False)\n        self.fm_bn1 = nn.BatchNorm2d(mid_channels)\n        self.fm_conv2 = nn.Conv2d(\n            mid_channels,\n            mid_channels,\n            3,\n            stride=stride,\n            padding=1,\n            bias=False,\n            groups=self.groups\n        )\n        self.fm_bn2 = nn.BatchNorm2d(mid_channels)\n        self.fm_conv3 = nn.Conv2d(mid_channels, out_channels, 1, bias=False)\n        self.fm_bn3 = nn.BatchNorm2d(out_channels)\n\n        # Factor Selection Module\n        self.fsm = nn.Sequential(\n            nn.AdaptiveAvgPool2d(1),\n            nn.Conv2d(in_channels, fsm_channels[0], 1),\n            nn.BatchNorm2d(fsm_channels[0]),\n            nn.ReLU(inplace=True),\n            nn.Conv2d(fsm_channels[0], fsm_channels[1], 1),\n            nn.BatchNorm2d(fsm_channels[1]),\n            nn.ReLU(inplace=True),\n            nn.Conv2d(fsm_channels[1], self.groups, 1),\n            nn.BatchNorm2d(self.groups),\n            nn.Sigmoid(),\n        )\n\n        self.downsample = None\n        if in_channels != out_channels or stride > 1:\n            self.downsample = nn.Sequential(\n                nn.Conv2d(\n                    in_channels, out_channels, 1, stride=stride, bias=False\n                ),\n                nn.BatchNorm2d(out_channels),\n            )\n\n    def forward(self, x):\n        residual = x\n        s = self.fsm(x)\n\n        # reduce dimension\n        x = self.fm_conv1(x)\n        x = self.fm_bn1(x)\n        x = F.relu(x, inplace=True)\n\n        # group convolution\n        x = self.fm_conv2(x)\n        x = self.fm_bn2(x)\n        x = F.relu(x, inplace=True)\n\n        # factor selection\n        b, c = x.size(0), x.size(1)\n        n = c // self.groups\n        ss = s.repeat(1, n, 1, 1) # from (b, g, 1, 1) to (b, g*n=c, 1, 1)\n        ss = ss.view(b, n, self.groups, 1, 1)\n        ss = ss.permute(0, 2, 1, 3, 4).contiguous()\n        ss = ss.view(b, c, 1, 1)\n        x = ss * x\n\n        # recover dimension\n        x = self.fm_conv3(x)\n        x = self.fm_bn3(x)\n        x = F.relu(x, inplace=True)\n\n        if self.downsample is not None:\n            residual = self.downsample(residual)\n\n        return F.relu(residual + x, inplace=True), s\n\n\nclass MLFN(nn.Module):\n    \"\"\"Multi-Level Factorisation Net.\n\n    Reference:\n        Chang et al. Multi-Level Factorisation Net for\n        Person Re-Identification. 
CVPR 2018.\n\n    Public keys:\n        - ``mlfn``: MLFN (Multi-Level Factorisation Net).\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        loss='softmax',\n        groups=32,\n        channels=[64, 256, 512, 1024, 2048],\n        embed_dim=1024,\n        **kwargs\n    ):\n        super(MLFN, self).__init__()\n        self.loss = loss\n        self.groups = groups\n\n        # first convolutional layer\n        self.conv1 = nn.Conv2d(3, channels[0], 7, stride=2, padding=3)\n        self.bn1 = nn.BatchNorm2d(channels[0])\n        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)\n\n        # main body\n        self.feature = nn.ModuleList(\n            [\n                # layer 1-3\n                MLFNBlock(channels[0], channels[1], 1, [128, 64], self.groups),\n                MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),\n                MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),\n                # layer 4-7\n                MLFNBlock(\n                    channels[1], channels[2], 2, [256, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[2], channels[2], 1, [256, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[2], channels[2], 1, [256, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[2], channels[2], 1, [256, 128], self.groups\n                ),\n                # layer 8-13\n                MLFNBlock(\n                    channels[2], channels[3], 2, [512, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[3], channels[3], 1, [512, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[3], channels[3], 1, [512, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[3], channels[3], 1, [512, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[3], channels[3], 1, [512, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[3], channels[3], 1, [512, 128], self.groups\n                ),\n                # layer 14-16\n                MLFNBlock(\n                    channels[3], channels[4], 2, [512, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[4], channels[4], 1, [512, 128], self.groups\n                ),\n                MLFNBlock(\n                    channels[4], channels[4], 1, [512, 128], self.groups\n                ),\n            ]\n        )\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n\n        # projection functions\n        self.fc_x = nn.Sequential(\n            nn.Conv2d(channels[4], embed_dim, 1, bias=False),\n            nn.BatchNorm2d(embed_dim),\n            nn.ReLU(inplace=True),\n        )\n        self.fc_s = nn.Sequential(\n            nn.Conv2d(self.groups * 16, embed_dim, 1, bias=False),\n            nn.BatchNorm2d(embed_dim),\n            nn.ReLU(inplace=True),\n        )\n\n        self.classifier = nn.Linear(embed_dim, num_classes)\n\n        self.init_params()\n\n    def init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n    
        elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = F.relu(x, inplace=True)\n        x = self.maxpool(x)\n\n        s_hat = []\n        for block in self.feature:\n            x, s = block(x)\n            s_hat.append(s)\n        s_hat = torch.cat(s_hat, 1)\n\n        x = self.global_avgpool(x)\n        x = self.fc_x(x)\n        s_hat = self.fc_s(s_hat)\n\n        v = (x+s_hat) * 0.5\n        v = v.view(v.size(0), -1)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef mlfn(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = MLFN(num_classes, loss, **kwargs)\n    if pretrained:\n        # init_pretrained_weights(model, model_urls['imagenet'])\n        import warnings\n        warnings.warn(\n            'The imagenet pretrained weights need to be manually downloaded from {}'\n            .format(model_urls['imagenet'])\n        )\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/mobilenetv2.py",
    "content": "from __future__ import division, absolute_import\nimport torch.utils.model_zoo as model_zoo\nfrom torch import nn\nfrom torch.nn import functional as F\n\n__all__ = ['mobilenetv2_x1_0', 'mobilenetv2_x1_4']\n\nmodel_urls = {\n    # 1.0: top-1 71.3\n    'mobilenetv2_x1_0':\n    'https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c',\n    # 1.4: top-1 73.9\n    'mobilenetv2_x1_4':\n    'https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk',\n}\n\n\nclass ConvBlock(nn.Module):\n    \"\"\"Basic convolutional block.\n    \n    convolution (bias discarded) + batch normalization + relu6.\n\n    Args:\n        in_c (int): number of input channels.\n        out_c (int): number of output channels.\n        k (int or tuple): kernel size.\n        s (int or tuple): stride.\n        p (int or tuple): padding.\n        g (int): number of blocked connections from input channels\n            to output channels (default: 1).\n    \"\"\"\n\n    def __init__(self, in_c, out_c, k, s=1, p=0, g=1):\n        super(ConvBlock, self).__init__()\n        self.conv = nn.Conv2d(\n            in_c, out_c, k, stride=s, padding=p, bias=False, groups=g\n        )\n        self.bn = nn.BatchNorm2d(out_c)\n\n    def forward(self, x):\n        return F.relu6(self.bn(self.conv(x)))\n\n\nclass Bottleneck(nn.Module):\n\n    def __init__(self, in_channels, out_channels, expansion_factor, stride=1):\n        super(Bottleneck, self).__init__()\n        mid_channels = in_channels * expansion_factor\n        self.use_residual = stride == 1 and in_channels == out_channels\n        self.conv1 = ConvBlock(in_channels, mid_channels, 1)\n        self.dwconv2 = ConvBlock(\n            mid_channels, mid_channels, 3, stride, 1, g=mid_channels\n        )\n        self.conv3 = nn.Sequential(\n            nn.Conv2d(mid_channels, out_channels, 1, bias=False),\n            nn.BatchNorm2d(out_channels),\n        )\n\n    def forward(self, x):\n        m = self.conv1(x)\n        m = self.dwconv2(m)\n        m = self.conv3(m)\n        if self.use_residual:\n            return x + m\n        else:\n            return m\n\n\nclass MobileNetV2(nn.Module):\n    \"\"\"MobileNetV2.\n\n    Reference:\n        Sandler et al. MobileNetV2: Inverted Residuals and\n        Linear Bottlenecks. 
CVPR 2018.\n\n    Public keys:\n        - ``mobilenetv2_x1_0``: MobileNetV2 x1.0.\n        - ``mobilenetv2_x1_4``: MobileNetV2 x1.4.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        width_mult=1,\n        loss='softmax',\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    ):\n        super(MobileNetV2, self).__init__()\n        self.loss = loss\n        self.in_channels = int(32 * width_mult)\n        self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280\n\n        # construct layers\n        self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)\n        self.conv2 = self._make_layer(\n            Bottleneck, 1, int(16 * width_mult), 1, 1\n        )\n        self.conv3 = self._make_layer(\n            Bottleneck, 6, int(24 * width_mult), 2, 2\n        )\n        self.conv4 = self._make_layer(\n            Bottleneck, 6, int(32 * width_mult), 3, 2\n        )\n        self.conv5 = self._make_layer(\n            Bottleneck, 6, int(64 * width_mult), 4, 2\n        )\n        self.conv6 = self._make_layer(\n            Bottleneck, 6, int(96 * width_mult), 3, 1\n        )\n        self.conv7 = self._make_layer(\n            Bottleneck, 6, int(160 * width_mult), 3, 2\n        )\n        self.conv8 = self._make_layer(\n            Bottleneck, 6, int(320 * width_mult), 1, 1\n        )\n        self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)\n\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.fc = self._construct_fc_layer(\n            fc_dims, self.feature_dim, dropout_p\n        )\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        self._init_params()\n\n    def _make_layer(self, block, t, c, n, s):\n        # t: expansion factor\n        # c: output channels\n        # n: number of blocks\n        # s: stride for first layer\n        layers = []\n        layers.append(block(self.in_channels, c, t, s))\n        self.in_channels = c\n        for i in range(1, n):\n            layers.append(block(self.in_channels, c, t))\n        return nn.Sequential(*layers)\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"Constructs fully connected layer.\n\n        Args:\n            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed\n            input_dim (int): input dimension\n            dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm2d):\n    
            nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def featuremaps(self, x):\n        x = self.conv1(x)\n        x = self.conv2(x)\n        x = self.conv3(x)\n        x = self.conv4(x)\n        x = self.conv5(x)\n        x = self.conv6(x)\n        x = self.conv7(x)\n        x = self.conv8(x)\n        x = self.conv9(x)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if self.fc is not None:\n            v = self.fc(v)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef mobilenetv2_x1_0(num_classes, loss, pretrained=True, **kwargs):\n    model = MobileNetV2(\n        num_classes,\n        loss=loss,\n        width_mult=1,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        # init_pretrained_weights(model, model_urls['mobilenetv2_x1_0'])\n        import warnings\n        warnings.warn(\n            'The imagenet pretrained weights need to be manually downloaded from {}'\n            .format(model_urls['mobilenetv2_x1_0'])\n        )\n    return model\n\n\ndef mobilenetv2_x1_4(num_classes, loss, pretrained=True, **kwargs):\n    model = MobileNetV2(\n        num_classes,\n        loss=loss,\n        width_mult=1.4,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        # init_pretrained_weights(model, model_urls['mobilenetv2_x1_4'])\n        import warnings\n        warnings.warn(\n            'The imagenet pretrained weights need to be manually downloaded from {}'\n            .format(model_urls['mobilenetv2_x1_4'])\n        )\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/mudeep.py",
    "content": "from __future__ import division, absolute_import\nimport torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\n__all__ = ['MuDeep']\n\n\nclass ConvBlock(nn.Module):\n    \"\"\"Basic convolutional block.\n    \n    convolution + batch normalization + relu.\n\n    Args:\n        in_c (int): number of input channels.\n        out_c (int): number of output channels.\n        k (int or tuple): kernel size.\n        s (int or tuple): stride.\n        p (int or tuple): padding.\n    \"\"\"\n\n    def __init__(self, in_c, out_c, k, s, p):\n        super(ConvBlock, self).__init__()\n        self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)\n        self.bn = nn.BatchNorm2d(out_c)\n\n    def forward(self, x):\n        return F.relu(self.bn(self.conv(x)))\n\n\nclass ConvLayers(nn.Module):\n    \"\"\"Preprocessing layers.\"\"\"\n\n    def __init__(self):\n        super(ConvLayers, self).__init__()\n        self.conv1 = ConvBlock(3, 48, k=3, s=1, p=1)\n        self.conv2 = ConvBlock(48, 96, k=3, s=1, p=1)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.conv2(x)\n        x = self.maxpool(x)\n        return x\n\n\nclass MultiScaleA(nn.Module):\n    \"\"\"Multi-scale stream layer A (Sec.3.1)\"\"\"\n\n    def __init__(self):\n        super(MultiScaleA, self).__init__()\n        self.stream1 = nn.Sequential(\n            ConvBlock(96, 96, k=1, s=1, p=0),\n            ConvBlock(96, 24, k=3, s=1, p=1),\n        )\n        self.stream2 = nn.Sequential(\n            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),\n            ConvBlock(96, 24, k=1, s=1, p=0),\n        )\n        self.stream3 = ConvBlock(96, 24, k=1, s=1, p=0)\n        self.stream4 = nn.Sequential(\n            ConvBlock(96, 16, k=1, s=1, p=0),\n            ConvBlock(16, 24, k=3, s=1, p=1),\n            ConvBlock(24, 24, k=3, s=1, p=1),\n        )\n\n    def forward(self, x):\n        s1 = self.stream1(x)\n        s2 = self.stream2(x)\n        s3 = self.stream3(x)\n        s4 = self.stream4(x)\n        y = torch.cat([s1, s2, s3, s4], dim=1)\n        return y\n\n\nclass Reduction(nn.Module):\n    \"\"\"Reduction layer (Sec.3.1)\"\"\"\n\n    def __init__(self):\n        super(Reduction, self).__init__()\n        self.stream1 = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.stream2 = ConvBlock(96, 96, k=3, s=2, p=1)\n        self.stream3 = nn.Sequential(\n            ConvBlock(96, 48, k=1, s=1, p=0),\n            ConvBlock(48, 56, k=3, s=1, p=1),\n            ConvBlock(56, 64, k=3, s=2, p=1),\n        )\n\n    def forward(self, x):\n        s1 = self.stream1(x)\n        s2 = self.stream2(x)\n        s3 = self.stream3(x)\n        y = torch.cat([s1, s2, s3], dim=1)\n        return y\n\n\nclass MultiScaleB(nn.Module):\n    \"\"\"Multi-scale stream layer B (Sec.3.1)\"\"\"\n\n    def __init__(self):\n        super(MultiScaleB, self).__init__()\n        self.stream1 = nn.Sequential(\n            nn.AvgPool2d(kernel_size=3, stride=1, padding=1),\n            ConvBlock(256, 256, k=1, s=1, p=0),\n        )\n        self.stream2 = nn.Sequential(\n            ConvBlock(256, 64, k=1, s=1, p=0),\n            ConvBlock(64, 128, k=(1, 3), s=1, p=(0, 1)),\n            ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)),\n        )\n        self.stream3 = ConvBlock(256, 256, k=1, s=1, p=0)\n        self.stream4 = nn.Sequential(\n            ConvBlock(256, 64, k=1, s=1, p=0),\n            ConvBlock(64, 64, k=(1, 3), 
s=1, p=(0, 1)),\n            ConvBlock(64, 128, k=(3, 1), s=1, p=(1, 0)),\n            ConvBlock(128, 128, k=(1, 3), s=1, p=(0, 1)),\n            ConvBlock(128, 256, k=(3, 1), s=1, p=(1, 0)),\n        )\n\n    def forward(self, x):\n        s1 = self.stream1(x)\n        s2 = self.stream2(x)\n        s3 = self.stream3(x)\n        s4 = self.stream4(x)\n        return s1, s2, s3, s4\n\n\nclass Fusion(nn.Module):\n    \"\"\"Saliency-based learning fusion layer (Sec.3.2)\"\"\"\n\n    def __init__(self):\n        super(Fusion, self).__init__()\n        self.a1 = nn.Parameter(torch.rand(1, 256, 1, 1))\n        self.a2 = nn.Parameter(torch.rand(1, 256, 1, 1))\n        self.a3 = nn.Parameter(torch.rand(1, 256, 1, 1))\n        self.a4 = nn.Parameter(torch.rand(1, 256, 1, 1))\n\n        # We add an average pooling layer to reduce the spatial dimension\n        # of feature maps, which differs from the original paper.\n        self.avgpool = nn.AvgPool2d(kernel_size=4, stride=4, padding=0)\n\n    def forward(self, x1, x2, x3, x4):\n        s1 = self.a1.expand_as(x1) * x1\n        s2 = self.a2.expand_as(x2) * x2\n        s3 = self.a3.expand_as(x3) * x3\n        s4 = self.a4.expand_as(x4) * x4\n        y = self.avgpool(s1 + s2 + s3 + s4)\n        return y\n\n\nclass MuDeep(nn.Module):\n    \"\"\"Multiscale deep neural network.\n\n    Reference:\n        Qian et al. Multi-scale Deep Learning Architectures\n        for Person Re-identification. ICCV 2017.\n\n    Public keys:\n        - ``mudeep``: Multiscale deep neural network.\n    \"\"\"\n\n    def __init__(self, num_classes, loss='softmax', **kwargs):\n        super(MuDeep, self).__init__()\n        self.loss = loss\n\n        self.block1 = ConvLayers()\n        self.block2 = MultiScaleA()\n        self.block3 = Reduction()\n        self.block4 = MultiScaleB()\n        self.block5 = Fusion()\n\n        # Due to this fully connected layer, input image has to be fixed\n        # in shape, i.e. (3, 256, 128), such that the last convolutional feature\n        # maps are of shape (256, 16, 8). If input shape is changed,\n        # the input dimension of this layer has to be changed accordingly.\n        self.fc = nn.Sequential(\n            nn.Linear(256 * 16 * 8, 4096),\n            nn.BatchNorm1d(4096),\n            nn.ReLU(),\n        )\n        self.classifier = nn.Linear(4096, num_classes)\n        self.feat_dim = 4096\n\n    def featuremaps(self, x):\n        x = self.block1(x)\n        x = self.block2(x)\n        x = self.block3(x)\n        x = self.block4(x)\n        x = self.block5(*x)\n        return x\n\n    def forward(self, x):\n        x = self.featuremaps(x)\n        x = x.view(x.size(0), -1)\n        x = self.fc(x)\n        y = self.classifier(x)\n\n        if not self.training:\n            return x\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, x\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/nasnet.py",
    "content": "from __future__ import division, absolute_import\nimport torch\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.model_zoo as model_zoo\n\n__all__ = ['nasnetamobile']\n\"\"\"\nNASNet Mobile\nThanks to Anastasiia (https://github.com/DagnyT) for the great help, support and motivation!\n\n\n------------------------------------------------------------------------------------\n      Architecture       | Top-1 Acc | Top-5 Acc |  Multiply-Adds |  Params (M)\n------------------------------------------------------------------------------------\n|   NASNet-A (4 @ 1056)  |   74.08%  |   91.74%  |       564 M    |     5.3        |\n------------------------------------------------------------------------------------\n# References:\n - [Learning Transferable Architectures for Scalable Image Recognition]\n    (https://arxiv.org/abs/1707.07012)\n\"\"\"\n\"\"\"\nCode imported from https://github.com/Cadene/pretrained-models.pytorch\n\"\"\"\n\npretrained_settings = {\n    'nasnetamobile': {\n        'imagenet': {\n            # 'url': 'https://github.com/veronikayurchuk/pretrained-models.pytorch/releases/download/v1.0/nasnetmobile-7e03cead.pth.tar',\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/nasnetamobile-7e03cead.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 224, 224], # resize 256\n            'input_range': [0, 1],\n            'mean': [0.5, 0.5, 0.5],\n            'std': [0.5, 0.5, 0.5],\n            'num_classes': 1000\n        },\n        # 'imagenet+background': {\n        #     # 'url': 'http://data.lip6.fr/cadene/pretrainedmodels/nasnetalarge-a1897284.pth',\n        #     'input_space': 'RGB',\n        #     'input_size': [3, 224, 224], # resize 256\n        #     'input_range': [0, 1],\n        #     'mean': [0.5, 0.5, 0.5],\n        #     'std': [0.5, 0.5, 0.5],\n        #     'num_classes': 1001\n        # }\n    }\n}\n\n\nclass MaxPoolPad(nn.Module):\n\n    def __init__(self):\n        super(MaxPoolPad, self).__init__()\n        self.pad = nn.ZeroPad2d((1, 0, 1, 0))\n        self.pool = nn.MaxPool2d(3, stride=2, padding=1)\n\n    def forward(self, x):\n        x = self.pad(x)\n        x = self.pool(x)\n        x = x[:, :, 1:, 1:].contiguous()\n        return x\n\n\nclass AvgPoolPad(nn.Module):\n\n    def __init__(self, stride=2, padding=1):\n        super(AvgPoolPad, self).__init__()\n        self.pad = nn.ZeroPad2d((1, 0, 1, 0))\n        self.pool = nn.AvgPool2d(\n            3, stride=stride, padding=padding, count_include_pad=False\n        )\n\n    def forward(self, x):\n        x = self.pad(x)\n        x = self.pool(x)\n        x = x[:, :, 1:, 1:].contiguous()\n        return x\n\n\nclass SeparableConv2d(nn.Module):\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        dw_kernel,\n        dw_stride,\n        dw_padding,\n        bias=False\n    ):\n        super(SeparableConv2d, self).__init__()\n        self.depthwise_conv2d = nn.Conv2d(\n            in_channels,\n            in_channels,\n            dw_kernel,\n            stride=dw_stride,\n            padding=dw_padding,\n            bias=bias,\n            groups=in_channels\n        )\n        self.pointwise_conv2d = nn.Conv2d(\n            in_channels, out_channels, 1, stride=1, bias=bias\n        )\n\n    def forward(self, x):\n        x = self.depthwise_conv2d(x)\n        x = self.pointwise_conv2d(x)\n        return x\n\n\nclass BranchSeparables(nn.Module):\n\n    def __init__(\n        
self,\n        in_channels,\n        out_channels,\n        kernel_size,\n        stride,\n        padding,\n        name=None,\n        bias=False\n    ):\n        super(BranchSeparables, self).__init__()\n        self.relu = nn.ReLU()\n        self.separable_1 = SeparableConv2d(\n            in_channels, in_channels, kernel_size, stride, padding, bias=bias\n        )\n        self.bn_sep_1 = nn.BatchNorm2d(\n            in_channels, eps=0.001, momentum=0.1, affine=True\n        )\n        self.relu1 = nn.ReLU()\n        self.separable_2 = SeparableConv2d(\n            in_channels, out_channels, kernel_size, 1, padding, bias=bias\n        )\n        self.bn_sep_2 = nn.BatchNorm2d(\n            out_channels, eps=0.001, momentum=0.1, affine=True\n        )\n        self.name = name\n\n    def forward(self, x):\n        x = self.relu(x)\n        if self.name == 'specific':\n            x = nn.ZeroPad2d((1, 0, 1, 0))(x)\n        x = self.separable_1(x)\n        if self.name == 'specific':\n            x = x[:, :, 1:, 1:].contiguous()\n\n        x = self.bn_sep_1(x)\n        x = self.relu1(x)\n        x = self.separable_2(x)\n        x = self.bn_sep_2(x)\n        return x\n\n\nclass BranchSeparablesStem(nn.Module):\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        kernel_size,\n        stride,\n        padding,\n        bias=False\n    ):\n        super(BranchSeparablesStem, self).__init__()\n        self.relu = nn.ReLU()\n        self.separable_1 = SeparableConv2d(\n            in_channels, out_channels, kernel_size, stride, padding, bias=bias\n        )\n        self.bn_sep_1 = nn.BatchNorm2d(\n            out_channels, eps=0.001, momentum=0.1, affine=True\n        )\n        self.relu1 = nn.ReLU()\n        self.separable_2 = SeparableConv2d(\n            out_channels, out_channels, kernel_size, 1, padding, bias=bias\n        )\n        self.bn_sep_2 = nn.BatchNorm2d(\n            out_channels, eps=0.001, momentum=0.1, affine=True\n        )\n\n    def forward(self, x):\n        x = self.relu(x)\n        x = self.separable_1(x)\n        x = self.bn_sep_1(x)\n        x = self.relu1(x)\n        x = self.separable_2(x)\n        x = self.bn_sep_2(x)\n        return x\n\n\nclass BranchSeparablesReduction(BranchSeparables):\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        kernel_size,\n        stride,\n        padding,\n        z_padding=1,\n        bias=False\n    ):\n        BranchSeparables.__init__(\n            self, in_channels, out_channels, kernel_size, stride, padding, bias\n        )\n        self.padding = nn.ZeroPad2d((z_padding, 0, z_padding, 0))\n\n    def forward(self, x):\n        x = self.relu(x)\n        x = self.padding(x)\n        x = self.separable_1(x)\n        x = x[:, :, 1:, 1:].contiguous()\n        x = self.bn_sep_1(x)\n        x = self.relu1(x)\n        x = self.separable_2(x)\n        x = self.bn_sep_2(x)\n        return x\n\n\nclass CellStem0(nn.Module):\n\n    def __init__(self, stem_filters, num_filters=42):\n        super(CellStem0, self).__init__()\n        self.num_filters = num_filters\n        self.stem_filters = stem_filters\n        self.conv_1x1 = nn.Sequential()\n        self.conv_1x1.add_module('relu', nn.ReLU())\n        self.conv_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                self.stem_filters, self.num_filters, 1, stride=1, bias=False\n            )\n        )\n        self.conv_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n  
              self.num_filters, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.comb_iter_0_left = BranchSeparables(\n            self.num_filters, self.num_filters, 5, 2, 2\n        )\n        self.comb_iter_0_right = BranchSeparablesStem(\n            self.stem_filters, self.num_filters, 7, 2, 3, bias=False\n        )\n\n        self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)\n        self.comb_iter_1_right = BranchSeparablesStem(\n            self.stem_filters, self.num_filters, 7, 2, 3, bias=False\n        )\n\n        self.comb_iter_2_left = nn.AvgPool2d(\n            3, stride=2, padding=1, count_include_pad=False\n        )\n        self.comb_iter_2_right = BranchSeparablesStem(\n            self.stem_filters, self.num_filters, 5, 2, 2, bias=False\n        )\n\n        self.comb_iter_3_right = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n\n        self.comb_iter_4_left = BranchSeparables(\n            self.num_filters, self.num_filters, 3, 1, 1, bias=False\n        )\n        self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)\n\n    def forward(self, x):\n        x1 = self.conv_1x1(x)\n\n        x_comb_iter_0_left = self.comb_iter_0_left(x1)\n        x_comb_iter_0_right = self.comb_iter_0_right(x)\n        x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right\n\n        x_comb_iter_1_left = self.comb_iter_1_left(x1)\n        x_comb_iter_1_right = self.comb_iter_1_right(x)\n        x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right\n\n        x_comb_iter_2_left = self.comb_iter_2_left(x1)\n        x_comb_iter_2_right = self.comb_iter_2_right(x)\n        x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right\n\n        x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)\n        x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1\n\n        x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)\n        x_comb_iter_4_right = self.comb_iter_4_right(x1)\n        x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right\n\n        x_out = torch.cat(\n            [x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1\n        )\n        return x_out\n\n\nclass CellStem1(nn.Module):\n\n    def __init__(self, stem_filters, num_filters):\n        super(CellStem1, self).__init__()\n        self.num_filters = num_filters\n        self.stem_filters = stem_filters\n        self.conv_1x1 = nn.Sequential()\n        self.conv_1x1.add_module('relu', nn.ReLU())\n        self.conv_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                2 * self.num_filters,\n                self.num_filters,\n                1,\n                stride=1,\n                bias=False\n            )\n        )\n        self.conv_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                self.num_filters, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.relu = nn.ReLU()\n        self.path_1 = nn.Sequential()\n        self.path_1.add_module(\n            'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False)\n        )\n        self.path_1.add_module(\n            'conv',\n            nn.Conv2d(\n                self.stem_filters,\n                self.num_filters // 2,\n                1,\n                stride=1,\n                bias=False\n            )\n        )\n        self.path_2 = nn.ModuleList()\n        self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1)))\n        self.path_2.add_module(\n            
'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False)\n        )\n        self.path_2.add_module(\n            'conv',\n            nn.Conv2d(\n                self.stem_filters,\n                self.num_filters // 2,\n                1,\n                stride=1,\n                bias=False\n            )\n        )\n\n        self.final_path_bn = nn.BatchNorm2d(\n            self.num_filters, eps=0.001, momentum=0.1, affine=True\n        )\n\n        self.comb_iter_0_left = BranchSeparables(\n            self.num_filters,\n            self.num_filters,\n            5,\n            2,\n            2,\n            name='specific',\n            bias=False\n        )\n        self.comb_iter_0_right = BranchSeparables(\n            self.num_filters,\n            self.num_filters,\n            7,\n            2,\n            3,\n            name='specific',\n            bias=False\n        )\n\n        # self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)\n        self.comb_iter_1_left = MaxPoolPad()\n        self.comb_iter_1_right = BranchSeparables(\n            self.num_filters,\n            self.num_filters,\n            7,\n            2,\n            3,\n            name='specific',\n            bias=False\n        )\n\n        # self.comb_iter_2_left = nn.AvgPool2d(3, stride=2, padding=1, count_include_pad=False)\n        self.comb_iter_2_left = AvgPoolPad()\n        self.comb_iter_2_right = BranchSeparables(\n            self.num_filters,\n            self.num_filters,\n            5,\n            2,\n            2,\n            name='specific',\n            bias=False\n        )\n\n        self.comb_iter_3_right = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n\n        self.comb_iter_4_left = BranchSeparables(\n            self.num_filters,\n            self.num_filters,\n            3,\n            1,\n            1,\n            name='specific',\n            bias=False\n        )\n        # self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)\n        self.comb_iter_4_right = MaxPoolPad()\n\n    def forward(self, x_conv0, x_stem_0):\n        x_left = self.conv_1x1(x_stem_0)\n\n        x_relu = self.relu(x_conv0)\n        # path 1\n        x_path1 = self.path_1(x_relu)\n        # path 2\n        x_path2 = self.path_2.pad(x_relu)\n        x_path2 = x_path2[:, :, 1:, 1:]\n        x_path2 = self.path_2.avgpool(x_path2)\n        x_path2 = self.path_2.conv(x_path2)\n        # final path\n        x_right = self.final_path_bn(torch.cat([x_path1, x_path2], 1))\n\n        x_comb_iter_0_left = self.comb_iter_0_left(x_left)\n        x_comb_iter_0_right = self.comb_iter_0_right(x_right)\n        x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right\n\n        x_comb_iter_1_left = self.comb_iter_1_left(x_left)\n        x_comb_iter_1_right = self.comb_iter_1_right(x_right)\n        x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right\n\n        x_comb_iter_2_left = self.comb_iter_2_left(x_left)\n        x_comb_iter_2_right = self.comb_iter_2_right(x_right)\n        x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right\n\n        x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)\n        x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1\n\n        x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)\n        x_comb_iter_4_right = self.comb_iter_4_right(x_left)\n        x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right\n\n        x_out = torch.cat(\n            [x_comb_iter_1, x_comb_iter_2, 
x_comb_iter_3, x_comb_iter_4], 1\n        )\n        return x_out\n\n\nclass FirstCell(nn.Module):\n\n    def __init__(\n        self, in_channels_left, out_channels_left, in_channels_right,\n        out_channels_right\n    ):\n        super(FirstCell, self).__init__()\n        self.conv_1x1 = nn.Sequential()\n        self.conv_1x1.add_module('relu', nn.ReLU())\n        self.conv_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_right, out_channels_right, 1, stride=1, bias=False\n            )\n        )\n        self.conv_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                out_channels_right, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.relu = nn.ReLU()\n        self.path_1 = nn.Sequential()\n        self.path_1.add_module(\n            'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False)\n        )\n        self.path_1.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_left, out_channels_left, 1, stride=1, bias=False\n            )\n        )\n        self.path_2 = nn.ModuleList()\n        self.path_2.add_module('pad', nn.ZeroPad2d((0, 1, 0, 1)))\n        self.path_2.add_module(\n            'avgpool', nn.AvgPool2d(1, stride=2, count_include_pad=False)\n        )\n        self.path_2.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_left, out_channels_left, 1, stride=1, bias=False\n            )\n        )\n\n        self.final_path_bn = nn.BatchNorm2d(\n            out_channels_left * 2, eps=0.001, momentum=0.1, affine=True\n        )\n\n        self.comb_iter_0_left = BranchSeparables(\n            out_channels_right, out_channels_right, 5, 1, 2, bias=False\n        )\n        self.comb_iter_0_right = BranchSeparables(\n            out_channels_right, out_channels_right, 3, 1, 1, bias=False\n        )\n\n        self.comb_iter_1_left = BranchSeparables(\n            out_channels_right, out_channels_right, 5, 1, 2, bias=False\n        )\n        self.comb_iter_1_right = BranchSeparables(\n            out_channels_right, out_channels_right, 3, 1, 1, bias=False\n        )\n\n        self.comb_iter_2_left = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n\n        self.comb_iter_3_left = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n        self.comb_iter_3_right = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n\n        self.comb_iter_4_left = BranchSeparables(\n            out_channels_right, out_channels_right, 3, 1, 1, bias=False\n        )\n\n    def forward(self, x, x_prev):\n        x_relu = self.relu(x_prev)\n        # path 1\n        x_path1 = self.path_1(x_relu)\n        # path 2\n        x_path2 = self.path_2.pad(x_relu)\n        x_path2 = x_path2[:, :, 1:, 1:]\n        x_path2 = self.path_2.avgpool(x_path2)\n        x_path2 = self.path_2.conv(x_path2)\n        # final path\n        x_left = self.final_path_bn(torch.cat([x_path1, x_path2], 1))\n\n        x_right = self.conv_1x1(x)\n\n        x_comb_iter_0_left = self.comb_iter_0_left(x_right)\n        x_comb_iter_0_right = self.comb_iter_0_right(x_left)\n        x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right\n\n        x_comb_iter_1_left = self.comb_iter_1_left(x_left)\n        x_comb_iter_1_right = self.comb_iter_1_right(x_left)\n        x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right\n\n        
x_comb_iter_2_left = self.comb_iter_2_left(x_right)\n        x_comb_iter_2 = x_comb_iter_2_left + x_left\n\n        x_comb_iter_3_left = self.comb_iter_3_left(x_left)\n        x_comb_iter_3_right = self.comb_iter_3_right(x_left)\n        x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right\n\n        x_comb_iter_4_left = self.comb_iter_4_left(x_right)\n        x_comb_iter_4 = x_comb_iter_4_left + x_right\n\n        x_out = torch.cat(\n            [\n                x_left, x_comb_iter_0, x_comb_iter_1, x_comb_iter_2,\n                x_comb_iter_3, x_comb_iter_4\n            ], 1\n        )\n        return x_out\n\n\nclass NormalCell(nn.Module):\n\n    def __init__(\n        self, in_channels_left, out_channels_left, in_channels_right,\n        out_channels_right\n    ):\n        super(NormalCell, self).__init__()\n        self.conv_prev_1x1 = nn.Sequential()\n        self.conv_prev_1x1.add_module('relu', nn.ReLU())\n        self.conv_prev_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_left, out_channels_left, 1, stride=1, bias=False\n            )\n        )\n        self.conv_prev_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                out_channels_left, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.conv_1x1 = nn.Sequential()\n        self.conv_1x1.add_module('relu', nn.ReLU())\n        self.conv_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_right, out_channels_right, 1, stride=1, bias=False\n            )\n        )\n        self.conv_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                out_channels_right, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.comb_iter_0_left = BranchSeparables(\n            out_channels_right, out_channels_right, 5, 1, 2, bias=False\n        )\n        self.comb_iter_0_right = BranchSeparables(\n            out_channels_left, out_channels_left, 3, 1, 1, bias=False\n        )\n\n        self.comb_iter_1_left = BranchSeparables(\n            out_channels_left, out_channels_left, 5, 1, 2, bias=False\n        )\n        self.comb_iter_1_right = BranchSeparables(\n            out_channels_left, out_channels_left, 3, 1, 1, bias=False\n        )\n\n        self.comb_iter_2_left = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n\n        self.comb_iter_3_left = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n        self.comb_iter_3_right = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n\n        self.comb_iter_4_left = BranchSeparables(\n            out_channels_right, out_channels_right, 3, 1, 1, bias=False\n        )\n\n    def forward(self, x, x_prev):\n        x_left = self.conv_prev_1x1(x_prev)\n        x_right = self.conv_1x1(x)\n\n        x_comb_iter_0_left = self.comb_iter_0_left(x_right)\n        x_comb_iter_0_right = self.comb_iter_0_right(x_left)\n        x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right\n\n        x_comb_iter_1_left = self.comb_iter_1_left(x_left)\n        x_comb_iter_1_right = self.comb_iter_1_right(x_left)\n        x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right\n\n        x_comb_iter_2_left = self.comb_iter_2_left(x_right)\n        x_comb_iter_2 = x_comb_iter_2_left + x_left\n\n        x_comb_iter_3_left = self.comb_iter_3_left(x_left)\n        x_comb_iter_3_right = self.comb_iter_3_right(x_left)\n 
       x_comb_iter_3 = x_comb_iter_3_left + x_comb_iter_3_right\n\n        x_comb_iter_4_left = self.comb_iter_4_left(x_right)\n        x_comb_iter_4 = x_comb_iter_4_left + x_right\n\n        x_out = torch.cat(\n            [\n                x_left, x_comb_iter_0, x_comb_iter_1, x_comb_iter_2,\n                x_comb_iter_3, x_comb_iter_4\n            ], 1\n        )\n        return x_out\n\n\nclass ReductionCell0(nn.Module):\n\n    def __init__(\n        self, in_channels_left, out_channels_left, in_channels_right,\n        out_channels_right\n    ):\n        super(ReductionCell0, self).__init__()\n        self.conv_prev_1x1 = nn.Sequential()\n        self.conv_prev_1x1.add_module('relu', nn.ReLU())\n        self.conv_prev_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_left, out_channels_left, 1, stride=1, bias=False\n            )\n        )\n        self.conv_prev_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                out_channels_left, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.conv_1x1 = nn.Sequential()\n        self.conv_1x1.add_module('relu', nn.ReLU())\n        self.conv_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_right, out_channels_right, 1, stride=1, bias=False\n            )\n        )\n        self.conv_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                out_channels_right, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.comb_iter_0_left = BranchSeparablesReduction(\n            out_channels_right, out_channels_right, 5, 2, 2, bias=False\n        )\n        self.comb_iter_0_right = BranchSeparablesReduction(\n            out_channels_right, out_channels_right, 7, 2, 3, bias=False\n        )\n\n        self.comb_iter_1_left = MaxPoolPad()\n        self.comb_iter_1_right = BranchSeparablesReduction(\n            out_channels_right, out_channels_right, 7, 2, 3, bias=False\n        )\n\n        self.comb_iter_2_left = AvgPoolPad()\n        self.comb_iter_2_right = BranchSeparablesReduction(\n            out_channels_right, out_channels_right, 5, 2, 2, bias=False\n        )\n\n        self.comb_iter_3_right = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n\n        self.comb_iter_4_left = BranchSeparablesReduction(\n            out_channels_right, out_channels_right, 3, 1, 1, bias=False\n        )\n        self.comb_iter_4_right = MaxPoolPad()\n\n    def forward(self, x, x_prev):\n        x_left = self.conv_prev_1x1(x_prev)\n        x_right = self.conv_1x1(x)\n\n        x_comb_iter_0_left = self.comb_iter_0_left(x_right)\n        x_comb_iter_0_right = self.comb_iter_0_right(x_left)\n        x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right\n\n        x_comb_iter_1_left = self.comb_iter_1_left(x_right)\n        x_comb_iter_1_right = self.comb_iter_1_right(x_left)\n        x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right\n\n        x_comb_iter_2_left = self.comb_iter_2_left(x_right)\n        x_comb_iter_2_right = self.comb_iter_2_right(x_left)\n        x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right\n\n        x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)\n        x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1\n\n        x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)\n        x_comb_iter_4_right = self.comb_iter_4_right(x_right)\n        x_comb_iter_4 = x_comb_iter_4_left + 
x_comb_iter_4_right\n\n        x_out = torch.cat(\n            [x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1\n        )\n        return x_out\n\n\nclass ReductionCell1(nn.Module):\n\n    def __init__(\n        self, in_channels_left, out_channels_left, in_channels_right,\n        out_channels_right\n    ):\n        super(ReductionCell1, self).__init__()\n        self.conv_prev_1x1 = nn.Sequential()\n        self.conv_prev_1x1.add_module('relu', nn.ReLU())\n        self.conv_prev_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_left, out_channels_left, 1, stride=1, bias=False\n            )\n        )\n        self.conv_prev_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                out_channels_left, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.conv_1x1 = nn.Sequential()\n        self.conv_1x1.add_module('relu', nn.ReLU())\n        self.conv_1x1.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels_right, out_channels_right, 1, stride=1, bias=False\n            )\n        )\n        self.conv_1x1.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                out_channels_right, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.comb_iter_0_left = BranchSeparables(\n            out_channels_right,\n            out_channels_right,\n            5,\n            2,\n            2,\n            name='specific',\n            bias=False\n        )\n        self.comb_iter_0_right = BranchSeparables(\n            out_channels_right,\n            out_channels_right,\n            7,\n            2,\n            3,\n            name='specific',\n            bias=False\n        )\n\n        # self.comb_iter_1_left = nn.MaxPool2d(3, stride=2, padding=1)\n        self.comb_iter_1_left = MaxPoolPad()\n        self.comb_iter_1_right = BranchSeparables(\n            out_channels_right,\n            out_channels_right,\n            7,\n            2,\n            3,\n            name='specific',\n            bias=False\n        )\n\n        # self.comb_iter_2_left = nn.AvgPool2d(3, stride=2, padding=1, count_include_pad=False)\n        self.comb_iter_2_left = AvgPoolPad()\n        self.comb_iter_2_right = BranchSeparables(\n            out_channels_right,\n            out_channels_right,\n            5,\n            2,\n            2,\n            name='specific',\n            bias=False\n        )\n\n        self.comb_iter_3_right = nn.AvgPool2d(\n            3, stride=1, padding=1, count_include_pad=False\n        )\n\n        self.comb_iter_4_left = BranchSeparables(\n            out_channels_right,\n            out_channels_right,\n            3,\n            1,\n            1,\n            name='specific',\n            bias=False\n        )\n        # self.comb_iter_4_right = nn.MaxPool2d(3, stride=2, padding=1)\n        self.comb_iter_4_right = MaxPoolPad()\n\n    def forward(self, x, x_prev):\n        x_left = self.conv_prev_1x1(x_prev)\n        x_right = self.conv_1x1(x)\n\n        x_comb_iter_0_left = self.comb_iter_0_left(x_right)\n        x_comb_iter_0_right = self.comb_iter_0_right(x_left)\n        x_comb_iter_0 = x_comb_iter_0_left + x_comb_iter_0_right\n\n        x_comb_iter_1_left = self.comb_iter_1_left(x_right)\n        x_comb_iter_1_right = self.comb_iter_1_right(x_left)\n        x_comb_iter_1 = x_comb_iter_1_left + x_comb_iter_1_right\n\n        x_comb_iter_2_left = self.comb_iter_2_left(x_right)\n        
x_comb_iter_2_right = self.comb_iter_2_right(x_left)\n        x_comb_iter_2 = x_comb_iter_2_left + x_comb_iter_2_right\n\n        x_comb_iter_3_right = self.comb_iter_3_right(x_comb_iter_0)\n        x_comb_iter_3 = x_comb_iter_3_right + x_comb_iter_1\n\n        x_comb_iter_4_left = self.comb_iter_4_left(x_comb_iter_0)\n        x_comb_iter_4_right = self.comb_iter_4_right(x_right)\n        x_comb_iter_4 = x_comb_iter_4_left + x_comb_iter_4_right\n\n        x_out = torch.cat(\n            [x_comb_iter_1, x_comb_iter_2, x_comb_iter_3, x_comb_iter_4], 1\n        )\n        return x_out\n\n\nclass NASNetAMobile(nn.Module):\n    \"\"\"Neural Architecture Search (NAS).\n\n    Reference:\n        Zoph et al. Learning Transferable Architectures\n        for Scalable Image Recognition. CVPR 2018.\n\n    Public keys:\n        - ``nasnetamobile``: NASNet-A Mobile.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        loss,\n        stem_filters=32,\n        penultimate_filters=1056,\n        filters_multiplier=2,\n        **kwargs\n    ):\n        super(NASNetAMobile, self).__init__()\n        self.stem_filters = stem_filters\n        self.penultimate_filters = penultimate_filters\n        self.filters_multiplier = filters_multiplier\n        self.loss = loss\n\n        filters = self.penultimate_filters // 24\n        # 24 is default value for the architecture\n\n        self.conv0 = nn.Sequential()\n        self.conv0.add_module(\n            'conv',\n            nn.Conv2d(\n                in_channels=3,\n                out_channels=self.stem_filters,\n                kernel_size=3,\n                padding=0,\n                stride=2,\n                bias=False\n            )\n        )\n        self.conv0.add_module(\n            'bn',\n            nn.BatchNorm2d(\n                self.stem_filters, eps=0.001, momentum=0.1, affine=True\n            )\n        )\n\n        self.cell_stem_0 = CellStem0(\n            self.stem_filters, num_filters=filters // (filters_multiplier**2)\n        )\n        self.cell_stem_1 = CellStem1(\n            self.stem_filters, num_filters=filters // filters_multiplier\n        )\n\n        self.cell_0 = FirstCell(\n            in_channels_left=filters,\n            out_channels_left=filters // 2, # 1, 0.5\n            in_channels_right=2 * filters,\n            out_channels_right=filters\n        ) # 2, 1\n        self.cell_1 = NormalCell(\n            in_channels_left=2 * filters,\n            out_channels_left=filters, # 2, 1\n            in_channels_right=6 * filters,\n            out_channels_right=filters\n        ) # 6, 1\n        self.cell_2 = NormalCell(\n            in_channels_left=6 * filters,\n            out_channels_left=filters, # 6, 1\n            in_channels_right=6 * filters,\n            out_channels_right=filters\n        ) # 6, 1\n        self.cell_3 = NormalCell(\n            in_channels_left=6 * filters,\n            out_channels_left=filters, # 6, 1\n            in_channels_right=6 * filters,\n            out_channels_right=filters\n        ) # 6, 1\n\n        self.reduction_cell_0 = ReductionCell0(\n            in_channels_left=6 * filters,\n            out_channels_left=2 * filters, # 6, 2\n            in_channels_right=6 * filters,\n            out_channels_right=2 * filters\n        ) # 6, 2\n\n        self.cell_6 = FirstCell(\n            in_channels_left=6 * filters,\n            out_channels_left=filters, # 6, 1\n            in_channels_right=8 * filters,\n            out_channels_right=2 * filters\n       
 ) # 8, 2\n        self.cell_7 = NormalCell(\n            in_channels_left=8 * filters,\n            out_channels_left=2 * filters, # 8, 2\n            in_channels_right=12 * filters,\n            out_channels_right=2 * filters\n        ) # 12, 2\n        self.cell_8 = NormalCell(\n            in_channels_left=12 * filters,\n            out_channels_left=2 * filters, # 12, 2\n            in_channels_right=12 * filters,\n            out_channels_right=2 * filters\n        ) # 12, 2\n        self.cell_9 = NormalCell(\n            in_channels_left=12 * filters,\n            out_channels_left=2 * filters, # 12, 2\n            in_channels_right=12 * filters,\n            out_channels_right=2 * filters\n        ) # 12, 2\n\n        self.reduction_cell_1 = ReductionCell1(\n            in_channels_left=12 * filters,\n            out_channels_left=4 * filters, # 12, 4\n            in_channels_right=12 * filters,\n            out_channels_right=4 * filters\n        ) # 12, 4\n\n        self.cell_12 = FirstCell(\n            in_channels_left=12 * filters,\n            out_channels_left=2 * filters, # 12, 2\n            in_channels_right=16 * filters,\n            out_channels_right=4 * filters\n        ) # 16, 4\n        self.cell_13 = NormalCell(\n            in_channels_left=16 * filters,\n            out_channels_left=4 * filters, # 16, 4\n            in_channels_right=24 * filters,\n            out_channels_right=4 * filters\n        ) # 24, 4\n        self.cell_14 = NormalCell(\n            in_channels_left=24 * filters,\n            out_channels_left=4 * filters, # 24, 4\n            in_channels_right=24 * filters,\n            out_channels_right=4 * filters\n        ) # 24, 4\n        self.cell_15 = NormalCell(\n            in_channels_left=24 * filters,\n            out_channels_left=4 * filters, # 24, 4\n            in_channels_right=24 * filters,\n            out_channels_right=4 * filters\n        ) # 24, 4\n\n        self.relu = nn.ReLU()\n        self.dropout = nn.Dropout()\n        self.classifier = nn.Linear(24 * filters, num_classes)\n\n        self._init_params()\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def features(self, input):\n        x_conv0 = self.conv0(input)\n        x_stem_0 = self.cell_stem_0(x_conv0)\n        x_stem_1 = self.cell_stem_1(x_conv0, x_stem_0)\n\n        x_cell_0 = self.cell_0(x_stem_1, x_stem_0)\n        x_cell_1 = self.cell_1(x_cell_0, x_stem_1)\n        x_cell_2 = self.cell_2(x_cell_1, x_cell_0)\n        x_cell_3 = self.cell_3(x_cell_2, x_cell_1)\n\n        x_reduction_cell_0 = self.reduction_cell_0(x_cell_3, x_cell_2)\n\n        x_cell_6 = self.cell_6(x_reduction_cell_0, x_cell_3)\n        x_cell_7 = self.cell_7(x_cell_6, x_reduction_cell_0)\n        x_cell_8 = self.cell_8(x_cell_7, x_cell_6)\n        
x_cell_9 = self.cell_9(x_cell_8, x_cell_7)\n\n        x_reduction_cell_1 = self.reduction_cell_1(x_cell_9, x_cell_8)\n\n        x_cell_12 = self.cell_12(x_reduction_cell_1, x_cell_9)\n        x_cell_13 = self.cell_13(x_cell_12, x_reduction_cell_1)\n        x_cell_14 = self.cell_14(x_cell_13, x_cell_12)\n        x_cell_15 = self.cell_15(x_cell_14, x_cell_13)\n\n        x_cell_15 = self.relu(x_cell_15)\n        x_cell_15 = F.avg_pool2d(\n            x_cell_15,\n            x_cell_15.size()[2:]\n        ) # global average pool\n        x_cell_15 = x_cell_15.view(x_cell_15.size(0), -1)\n        x_cell_15 = self.dropout(x_cell_15)\n\n        return x_cell_15\n\n    def forward(self, input):\n        v = self.features(input)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef nasnetamobile(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = NASNetAMobile(num_classes, loss, **kwargs)\n    if pretrained:\n        model_url = pretrained_settings['nasnetamobile']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/osnet.py",
    "content": "from __future__ import division, absolute_import\nimport warnings\nimport torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\n__all__ = [\n    'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25', 'osnet_ibn_x1_0'\n]\n\npretrained_urls = {\n    'osnet_x1_0':\n    'https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY',\n    'osnet_x0_75':\n    'https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq',\n    'osnet_x0_5':\n    'https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i',\n    'osnet_x0_25':\n    'https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs',\n    'osnet_ibn_x1_0':\n    'https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l'\n}\n\n\n##########\n# Basic layers\n##########\nclass ConvLayer(nn.Module):\n    \"\"\"Convolution layer (conv + bn + relu).\"\"\"\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        kernel_size,\n        stride=1,\n        padding=0,\n        groups=1,\n        IN=False\n    ):\n        super(ConvLayer, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride=stride,\n            padding=padding,\n            bias=False,\n            groups=groups\n        )\n        if IN:\n            self.bn = nn.InstanceNorm2d(out_channels, affine=True)\n        else:\n            self.bn = nn.BatchNorm2d(out_channels)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        x = self.relu(x)\n        return x\n\n\nclass Conv1x1(nn.Module):\n    \"\"\"1x1 convolution + bn + relu.\"\"\"\n\n    def __init__(self, in_channels, out_channels, stride=1, groups=1):\n        super(Conv1x1, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels,\n            out_channels,\n            1,\n            stride=stride,\n            padding=0,\n            bias=False,\n            groups=groups\n        )\n        self.bn = nn.BatchNorm2d(out_channels)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        x = self.relu(x)\n        return x\n\n\nclass Conv1x1Linear(nn.Module):\n    \"\"\"1x1 convolution + bn (w/o non-linearity).\"\"\"\n\n    def __init__(self, in_channels, out_channels, stride=1):\n        super(Conv1x1Linear, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels, out_channels, 1, stride=stride, padding=0, bias=False\n        )\n        self.bn = nn.BatchNorm2d(out_channels)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        return x\n\n\nclass Conv3x3(nn.Module):\n    \"\"\"3x3 convolution + bn + relu.\"\"\"\n\n    def __init__(self, in_channels, out_channels, stride=1, groups=1):\n        super(Conv3x3, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels,\n            out_channels,\n            3,\n            stride=stride,\n            padding=1,\n            bias=False,\n            groups=groups\n        )\n        self.bn = nn.BatchNorm2d(out_channels)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        x = self.relu(x)\n        return x\n\n\nclass LightConv3x3(nn.Module):\n    \"\"\"Lightweight 3x3 convolution.\n\n    1x1 (linear) + dw 3x3 (nonlinear).\n    \"\"\"\n\n    def __init__(self, in_channels, 
out_channels):\n        super(LightConv3x3, self).__init__()\n        self.conv1 = nn.Conv2d(\n            in_channels, out_channels, 1, stride=1, padding=0, bias=False\n        )\n        self.conv2 = nn.Conv2d(\n            out_channels,\n            out_channels,\n            3,\n            stride=1,\n            padding=1,\n            bias=False,\n            groups=out_channels\n        )\n        self.bn = nn.BatchNorm2d(out_channels)\n        self.relu = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.conv2(x)\n        x = self.bn(x)\n        x = self.relu(x)\n        return x\n\n\n##########\n# Building blocks for omni-scale feature learning\n##########\nclass ChannelGate(nn.Module):\n    \"\"\"A mini-network that generates channel-wise gates conditioned on input tensor.\"\"\"\n\n    def __init__(\n        self,\n        in_channels,\n        num_gates=None,\n        return_gates=False,\n        gate_activation='sigmoid',\n        reduction=16,\n        layer_norm=False\n    ):\n        super(ChannelGate, self).__init__()\n        if num_gates is None:\n            num_gates = in_channels\n        self.return_gates = return_gates\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.fc1 = nn.Conv2d(\n            in_channels,\n            in_channels // reduction,\n            kernel_size=1,\n            bias=True,\n            padding=0\n        )\n        self.norm1 = None\n        if layer_norm:\n            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))\n        self.relu = nn.ReLU(inplace=True)\n        self.fc2 = nn.Conv2d(\n            in_channels // reduction,\n            num_gates,\n            kernel_size=1,\n            bias=True,\n            padding=0\n        )\n        if gate_activation == 'sigmoid':\n            self.gate_activation = nn.Sigmoid()\n        elif gate_activation == 'relu':\n            self.gate_activation = nn.ReLU(inplace=True)\n        elif gate_activation == 'linear':\n            self.gate_activation = None\n        else:\n            raise RuntimeError(\n                \"Unknown gate activation: {}\".format(gate_activation)\n            )\n\n    def forward(self, x):\n        input = x\n        x = self.global_avgpool(x)\n        x = self.fc1(x)\n        if self.norm1 is not None:\n            x = self.norm1(x)\n        x = self.relu(x)\n        x = self.fc2(x)\n        if self.gate_activation is not None:\n            x = self.gate_activation(x)\n        if self.return_gates:\n            return x\n        return input * x\n\n\nclass OSBlock(nn.Module):\n    \"\"\"Omni-scale feature learning block.\"\"\"\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        IN=False,\n        bottleneck_reduction=4,\n        **kwargs\n    ):\n        super(OSBlock, self).__init__()\n        mid_channels = out_channels // bottleneck_reduction\n        self.conv1 = Conv1x1(in_channels, mid_channels)\n        self.conv2a = LightConv3x3(mid_channels, mid_channels)\n        self.conv2b = nn.Sequential(\n            LightConv3x3(mid_channels, mid_channels),\n            LightConv3x3(mid_channels, mid_channels),\n        )\n        self.conv2c = nn.Sequential(\n            LightConv3x3(mid_channels, mid_channels),\n            LightConv3x3(mid_channels, mid_channels),\n            LightConv3x3(mid_channels, mid_channels),\n        )\n        self.conv2d = nn.Sequential(\n            LightConv3x3(mid_channels, mid_channels),\n            
LightConv3x3(mid_channels, mid_channels),\n            LightConv3x3(mid_channels, mid_channels),\n            LightConv3x3(mid_channels, mid_channels),\n        )\n        self.gate = ChannelGate(mid_channels)\n        self.conv3 = Conv1x1Linear(mid_channels, out_channels)\n        self.downsample = None\n        if in_channels != out_channels:\n            self.downsample = Conv1x1Linear(in_channels, out_channels)\n        self.IN = None\n        if IN:\n            self.IN = nn.InstanceNorm2d(out_channels, affine=True)\n\n    def forward(self, x):\n        identity = x\n        x1 = self.conv1(x)\n        x2a = self.conv2a(x1)\n        x2b = self.conv2b(x1)\n        x2c = self.conv2c(x1)\n        x2d = self.conv2d(x1)\n        x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)\n        x3 = self.conv3(x2)\n        if self.downsample is not None:\n            identity = self.downsample(identity)\n        out = x3 + identity\n        if self.IN is not None:\n            out = self.IN(out)\n        return F.relu(out)\n\n\n##########\n# Network architecture\n##########\nclass OSNet(nn.Module):\n    \"\"\"Omni-Scale Network.\n    \n    Reference:\n        - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.\n        - Zhou et al. Learning Generalisable Omni-Scale Representations\n          for Person Re-Identification. TPAMI, 2021.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        blocks,\n        layers,\n        channels,\n        feature_dim=512,\n        loss='softmax',\n        IN=False,\n        **kwargs\n    ):\n        super(OSNet, self).__init__()\n        num_blocks = len(blocks)\n        assert num_blocks == len(layers)\n        assert num_blocks == len(channels) - 1\n        self.loss = loss\n        self.feature_dim = feature_dim\n\n        # convolutional backbone\n        self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)\n        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)\n        self.conv2 = self._make_layer(\n            blocks[0],\n            layers[0],\n            channels[0],\n            channels[1],\n            reduce_spatial_size=True,\n            IN=IN\n        )\n        self.conv3 = self._make_layer(\n            blocks[1],\n            layers[1],\n            channels[1],\n            channels[2],\n            reduce_spatial_size=True\n        )\n        self.conv4 = self._make_layer(\n            blocks[2],\n            layers[2],\n            channels[2],\n            channels[3],\n            reduce_spatial_size=False\n        )\n        self.conv5 = Conv1x1(channels[3], channels[3])\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        # fully connected layer\n        self.fc = self._construct_fc_layer(\n            self.feature_dim, channels[3], dropout_p=None\n        )\n        # identity classification layer\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        self._init_params()\n\n    def _make_layer(\n        self,\n        block,\n        layer,\n        in_channels,\n        out_channels,\n        reduce_spatial_size,\n        IN=False\n    ):\n        layers = []\n\n        layers.append(block(in_channels, out_channels, IN=IN))\n        for i in range(1, layer):\n            layers.append(block(out_channels, out_channels, IN=IN))\n\n        if reduce_spatial_size:\n            layers.append(\n                nn.Sequential(\n                    Conv1x1(out_channels, out_channels),\n                    
nn.AvgPool2d(2, stride=2)\n                )\n            )\n\n        return nn.Sequential(*layers)\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        if fc_dims is None or fc_dims < 0:\n            self.feature_dim = input_dim\n            return None\n\n        if isinstance(fc_dims, int):\n            fc_dims = [fc_dims]\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def featuremaps(self, x):\n        x = self.conv1(x)\n        x = self.maxpool(x)\n        x = self.conv2(x)\n        x = self.conv3(x)\n        x = self.conv4(x)\n        x = self.conv5(x)\n        return x\n\n    def forward(self, x, return_featuremaps=False):\n        x = self.featuremaps(x)\n        if return_featuremaps:\n            return x\n        v = self.global_avgpool(x)\n        v = v.view(v.size(0), -1)\n        if self.fc is not None:\n            v = self.fc(v)\n        if not self.training:\n            return v\n        y = self.classifier(v)\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n\n\ndef init_pretrained_weights(model, key=''):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    import os\n    import errno\n    import gdown\n    from collections import OrderedDict\n\n    def _get_torch_home():\n        ENV_TORCH_HOME = 'TORCH_HOME'\n        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'\n        DEFAULT_CACHE_DIR = '~/.cache'\n        torch_home = os.path.expanduser(\n            os.getenv(\n                ENV_TORCH_HOME,\n                os.path.join(\n                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'\n                )\n            )\n        )\n        return torch_home\n\n    torch_home = _get_torch_home()\n    model_dir = os.path.join(torch_home, 'checkpoints')\n    try:\n        os.makedirs(model_dir)\n    except OSError as e:\n        if e.errno == errno.EEXIST:\n            # Directory already exists, ignore.\n            pass\n        else:\n            # Unexpected OSError, re-raise.\n            raise\n    filename = key + '_imagenet.pth'\n    cached_file = os.path.join(model_dir, filename)\n\n    if not 
os.path.exists(cached_file):\n        gdown.download(pretrained_urls[key], cached_file, quiet=False)\n\n    state_dict = torch.load(cached_file)\n    model_dict = model.state_dict()\n    new_state_dict = OrderedDict()\n    matched_layers, discarded_layers = [], []\n\n    for k, v in state_dict.items():\n        if k.startswith('module.'):\n            k = k[7:] # discard module.\n\n        if k in model_dict and model_dict[k].size() == v.size():\n            new_state_dict[k] = v\n            matched_layers.append(k)\n        else:\n            discarded_layers.append(k)\n\n    model_dict.update(new_state_dict)\n    model.load_state_dict(model_dict)\n\n    if len(matched_layers) == 0:\n        warnings.warn(\n            'The pretrained weights from \"{}\" cannot be loaded, '\n            'please check the key names manually '\n            '(** ignored and continue **)'.format(cached_file)\n        )\n    else:\n        print(\n            'Successfully loaded imagenet pretrained weights from \"{}\"'.\n            format(cached_file)\n        )\n        if len(discarded_layers) > 0:\n            print(\n                '** The following layers are discarded '\n                'due to unmatched keys or layer size: {}'.\n                format(discarded_layers)\n            )\n\n\n##########\n# Instantiation\n##########\ndef osnet_x1_0(num_classes=1000, pretrained=True, loss='softmax', **kwargs):\n    # standard size (width x1.0)\n    model = OSNet(\n        num_classes,\n        blocks=[OSBlock, OSBlock, OSBlock],\n        layers=[2, 2, 2],\n        channels=[64, 256, 384, 512],\n        loss=loss,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_x1_0')\n    return model\n\n\ndef osnet_x0_75(num_classes=1000, pretrained=True, loss='softmax', **kwargs):\n    # medium size (width x0.75)\n    model = OSNet(\n        num_classes,\n        blocks=[OSBlock, OSBlock, OSBlock],\n        layers=[2, 2, 2],\n        channels=[48, 192, 288, 384],\n        loss=loss,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_x0_75')\n    return model\n\n\ndef osnet_x0_5(num_classes=1000, pretrained=True, loss='softmax', **kwargs):\n    # tiny size (width x0.5)\n    model = OSNet(\n        num_classes,\n        blocks=[OSBlock, OSBlock, OSBlock],\n        layers=[2, 2, 2],\n        channels=[32, 128, 192, 256],\n        loss=loss,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_x0_5')\n    return model\n\n\ndef osnet_x0_25(num_classes=1000, pretrained=True, loss='softmax', **kwargs):\n    # very tiny size (width x0.25)\n    model = OSNet(\n        num_classes,\n        blocks=[OSBlock, OSBlock, OSBlock],\n        layers=[2, 2, 2],\n        channels=[16, 64, 96, 128],\n        loss=loss,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_x0_25')\n    return model\n\n\ndef osnet_ibn_x1_0(\n    num_classes=1000, pretrained=True, loss='softmax', **kwargs\n):\n    # standard size (width x1.0) + IBN layer\n    # Ref: Pan et al. Two at Once: Enhancing Learning and Generalization Capacities via IBN-Net. ECCV, 2018.\n    model = OSNet(\n        num_classes,\n        blocks=[OSBlock, OSBlock, OSBlock],\n        layers=[2, 2, 2],\n        channels=[64, 256, 384, 512],\n        loss=loss,\n        IN=True,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_ibn_x1_0')\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/osnet_ain.py",
    "content": "from __future__ import division, absolute_import\nimport warnings\nimport torch\nfrom torch import nn\nfrom torch.nn import functional as F\n\n__all__ = [\n    'osnet_ain_x1_0', 'osnet_ain_x0_75', 'osnet_ain_x0_5', 'osnet_ain_x0_25'\n]\n\npretrained_urls = {\n    'osnet_ain_x1_0':\n    'https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo',\n    'osnet_ain_x0_75':\n    'https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM',\n    'osnet_ain_x0_5':\n    'https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l',\n    'osnet_ain_x0_25':\n    'https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt'\n}\n\n\n##########\n# Basic layers\n##########\nclass ConvLayer(nn.Module):\n    \"\"\"Convolution layer (conv + bn + relu).\"\"\"\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        kernel_size,\n        stride=1,\n        padding=0,\n        groups=1,\n        IN=False\n    ):\n        super(ConvLayer, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels,\n            out_channels,\n            kernel_size,\n            stride=stride,\n            padding=padding,\n            bias=False,\n            groups=groups\n        )\n        if IN:\n            self.bn = nn.InstanceNorm2d(out_channels, affine=True)\n        else:\n            self.bn = nn.BatchNorm2d(out_channels)\n        self.relu = nn.ReLU()\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        return self.relu(x)\n\n\nclass Conv1x1(nn.Module):\n    \"\"\"1x1 convolution + bn + relu.\"\"\"\n\n    def __init__(self, in_channels, out_channels, stride=1, groups=1):\n        super(Conv1x1, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels,\n            out_channels,\n            1,\n            stride=stride,\n            padding=0,\n            bias=False,\n            groups=groups\n        )\n        self.bn = nn.BatchNorm2d(out_channels)\n        self.relu = nn.ReLU()\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        return self.relu(x)\n\n\nclass Conv1x1Linear(nn.Module):\n    \"\"\"1x1 convolution + bn (w/o non-linearity).\"\"\"\n\n    def __init__(self, in_channels, out_channels, stride=1, bn=True):\n        super(Conv1x1Linear, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels, out_channels, 1, stride=stride, padding=0, bias=False\n        )\n        self.bn = None\n        if bn:\n            self.bn = nn.BatchNorm2d(out_channels)\n\n    def forward(self, x):\n        x = self.conv(x)\n        if self.bn is not None:\n            x = self.bn(x)\n        return x\n\n\nclass Conv3x3(nn.Module):\n    \"\"\"3x3 convolution + bn + relu.\"\"\"\n\n    def __init__(self, in_channels, out_channels, stride=1, groups=1):\n        super(Conv3x3, self).__init__()\n        self.conv = nn.Conv2d(\n            in_channels,\n            out_channels,\n            3,\n            stride=stride,\n            padding=1,\n            bias=False,\n            groups=groups\n        )\n        self.bn = nn.BatchNorm2d(out_channels)\n        self.relu = nn.ReLU()\n\n    def forward(self, x):\n        x = self.conv(x)\n        x = self.bn(x)\n        return self.relu(x)\n\n\nclass LightConv3x3(nn.Module):\n    \"\"\"Lightweight 3x3 convolution.\n\n    1x1 (linear) + dw 3x3 (nonlinear).\n    \"\"\"\n\n    def __init__(self, in_channels, out_channels):\n        super(LightConv3x3, self).__init__()\n        self.conv1 = 
nn.Conv2d(\n            in_channels, out_channels, 1, stride=1, padding=0, bias=False\n        )\n        self.conv2 = nn.Conv2d(\n            out_channels,\n            out_channels,\n            3,\n            stride=1,\n            padding=1,\n            bias=False,\n            groups=out_channels\n        )\n        self.bn = nn.BatchNorm2d(out_channels)\n        self.relu = nn.ReLU()\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.conv2(x)\n        x = self.bn(x)\n        return self.relu(x)\n\n\nclass LightConvStream(nn.Module):\n    \"\"\"Lightweight convolution stream.\"\"\"\n\n    def __init__(self, in_channels, out_channels, depth):\n        super(LightConvStream, self).__init__()\n        assert depth >= 1, 'depth must be equal to or larger than 1, but got {}'.format(\n            depth\n        )\n        layers = []\n        layers += [LightConv3x3(in_channels, out_channels)]\n        for i in range(depth - 1):\n            layers += [LightConv3x3(out_channels, out_channels)]\n        self.layers = nn.Sequential(*layers)\n\n    def forward(self, x):\n        return self.layers(x)\n\n\n##########\n# Building blocks for omni-scale feature learning\n##########\nclass ChannelGate(nn.Module):\n    \"\"\"A mini-network that generates channel-wise gates conditioned on input tensor.\"\"\"\n\n    def __init__(\n        self,\n        in_channels,\n        num_gates=None,\n        return_gates=False,\n        gate_activation='sigmoid',\n        reduction=16,\n        layer_norm=False\n    ):\n        super(ChannelGate, self).__init__()\n        if num_gates is None:\n            num_gates = in_channels\n        self.return_gates = return_gates\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.fc1 = nn.Conv2d(\n            in_channels,\n            in_channels // reduction,\n            kernel_size=1,\n            bias=True,\n            padding=0\n        )\n        self.norm1 = None\n        if layer_norm:\n            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))\n        self.relu = nn.ReLU()\n        self.fc2 = nn.Conv2d(\n            in_channels // reduction,\n            num_gates,\n            kernel_size=1,\n            bias=True,\n            padding=0\n        )\n        if gate_activation == 'sigmoid':\n            self.gate_activation = nn.Sigmoid()\n        elif gate_activation == 'relu':\n            self.gate_activation = nn.ReLU()\n        elif gate_activation == 'linear':\n            self.gate_activation = None\n        else:\n            raise RuntimeError(\n                \"Unknown gate activation: {}\".format(gate_activation)\n            )\n\n    def forward(self, x):\n        input = x\n        x = self.global_avgpool(x)\n        x = self.fc1(x)\n        if self.norm1 is not None:\n            x = self.norm1(x)\n        x = self.relu(x)\n        x = self.fc2(x)\n        if self.gate_activation is not None:\n            x = self.gate_activation(x)\n        if self.return_gates:\n            return x\n        return input * x\n\n\nclass OSBlock(nn.Module):\n    \"\"\"Omni-scale feature learning block.\"\"\"\n\n    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):\n        super(OSBlock, self).__init__()\n        assert T >= 1\n        assert out_channels >= reduction and out_channels % reduction == 0\n        mid_channels = out_channels // reduction\n\n        self.conv1 = Conv1x1(in_channels, mid_channels)\n        self.conv2 = nn.ModuleList()\n        for t in range(1, T + 
1):\n            self.conv2 += [LightConvStream(mid_channels, mid_channels, t)]\n        self.gate = ChannelGate(mid_channels)\n        self.conv3 = Conv1x1Linear(mid_channels, out_channels)\n        self.downsample = None\n        if in_channels != out_channels:\n            self.downsample = Conv1x1Linear(in_channels, out_channels)\n\n    def forward(self, x):\n        identity = x\n        x1 = self.conv1(x)\n        x2 = 0\n        for conv2_t in self.conv2:\n            x2_t = conv2_t(x1)\n            x2 = x2 + self.gate(x2_t)\n        x3 = self.conv3(x2)\n        if self.downsample is not None:\n            identity = self.downsample(identity)\n        out = x3 + identity\n        return F.relu(out)\n\n\nclass OSBlockINin(nn.Module):\n    \"\"\"Omni-scale feature learning block with instance normalization.\"\"\"\n\n    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):\n        super(OSBlockINin, self).__init__()\n        assert T >= 1\n        assert out_channels >= reduction and out_channels % reduction == 0\n        mid_channels = out_channels // reduction\n\n        self.conv1 = Conv1x1(in_channels, mid_channels)\n        self.conv2 = nn.ModuleList()\n        for t in range(1, T + 1):\n            self.conv2 += [LightConvStream(mid_channels, mid_channels, t)]\n        self.gate = ChannelGate(mid_channels)\n        self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)\n        self.downsample = None\n        if in_channels != out_channels:\n            self.downsample = Conv1x1Linear(in_channels, out_channels)\n        self.IN = nn.InstanceNorm2d(out_channels, affine=True)\n\n    def forward(self, x):\n        identity = x\n        x1 = self.conv1(x)\n        x2 = 0\n        for conv2_t in self.conv2:\n            x2_t = conv2_t(x1)\n            x2 = x2 + self.gate(x2_t)\n        x3 = self.conv3(x2)\n        x3 = self.IN(x3) # IN inside residual\n        if self.downsample is not None:\n            identity = self.downsample(identity)\n        out = x3 + identity\n        return F.relu(out)\n\n\n##########\n# Network architecture\n##########\nclass OSNet(nn.Module):\n    \"\"\"Omni-Scale Network.\n    \n    Reference:\n        - Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.\n        - Zhou et al. Learning Generalisable Omni-Scale Representations\n          for Person Re-Identification. 
TPAMI, 2021.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        blocks,\n        layers,\n        channels,\n        feature_dim=512,\n        loss='softmax',\n        conv1_IN=False,\n        **kwargs\n    ):\n        super(OSNet, self).__init__()\n        num_blocks = len(blocks)\n        assert num_blocks == len(layers)\n        assert num_blocks == len(channels) - 1\n        self.loss = loss\n        self.feature_dim = feature_dim\n\n        # convolutional backbone\n        self.conv1 = ConvLayer(\n            3, channels[0], 7, stride=2, padding=3, IN=conv1_IN\n        )\n        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)\n        self.conv2 = self._make_layer(\n            blocks[0], layers[0], channels[0], channels[1]\n        )\n        self.pool2 = nn.Sequential(\n            Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2)\n        )\n        self.conv3 = self._make_layer(\n            blocks[1], layers[1], channels[1], channels[2]\n        )\n        self.pool3 = nn.Sequential(\n            Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2)\n        )\n        self.conv4 = self._make_layer(\n            blocks[2], layers[2], channels[2], channels[3]\n        )\n        self.conv5 = Conv1x1(channels[3], channels[3])\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        # fully connected layer\n        self.fc = self._construct_fc_layer(\n            self.feature_dim, channels[3], dropout_p=None\n        )\n        # identity classification layer\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        self._init_params()\n\n    def _make_layer(self, blocks, layer, in_channels, out_channels):\n        layers = []\n        layers += [blocks[0](in_channels, out_channels)]\n        for i in range(1, len(blocks)):\n            layers += [blocks[i](out_channels, out_channels)]\n        return nn.Sequential(*layers)\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        if fc_dims is None or fc_dims < 0:\n            self.feature_dim = input_dim\n            return None\n\n        if isinstance(fc_dims, int):\n            fc_dims = [fc_dims]\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU())\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n\n            elif isinstance(m, nn.InstanceNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n 
   def featuremaps(self, x):\n        x = self.conv1(x)\n        x = self.maxpool(x)\n        x = self.conv2(x)\n        x = self.pool2(x)\n        x = self.conv3(x)\n        x = self.pool3(x)\n        x = self.conv4(x)\n        x = self.conv5(x)\n        return x\n\n    def forward(self, x, return_featuremaps=False):\n        x = self.featuremaps(x)\n        if return_featuremaps:\n            return x\n        v = self.global_avgpool(x)\n        v = v.view(v.size(0), -1)\n        if self.fc is not None:\n            v = self.fc(v)\n        if not self.training:\n            return v\n        y = self.classifier(v)\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n\n\ndef init_pretrained_weights(model, key=''):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    import os\n    import errno\n    import gdown\n    from collections import OrderedDict\n\n    def _get_torch_home():\n        ENV_TORCH_HOME = 'TORCH_HOME'\n        ENV_XDG_CACHE_HOME = 'XDG_CACHE_HOME'\n        DEFAULT_CACHE_DIR = '~/.cache'\n        torch_home = os.path.expanduser(\n            os.getenv(\n                ENV_TORCH_HOME,\n                os.path.join(\n                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), 'torch'\n                )\n            )\n        )\n        return torch_home\n\n    torch_home = _get_torch_home()\n    model_dir = os.path.join(torch_home, 'checkpoints')\n    try:\n        os.makedirs(model_dir)\n    except OSError as e:\n        if e.errno == errno.EEXIST:\n            # Directory already exists, ignore.\n            pass\n        else:\n            # Unexpected OSError, re-raise.\n            raise\n    filename = key + '_imagenet.pth'\n    cached_file = os.path.join(model_dir, filename)\n\n    if not os.path.exists(cached_file):\n        gdown.download(pretrained_urls[key], cached_file, quiet=False)\n\n    state_dict = torch.load(cached_file)\n    model_dict = model.state_dict()\n    new_state_dict = OrderedDict()\n    matched_layers, discarded_layers = [], []\n\n    for k, v in state_dict.items():\n        if k.startswith('module.'):\n            k = k[7:] # discard module.\n\n        if k in model_dict and model_dict[k].size() == v.size():\n            new_state_dict[k] = v\n            matched_layers.append(k)\n        else:\n            discarded_layers.append(k)\n\n    model_dict.update(new_state_dict)\n    model.load_state_dict(model_dict)\n\n    if len(matched_layers) == 0:\n        warnings.warn(\n            'The pretrained weights from \"{}\" cannot be loaded, '\n            'please check the key names manually '\n            '(** ignored and continue **)'.format(cached_file)\n        )\n    else:\n        print(\n            'Successfully loaded imagenet pretrained weights from \"{}\"'.\n            format(cached_file)\n        )\n        if len(discarded_layers) > 0:\n            print(\n                '** The following layers are discarded '\n                'due to unmatched keys or layer size: {}'.\n                format(discarded_layers)\n            )\n\n\n##########\n# Instantiation\n##########\ndef osnet_ain_x1_0(\n    num_classes=1000, pretrained=True, loss='softmax', **kwargs\n):\n    model = OSNet(\n        num_classes,\n        blocks=[\n            [OSBlockINin, OSBlockINin], 
[OSBlock, OSBlockINin],\n            [OSBlockINin, OSBlock]\n        ],\n        layers=[2, 2, 2],\n        channels=[64, 256, 384, 512],\n        loss=loss,\n        conv1_IN=True,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_ain_x1_0')\n    return model\n\n\ndef osnet_ain_x0_75(\n    num_classes=1000, pretrained=True, loss='softmax', **kwargs\n):\n    model = OSNet(\n        num_classes,\n        blocks=[\n            [OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin],\n            [OSBlockINin, OSBlock]\n        ],\n        layers=[2, 2, 2],\n        channels=[48, 192, 288, 384],\n        loss=loss,\n        conv1_IN=True,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_ain_x0_75')\n    return model\n\n\ndef osnet_ain_x0_5(\n    num_classes=1000, pretrained=True, loss='softmax', **kwargs\n):\n    model = OSNet(\n        num_classes,\n        blocks=[\n            [OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin],\n            [OSBlockINin, OSBlock]\n        ],\n        layers=[2, 2, 2],\n        channels=[32, 128, 192, 256],\n        loss=loss,\n        conv1_IN=True,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_ain_x0_5')\n    return model\n\n\ndef osnet_ain_x0_25(\n    num_classes=1000, pretrained=True, loss='softmax', **kwargs\n):\n    model = OSNet(\n        num_classes,\n        blocks=[\n            [OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin],\n            [OSBlockINin, OSBlock]\n        ],\n        layers=[2, 2, 2],\n        channels=[16, 64, 96, 128],\n        loss=loss,\n        conv1_IN=True,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, key='osnet_ain_x0_25')\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/pcb.py",
    "content": "from __future__ import division, absolute_import\nimport torch.utils.model_zoo as model_zoo\nfrom torch import nn\nfrom torch.nn import functional as F\n\n__all__ = ['pcb_p6', 'pcb_p4']\n\nmodel_urls = {\n    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',\n    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',\n    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n}\n\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return nn.Conv2d(\n        in_planes,\n        out_planes,\n        kernel_size=3,\n        stride=stride,\n        padding=1,\n        bias=False\n    )\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = conv3x3(inplanes, planes, stride)\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = conv3x3(planes, planes)\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.conv2 = nn.Conv2d(\n            planes,\n            planes,\n            kernel_size=3,\n            stride=stride,\n            padding=1,\n            bias=False\n        )\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.conv3 = nn.Conv2d(\n            planes, planes * self.expansion, kernel_size=1, bias=False\n        )\n        self.bn3 = nn.BatchNorm2d(planes * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass DimReduceLayer(nn.Module):\n\n    def __init__(self, in_channels, out_channels, nonlinear):\n        super(DimReduceLayer, self).__init__()\n        layers = []\n        layers.append(\n            nn.Conv2d(\n                in_channels, out_channels, 1, stride=1, padding=0, bias=False\n            )\n        )\n        layers.append(nn.BatchNorm2d(out_channels))\n\n        if nonlinear == 'relu':\n            layers.append(nn.ReLU(inplace=True))\n        elif nonlinear == 'leakyrelu':\n            layers.append(nn.LeakyReLU(0.1))\n\n        self.layers = 
nn.Sequential(*layers)\n\n    def forward(self, x):\n        return self.layers(x)\n\n\nclass PCB(nn.Module):\n    \"\"\"Part-based Convolutional Baseline.\n\n    Reference:\n        Sun et al. Beyond Part Models: Person Retrieval with Refined\n        Part Pooling (and A Strong Convolutional Baseline). ECCV 2018.\n\n    Public keys:\n        - ``pcb_p4``: PCB with 4-part strips.\n        - ``pcb_p6``: PCB with 6-part strips.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        loss,\n        block,\n        layers,\n        parts=6,\n        reduced_dim=256,\n        nonlinear='relu',\n        **kwargs\n    ):\n        self.inplanes = 64\n        super(PCB, self).__init__()\n        self.loss = loss\n        self.parts = parts\n        self.feature_dim = 512 * block.expansion\n\n        # backbone network\n        self.conv1 = nn.Conv2d(\n            3, 64, kernel_size=7, stride=2, padding=3, bias=False\n        )\n        self.bn1 = nn.BatchNorm2d(64)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, layers[0])\n        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)\n        self.layer4 = self._make_layer(block, 512, layers[3], stride=1)\n\n        # pcb layers\n        self.parts_avgpool = nn.AdaptiveAvgPool2d((self.parts, 1))\n        self.dropout = nn.Dropout(p=0.5)\n        self.conv5 = DimReduceLayer(\n            512 * block.expansion, reduced_dim, nonlinear=nonlinear\n        )\n        self.feature_dim = reduced_dim\n        self.classifier = nn.ModuleList(\n            [\n                nn.Linear(self.feature_dim, num_classes)\n                for _ in range(self.parts)\n            ]\n        )\n\n        self._init_params()\n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(\n                    self.inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                    bias=False\n                ),\n                nn.BatchNorm2d(planes * block.expansion),\n            )\n\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes))\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def featuremaps(self, x):\n        x = self.conv1(x)\n        
x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v_g = self.parts_avgpool(f)\n\n        if not self.training:\n            v_g = F.normalize(v_g, p=2, dim=1)\n            return v_g.view(v_g.size(0), -1)\n\n        v_g = self.dropout(v_g)\n        v_h = self.conv5(v_g)\n\n        y = []\n        for i in range(self.parts):\n            v_h_i = v_h[:, :, i, :]\n            v_h_i = v_h_i.view(v_h_i.size(0), -1)\n            y_i = self.classifier[i](v_h_i)\n            y.append(y_i)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            v_g = F.normalize(v_g, p=2, dim=1)\n            return y, v_g.view(v_g.size(0), -1)\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef pcb_p6(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = PCB(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 4, 6, 3],\n        last_stride=1,\n        parts=6,\n        reduced_dim=256,\n        nonlinear='relu',\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet50'])\n    return model\n\n\ndef pcb_p4(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = PCB(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 4, 6, 3],\n        last_stride=1,\n        parts=4,\n        reduced_dim=256,\n        nonlinear='relu',\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet50'])\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/resnet.py",
    "content": "\"\"\"\nCode source: https://github.com/pytorch/vision\n\"\"\"\nfrom __future__ import division, absolute_import\nimport torch.utils.model_zoo as model_zoo\nfrom torch import nn\n\n__all__ = [\n    'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',\n    'resnext50_32x4d', 'resnext101_32x8d', 'resnet50_fc512'\n]\n\nmodel_urls = {\n    'resnet18':\n    'https://download.pytorch.org/models/resnet18-5c106cde.pth',\n    'resnet34':\n    'https://download.pytorch.org/models/resnet34-333f7ec4.pth',\n    'resnet50':\n    'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n    'resnet101':\n    'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n    'resnet152':\n    'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n    'resnext50_32x4d':\n    'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',\n    'resnext101_32x8d':\n    'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',\n}\n\n\ndef conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return nn.Conv2d(\n        in_planes,\n        out_planes,\n        kernel_size=3,\n        stride=stride,\n        padding=dilation,\n        groups=groups,\n        bias=False,\n        dilation=dilation\n    )\n\n\ndef conv1x1(in_planes, out_planes, stride=1):\n    \"\"\"1x1 convolution\"\"\"\n    return nn.Conv2d(\n        in_planes, out_planes, kernel_size=1, stride=stride, bias=False\n    )\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(\n        self,\n        inplanes,\n        planes,\n        stride=1,\n        downsample=None,\n        groups=1,\n        base_width=64,\n        dilation=1,\n        norm_layer=None\n    ):\n        super(BasicBlock, self).__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        if groups != 1 or base_width != 64:\n            raise ValueError(\n                'BasicBlock only supports groups=1 and base_width=64'\n            )\n        if dilation > 1:\n            raise NotImplementedError(\n                \"Dilation > 1 not supported in BasicBlock\"\n            )\n        # Both self.conv1 and self.downsample layers downsample the input when stride != 1\n        self.conv1 = conv3x3(inplanes, planes, stride)\n        self.bn1 = norm_layer(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = conv3x3(planes, planes)\n        self.bn2 = norm_layer(planes)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        identity = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(\n        self,\n        inplanes,\n        planes,\n        stride=1,\n        downsample=None,\n        groups=1,\n        base_width=64,\n        dilation=1,\n        norm_layer=None\n    ):\n        super(Bottleneck, self).__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        width = int(planes * (base_width/64.)) * groups\n        # Both self.conv2 and self.downsample layers downsample the input when stride != 1\n        self.conv1 = conv1x1(inplanes, width)\n        self.bn1 = 
norm_layer(width)\n        self.conv2 = conv3x3(width, width, stride, groups, dilation)\n        self.bn2 = norm_layer(width)\n        self.conv3 = conv1x1(width, planes * self.expansion)\n        self.bn3 = norm_layer(planes * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        identity = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            identity = self.downsample(x)\n\n        out += identity\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n    \"\"\"Residual network.\n    \n    Reference:\n        - He et al. Deep Residual Learning for Image Recognition. CVPR 2016.\n        - Xie et al. Aggregated Residual Transformations for Deep Neural Networks. CVPR 2017.\n\n    Public keys:\n        - ``resnet18``: ResNet18.\n        - ``resnet34``: ResNet34.\n        - ``resnet50``: ResNet50.\n        - ``resnet101``: ResNet101.\n        - ``resnet152``: ResNet152.\n        - ``resnext50_32x4d``: ResNeXt50.\n        - ``resnext101_32x8d``: ResNeXt101.\n        - ``resnet50_fc512``: ResNet50 + FC.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        loss,\n        block,\n        layers,\n        zero_init_residual=False,\n        groups=1,\n        width_per_group=64,\n        replace_stride_with_dilation=None,\n        norm_layer=None,\n        last_stride=2,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    ):\n        super(ResNet, self).__init__()\n        if norm_layer is None:\n            norm_layer = nn.BatchNorm2d\n        self._norm_layer = norm_layer\n        self.loss = loss\n        self.feature_dim = 512 * block.expansion\n        self.inplanes = 64\n        self.dilation = 1\n        if replace_stride_with_dilation is None:\n            # each element in the tuple indicates if we should replace\n            # the 2x2 stride with a dilated convolution instead\n            replace_stride_with_dilation = [False, False, False]\n        if len(replace_stride_with_dilation) != 3:\n            raise ValueError(\n                \"replace_stride_with_dilation should be None \"\n                \"or a 3-element tuple, got {}\".\n                format(replace_stride_with_dilation)\n            )\n        self.groups = groups\n        self.base_width = width_per_group\n        self.conv1 = nn.Conv2d(\n            3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False\n        )\n        self.bn1 = norm_layer(self.inplanes)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, layers[0])\n        self.layer2 = self._make_layer(\n            block,\n            128,\n            layers[1],\n            stride=2,\n            dilate=replace_stride_with_dilation[0]\n        )\n        self.layer3 = self._make_layer(\n            block,\n            256,\n            layers[2],\n            stride=2,\n            dilate=replace_stride_with_dilation[1]\n        )\n        self.layer4 = self._make_layer(\n            block,\n            512,\n            layers[3],\n            stride=last_stride,\n            
dilate=replace_stride_with_dilation[2]\n        )\n        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.fc = self._construct_fc_layer(\n            fc_dims, 512 * block.expansion, dropout_p\n        )\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        self._init_params()\n\n        # Zero-initialize the last BN in each residual branch,\n        # so that the residual branch starts with zeros, and each residual block behaves like an identity.\n        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677\n        if zero_init_residual:\n            for m in self.modules():\n                if isinstance(m, Bottleneck):\n                    nn.init.constant_(m.bn3.weight, 0)\n                elif isinstance(m, BasicBlock):\n                    nn.init.constant_(m.bn2.weight, 0)\n\n    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):\n        norm_layer = self._norm_layer\n        downsample = None\n        previous_dilation = self.dilation\n        if dilate:\n            self.dilation *= stride\n            stride = 1\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                conv1x1(self.inplanes, planes * block.expansion, stride),\n                norm_layer(planes * block.expansion),\n            )\n\n        layers = []\n        layers.append(\n            block(\n                self.inplanes, planes, stride, downsample, self.groups,\n                self.base_width, previous_dilation, norm_layer\n            )\n        )\n        self.inplanes = planes * block.expansion\n        for _ in range(1, blocks):\n            layers.append(\n                block(\n                    self.inplanes,\n                    planes,\n                    groups=self.groups,\n                    base_width=self.base_width,\n                    dilation=self.dilation,\n                    norm_layer=norm_layer\n                )\n            )\n\n        return nn.Sequential(*layers)\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"Constructs fully connected layer\n\n        Args:\n            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed\n            input_dim (int): input dimension\n            dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm2d):\n                
nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def featuremaps(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if self.fc is not None:\n            v = self.fc(v)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\n\"\"\"ResNet\"\"\"\n\n\ndef resnet18(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNet(\n        num_classes=num_classes,\n        loss=loss,\n        block=BasicBlock,\n        layers=[2, 2, 2, 2],\n        last_stride=2,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet18'])\n    return model\n\n\ndef resnet34(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNet(\n        num_classes=num_classes,\n        loss=loss,\n        block=BasicBlock,\n        layers=[3, 4, 6, 3],\n        last_stride=2,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet34'])\n    return model\n\n\ndef resnet50(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNet(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 4, 6, 3],\n        last_stride=2,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet50'])\n    return model\n\n\ndef resnet101(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNet(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 4, 23, 3],\n        last_stride=2,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet101'])\n    return model\n\n\ndef resnet152(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNet(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 8, 
36, 3],\n        last_stride=2,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet152'])\n    return model\n\n\n\"\"\"ResNeXt\"\"\"\n\n\ndef resnext50_32x4d(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNet(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 4, 6, 3],\n        last_stride=2,\n        fc_dims=None,\n        dropout_p=None,\n        groups=32,\n        width_per_group=4,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnext50_32x4d'])\n    return model\n\n\ndef resnext101_32x8d(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNet(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 4, 23, 3],\n        last_stride=2,\n        fc_dims=None,\n        dropout_p=None,\n        groups=32,\n        width_per_group=8,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnext101_32x8d'])\n    return model\n\n\n\"\"\"\nResNet + FC\n\"\"\"\n\n\ndef resnet50_fc512(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNet(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 4, 6, 3],\n        last_stride=1,\n        fc_dims=[512],\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet50'])\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/resnet_ibn_a.py",
    "content": "\"\"\"\nCredit to https://github.com/XingangPan/IBN-Net.\n\"\"\"\nfrom __future__ import division, absolute_import\nimport math\nimport torch\nimport torch.nn as nn\nimport torch.utils.model_zoo as model_zoo\n\n__all__ = ['resnet50_ibn_a']\n\nmodel_urls = {\n    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n}\n\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"3x3 convolution with padding\"\n    return nn.Conv2d(\n        in_planes,\n        out_planes,\n        kernel_size=3,\n        stride=stride,\n        padding=1,\n        bias=False\n    )\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = conv3x3(inplanes, planes, stride)\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = conv3x3(planes, planes)\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass IBN(nn.Module):\n\n    def __init__(self, planes):\n        super(IBN, self).__init__()\n        half1 = int(planes / 2)\n        self.half = half1\n        half2 = planes - half1\n        self.IN = nn.InstanceNorm2d(half1, affine=True)\n        self.BN = nn.BatchNorm2d(half2)\n\n    def forward(self, x):\n        split = torch.split(x, self.half, 1)\n        out1 = self.IN(split[0].contiguous())\n        out2 = self.BN(split[1].contiguous())\n        out = torch.cat((out1, out2), 1)\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, inplanes, planes, ibn=False, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n        if ibn:\n            self.bn1 = IBN(planes)\n        else:\n            self.bn1 = nn.BatchNorm2d(planes)\n        self.conv2 = nn.Conv2d(\n            planes,\n            planes,\n            kernel_size=3,\n            stride=stride,\n            padding=1,\n            bias=False\n        )\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.conv3 = nn.Conv2d(\n            planes, planes * self.expansion, kernel_size=1, bias=False\n        )\n        self.bn3 = nn.BatchNorm2d(planes * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n    \"\"\"Residual network + IBN layer.\n    \n    
Reference:\n        - He et al. Deep Residual Learning for Image Recognition. CVPR 2016.\n        - Pan et al. Two at Once: Enhancing Learning and Generalization\n          Capacities via IBN-Net. ECCV 2018.\n    \"\"\"\n\n    def __init__(\n        self,\n        block,\n        layers,\n        num_classes=1000,\n        loss='softmax',\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    ):\n        scale = 64\n        self.inplanes = scale\n        super(ResNet, self).__init__()\n        self.loss = loss\n        self.feature_dim = scale * 8 * block.expansion\n\n        self.conv1 = nn.Conv2d(\n            3, scale, kernel_size=7, stride=2, padding=3, bias=False\n        )\n        self.bn1 = nn.BatchNorm2d(scale)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, scale, layers[0])\n        self.layer2 = self._make_layer(block, scale * 2, layers[1], stride=2)\n        self.layer3 = self._make_layer(block, scale * 4, layers[2], stride=2)\n        self.layer4 = self._make_layer(block, scale * 8, layers[3], stride=2)\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.fc = self._construct_fc_layer(\n            fc_dims, scale * 8 * block.expansion, dropout_p\n        )\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. / n))\n            elif isinstance(m, nn.BatchNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n            elif isinstance(m, nn.InstanceNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(\n                    self.inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                    bias=False\n                ),\n                nn.BatchNorm2d(planes * block.expansion),\n            )\n\n        layers = []\n        ibn = True\n        if planes == 512:\n            ibn = False\n        layers.append(block(self.inplanes, planes, ibn, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes, ibn))\n\n        return nn.Sequential(*layers)\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"Constructs fully connected layer\n\n        Args:\n            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed\n            input_dim (int): input dimension\n            dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n     
       layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def featuremaps(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v = self.avgpool(f)\n        v = v.view(v.size(0), -1)\n        if self.fc is not None:\n            v = self.fc(v)\n        if not self.training:\n            return v\n        y = self.classifier(v)\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef resnet50_ibn_a(num_classes, loss='softmax', pretrained=False, **kwargs):\n    model = ResNet(\n        Bottleneck, [3, 4, 6, 3], num_classes=num_classes, loss=loss, **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet50'])\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/resnet_ibn_b.py",
    "content": "\"\"\"\nCredit to https://github.com/XingangPan/IBN-Net.\n\"\"\"\nfrom __future__ import division, absolute_import\nimport math\nimport torch.nn as nn\nimport torch.utils.model_zoo as model_zoo\n\n__all__ = ['resnet50_ibn_b']\n\nmodel_urls = {\n    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n}\n\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"3x3 convolution with padding\"\n    return nn.Conv2d(\n        in_planes,\n        out_planes,\n        kernel_size=3,\n        stride=stride,\n        padding=1,\n        bias=False\n    )\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = conv3x3(inplanes, planes, stride)\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = conv3x3(planes, planes)\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None, IN=False):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.conv2 = nn.Conv2d(\n            planes,\n            planes,\n            kernel_size=3,\n            stride=stride,\n            padding=1,\n            bias=False\n        )\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.conv3 = nn.Conv2d(\n            planes, planes * self.expansion, kernel_size=1, bias=False\n        )\n        self.bn3 = nn.BatchNorm2d(planes * self.expansion)\n        self.IN = None\n        if IN:\n            self.IN = nn.InstanceNorm2d(planes * 4, affine=True)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        if self.IN is not None:\n            out = self.IN(out)\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNet(nn.Module):\n    \"\"\"Residual network + IBN layer.\n    \n    Reference:\n        - He et al. Deep Residual Learning for Image Recognition. CVPR 2016.\n        - Pan et al. Two at Once: Enhancing Learning and Generalization\n          Capacities via IBN-Net. 
ECCV 2018.\n    \"\"\"\n\n    def __init__(\n        self,\n        block,\n        layers,\n        num_classes=1000,\n        loss='softmax',\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    ):\n        scale = 64\n        self.inplanes = scale\n        super(ResNet, self).__init__()\n        self.loss = loss\n        self.feature_dim = scale * 8 * block.expansion\n\n        self.conv1 = nn.Conv2d(\n            3, scale, kernel_size=7, stride=2, padding=3, bias=False\n        )\n        self.bn1 = nn.InstanceNorm2d(scale, affine=True)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(\n            block, scale, layers[0], stride=1, IN=True\n        )\n        self.layer2 = self._make_layer(\n            block, scale * 2, layers[1], stride=2, IN=True\n        )\n        self.layer3 = self._make_layer(block, scale * 4, layers[2], stride=2)\n        self.layer4 = self._make_layer(block, scale * 8, layers[3], stride=2)\n        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))\n        self.fc = self._construct_fc_layer(\n            fc_dims, scale * 8 * block.expansion, dropout_p\n        )\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels\n                m.weight.data.normal_(0, math.sqrt(2. / n))\n            elif isinstance(m, nn.BatchNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n            elif isinstance(m, nn.InstanceNorm2d):\n                m.weight.data.fill_(1)\n                m.bias.data.zero_()\n\n    def _make_layer(self, block, planes, blocks, stride=1, IN=False):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(\n                    self.inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                    bias=False\n                ),\n                nn.BatchNorm2d(planes * block.expansion),\n            )\n\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks - 1):\n            layers.append(block(self.inplanes, planes))\n        layers.append(block(self.inplanes, planes, IN=IN))\n\n        return nn.Sequential(*layers)\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"Constructs fully connected layer\n\n        Args:\n            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed\n            input_dim (int): input dimension\n            dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                
layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def featuremaps(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v = self.avgpool(f)\n        v = v.view(v.size(0), -1)\n        if self.fc is not None:\n            v = self.fc(v)\n        if not self.training:\n            return v\n        y = self.classifier(v)\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef resnet50_ibn_b(num_classes, loss='softmax', pretrained=False, **kwargs):\n    model = ResNet(\n        Bottleneck, [3, 4, 6, 3], num_classes=num_classes, loss=loss, **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet50'])\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/resnetmid.py",
    "content": "from __future__ import division, absolute_import\nimport torch\nimport torch.utils.model_zoo as model_zoo\nfrom torch import nn\n\n__all__ = ['resnet50mid']\n\nmodel_urls = {\n    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',\n    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',\n    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',\n    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',\n    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',\n}\n\n\ndef conv3x3(in_planes, out_planes, stride=1):\n    \"\"\"3x3 convolution with padding\"\"\"\n    return nn.Conv2d(\n        in_planes,\n        out_planes,\n        kernel_size=3,\n        stride=stride,\n        padding=1,\n        bias=False\n    )\n\n\nclass BasicBlock(nn.Module):\n    expansion = 1\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(BasicBlock, self).__init__()\n        self.conv1 = conv3x3(inplanes, planes, stride)\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.relu = nn.ReLU(inplace=True)\n        self.conv2 = conv3x3(planes, planes)\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass Bottleneck(nn.Module):\n    expansion = 4\n\n    def __init__(self, inplanes, planes, stride=1, downsample=None):\n        super(Bottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.conv2 = nn.Conv2d(\n            planes,\n            planes,\n            kernel_size=3,\n            stride=stride,\n            padding=1,\n            bias=False\n        )\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.conv3 = nn.Conv2d(\n            planes, planes * self.expansion, kernel_size=1, bias=False\n        )\n        self.bn3 = nn.BatchNorm2d(planes * self.expansion)\n        self.relu = nn.ReLU(inplace=True)\n        self.downsample = downsample\n        self.stride = stride\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out += residual\n        out = self.relu(out)\n\n        return out\n\n\nclass ResNetMid(nn.Module):\n    \"\"\"Residual network + mid-level features.\n    \n    Reference:\n        Yu et al. The Devil is in the Middle: Exploiting Mid-level Representations for\n        Cross-Domain Instance Matching. 
arXiv:1711.08106.\n\n    Public keys:\n        - ``resnet50mid``: ResNet50 + mid-level feature fusion.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        loss,\n        block,\n        layers,\n        last_stride=2,\n        fc_dims=None,\n        **kwargs\n    ):\n        self.inplanes = 64\n        super(ResNetMid, self).__init__()\n        self.loss = loss\n        self.feature_dim = 512 * block.expansion\n\n        # backbone network\n        self.conv1 = nn.Conv2d(\n            3, 64, kernel_size=7, stride=2, padding=3, bias=False\n        )\n        self.bn1 = nn.BatchNorm2d(64)\n        self.relu = nn.ReLU(inplace=True)\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n        self.layer1 = self._make_layer(block, 64, layers[0])\n        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)\n        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)\n        self.layer4 = self._make_layer(\n            block, 512, layers[3], stride=last_stride\n        )\n\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        assert fc_dims is not None\n        self.fc_fusion = self._construct_fc_layer(\n            fc_dims, 512 * block.expansion * 2\n        )\n        self.feature_dim += 512 * block.expansion\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        self._init_params()\n\n    def _make_layer(self, block, planes, blocks, stride=1):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(\n                    self.inplanes,\n                    planes * block.expansion,\n                    kernel_size=1,\n                    stride=stride,\n                    bias=False\n                ),\n                nn.BatchNorm2d(planes * block.expansion),\n            )\n\n        layers = []\n        layers.append(block(self.inplanes, planes, stride, downsample))\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes))\n\n        return nn.Sequential(*layers)\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"Constructs fully connected layer\n\n        Args:\n            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed\n            input_dim (int): input dimension\n            dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                   
 nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def featuremaps(self, x):\n        x = self.conv1(x)\n        x = self.bn1(x)\n        x = self.relu(x)\n        x = self.maxpool(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x4a = self.layer4[0](x)\n        x4b = self.layer4[1](x4a)\n        x4c = self.layer4[2](x4b)\n        return x4a, x4b, x4c\n\n    def forward(self, x):\n        x4a, x4b, x4c = self.featuremaps(x)\n\n        v4a = self.global_avgpool(x4a)\n        v4b = self.global_avgpool(x4b)\n        v4c = self.global_avgpool(x4c)\n        v4ab = torch.cat([v4a, v4b], 1)\n        v4ab = v4ab.view(v4ab.size(0), -1)\n        v4ab = self.fc_fusion(v4ab)\n        v4c = v4c.view(v4c.size(0), -1)\n        v = torch.cat([v4ab, v4c], 1)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\n\"\"\"\nResidual network configurations:\n--\nresnet18: block=BasicBlock, layers=[2, 2, 2, 2]\nresnet34: block=BasicBlock, layers=[3, 4, 6, 3]\nresnet50: block=Bottleneck, layers=[3, 4, 6, 3]\nresnet101: block=Bottleneck, layers=[3, 4, 23, 3]\nresnet152: block=Bottleneck, layers=[3, 8, 36, 3]\n\"\"\"\n\n\ndef resnet50mid(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ResNetMid(\n        num_classes=num_classes,\n        loss=loss,\n        block=Bottleneck,\n        layers=[3, 4, 6, 3],\n        last_stride=2,\n        fc_dims=[1024],\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['resnet50'])\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/senet.py",
    "content": "from __future__ import division, absolute_import\nimport math\nfrom collections import OrderedDict\nimport torch.nn as nn\nfrom torch.utils import model_zoo\n\n__all__ = [\n    'senet154', 'se_resnet50', 'se_resnet101', 'se_resnet152',\n    'se_resnext50_32x4d', 'se_resnext101_32x4d', 'se_resnet50_fc512'\n]\n\"\"\"\nCode imported from https://github.com/Cadene/pretrained-models.pytorch\n\"\"\"\n\npretrained_settings = {\n    'senet154': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/senet154-c7b49a05.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 224, 224],\n            'input_range': [0, 1],\n            'mean': [0.485, 0.456, 0.406],\n            'std': [0.229, 0.224, 0.225],\n            'num_classes': 1000\n        }\n    },\n    'se_resnet50': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet50-ce0d4300.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 224, 224],\n            'input_range': [0, 1],\n            'mean': [0.485, 0.456, 0.406],\n            'std': [0.229, 0.224, 0.225],\n            'num_classes': 1000\n        }\n    },\n    'se_resnet101': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet101-7e38fcc6.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 224, 224],\n            'input_range': [0, 1],\n            'mean': [0.485, 0.456, 0.406],\n            'std': [0.229, 0.224, 0.225],\n            'num_classes': 1000\n        }\n    },\n    'se_resnet152': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/se_resnet152-d17c99b7.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 224, 224],\n            'input_range': [0, 1],\n            'mean': [0.485, 0.456, 0.406],\n            'std': [0.229, 0.224, 0.225],\n            'num_classes': 1000\n        }\n    },\n    'se_resnext50_32x4d': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 224, 224],\n            'input_range': [0, 1],\n            'mean': [0.485, 0.456, 0.406],\n            'std': [0.229, 0.224, 0.225],\n            'num_classes': 1000\n        }\n    },\n    'se_resnext101_32x4d': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext101_32x4d-3b2fe3d8.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 224, 224],\n            'input_range': [0, 1],\n            'mean': [0.485, 0.456, 0.406],\n            'std': [0.229, 0.224, 0.225],\n            'num_classes': 1000\n        }\n    },\n}\n\n\nclass SEModule(nn.Module):\n\n    def __init__(self, channels, reduction):\n        super(SEModule, self).__init__()\n        self.avg_pool = nn.AdaptiveAvgPool2d(1)\n        self.fc1 = nn.Conv2d(\n            channels, channels // reduction, kernel_size=1, padding=0\n        )\n        self.relu = nn.ReLU(inplace=True)\n        self.fc2 = nn.Conv2d(\n            channels // reduction, channels, kernel_size=1, padding=0\n        )\n        self.sigmoid = nn.Sigmoid()\n\n    def forward(self, x):\n        module_input = x\n        x = self.avg_pool(x)\n        x = self.fc1(x)\n        x = self.relu(x)\n        x = self.fc2(x)\n        x = self.sigmoid(x)\n    
    return module_input * x\n\n\nclass Bottleneck(nn.Module):\n    \"\"\"\n    Base class for bottlenecks that implements `forward()` method.\n    \"\"\"\n\n    def forward(self, x):\n        residual = x\n\n        out = self.conv1(x)\n        out = self.bn1(out)\n        out = self.relu(out)\n\n        out = self.conv2(out)\n        out = self.bn2(out)\n        out = self.relu(out)\n\n        out = self.conv3(out)\n        out = self.bn3(out)\n\n        if self.downsample is not None:\n            residual = self.downsample(x)\n\n        out = self.se_module(out) + residual\n        out = self.relu(out)\n\n        return out\n\n\nclass SEBottleneck(Bottleneck):\n    \"\"\"\n    Bottleneck for SENet154.\n    \"\"\"\n    expansion = 4\n\n    def __init__(\n        self, inplanes, planes, groups, reduction, stride=1, downsample=None\n    ):\n        super(SEBottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(inplanes, planes * 2, kernel_size=1, bias=False)\n        self.bn1 = nn.BatchNorm2d(planes * 2)\n        self.conv2 = nn.Conv2d(\n            planes * 2,\n            planes * 4,\n            kernel_size=3,\n            stride=stride,\n            padding=1,\n            groups=groups,\n            bias=False\n        )\n        self.bn2 = nn.BatchNorm2d(planes * 4)\n        self.conv3 = nn.Conv2d(\n            planes * 4, planes * 4, kernel_size=1, bias=False\n        )\n        self.bn3 = nn.BatchNorm2d(planes * 4)\n        self.relu = nn.ReLU(inplace=True)\n        self.se_module = SEModule(planes * 4, reduction=reduction)\n        self.downsample = downsample\n        self.stride = stride\n\n\nclass SEResNetBottleneck(Bottleneck):\n    \"\"\"\n    ResNet bottleneck with a Squeeze-and-Excitation module. It follows Caffe\n    implementation and uses `stride=stride` in `conv1` and not in `conv2`\n    (the latter is used in the torchvision implementation of ResNet).\n    \"\"\"\n    expansion = 4\n\n    def __init__(\n        self, inplanes, planes, groups, reduction, stride=1, downsample=None\n    ):\n        super(SEResNetBottleneck, self).__init__()\n        self.conv1 = nn.Conv2d(\n            inplanes, planes, kernel_size=1, bias=False, stride=stride\n        )\n        self.bn1 = nn.BatchNorm2d(planes)\n        self.conv2 = nn.Conv2d(\n            planes,\n            planes,\n            kernel_size=3,\n            padding=1,\n            groups=groups,\n            bias=False\n        )\n        self.bn2 = nn.BatchNorm2d(planes)\n        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)\n        self.bn3 = nn.BatchNorm2d(planes * 4)\n        self.relu = nn.ReLU(inplace=True)\n        self.se_module = SEModule(planes * 4, reduction=reduction)\n        self.downsample = downsample\n        self.stride = stride\n\n\nclass SEResNeXtBottleneck(Bottleneck):\n    \"\"\"ResNeXt bottleneck type C with a Squeeze-and-Excitation module\"\"\"\n    expansion = 4\n\n    def __init__(\n        self,\n        inplanes,\n        planes,\n        groups,\n        reduction,\n        stride=1,\n        downsample=None,\n        base_width=4\n    ):\n        super(SEResNeXtBottleneck, self).__init__()\n        width = int(math.floor(planes * (base_width/64.)) * groups)\n        self.conv1 = nn.Conv2d(\n            inplanes, width, kernel_size=1, bias=False, stride=1\n        )\n        self.bn1 = nn.BatchNorm2d(width)\n        self.conv2 = nn.Conv2d(\n            width,\n            width,\n            kernel_size=3,\n            stride=stride,\n            padding=1,\n  
          groups=groups,\n            bias=False\n        )\n        self.bn2 = nn.BatchNorm2d(width)\n        self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)\n        self.bn3 = nn.BatchNorm2d(planes * 4)\n        self.relu = nn.ReLU(inplace=True)\n        self.se_module = SEModule(planes * 4, reduction=reduction)\n        self.downsample = downsample\n        self.stride = stride\n\n\nclass SENet(nn.Module):\n    \"\"\"Squeeze-and-excitation network.\n    \n    Reference:\n        Hu et al. Squeeze-and-Excitation Networks. CVPR 2018.\n\n    Public keys:\n        - ``senet154``: SENet154.\n        - ``se_resnet50``: ResNet50 + SE.\n        - ``se_resnet101``: ResNet101 + SE.\n        - ``se_resnet152``: ResNet152 + SE.\n        - ``se_resnext50_32x4d``: ResNeXt50 (groups=32, width=4) + SE.\n        - ``se_resnext101_32x4d``: ResNeXt101 (groups=32, width=4) + SE.\n        - ``se_resnet50_fc512``: (ResNet50 + SE) + FC.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        loss,\n        block,\n        layers,\n        groups,\n        reduction,\n        dropout_p=0.2,\n        inplanes=128,\n        input_3x3=True,\n        downsample_kernel_size=3,\n        downsample_padding=1,\n        last_stride=2,\n        fc_dims=None,\n        **kwargs\n    ):\n        \"\"\"\n        Parameters\n        ----------\n        block (nn.Module): Bottleneck class.\n            - For SENet154: SEBottleneck\n            - For SE-ResNet models: SEResNetBottleneck\n            - For SE-ResNeXt models:  SEResNeXtBottleneck\n        layers (list of ints): Number of residual blocks for 4 layers of the\n            network (layer1...layer4).\n        groups (int): Number of groups for the 3x3 convolution in each\n            bottleneck block.\n            - For SENet154: 64\n            - For SE-ResNet models: 1\n            - For SE-ResNeXt models:  32\n        reduction (int): Reduction ratio for Squeeze-and-Excitation modules.\n            - For all models: 16\n        dropout_p (float or None): Drop probability for the Dropout layer.\n            If `None` the Dropout layer is not used.\n            - For SENet154: 0.2\n            - For SE-ResNet models: None\n            - For SE-ResNeXt models: None\n        inplanes (int):  Number of input channels for layer1.\n            - For SENet154: 128\n            - For SE-ResNet models: 64\n            - For SE-ResNeXt models: 64\n        input_3x3 (bool): If `True`, use three 3x3 convolutions instead of\n            a single 7x7 convolution in layer0.\n            - For SENet154: True\n            - For SE-ResNet models: False\n            - For SE-ResNeXt models: False\n        downsample_kernel_size (int): Kernel size for downsampling convolutions\n            in layer2, layer3 and layer4.\n            - For SENet154: 3\n            - For SE-ResNet models: 1\n            - For SE-ResNeXt models: 1\n        downsample_padding (int): Padding for downsampling convolutions in\n            layer2, layer3 and layer4.\n            - For SENet154: 1\n            - For SE-ResNet models: 0\n            - For SE-ResNeXt models: 0\n        num_classes (int): Number of outputs in `classifier` layer.\n        \"\"\"\n        super(SENet, self).__init__()\n        self.inplanes = inplanes\n        self.loss = loss\n\n        if input_3x3:\n            layer0_modules = [\n                (\n                    'conv1',\n                    nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)\n                ),\n               
 ('bn1', nn.BatchNorm2d(64)),\n                ('relu1', nn.ReLU(inplace=True)),\n                (\n                    'conv2',\n                    nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)\n                ),\n                ('bn2', nn.BatchNorm2d(64)),\n                ('relu2', nn.ReLU(inplace=True)),\n                (\n                    'conv3',\n                    nn.Conv2d(\n                        64, inplanes, 3, stride=1, padding=1, bias=False\n                    )\n                ),\n                ('bn3', nn.BatchNorm2d(inplanes)),\n                ('relu3', nn.ReLU(inplace=True)),\n            ]\n        else:\n            layer0_modules = [\n                (\n                    'conv1',\n                    nn.Conv2d(\n                        3,\n                        inplanes,\n                        kernel_size=7,\n                        stride=2,\n                        padding=3,\n                        bias=False\n                    )\n                ),\n                ('bn1', nn.BatchNorm2d(inplanes)),\n                ('relu1', nn.ReLU(inplace=True)),\n            ]\n        # To preserve compatibility with Caffe weights `ceil_mode=True`\n        # is used instead of `padding=1`.\n        layer0_modules.append(\n            ('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True))\n        )\n        self.layer0 = nn.Sequential(OrderedDict(layer0_modules))\n        self.layer1 = self._make_layer(\n            block,\n            planes=64,\n            blocks=layers[0],\n            groups=groups,\n            reduction=reduction,\n            downsample_kernel_size=1,\n            downsample_padding=0\n        )\n        self.layer2 = self._make_layer(\n            block,\n            planes=128,\n            blocks=layers[1],\n            stride=2,\n            groups=groups,\n            reduction=reduction,\n            downsample_kernel_size=downsample_kernel_size,\n            downsample_padding=downsample_padding\n        )\n        self.layer3 = self._make_layer(\n            block,\n            planes=256,\n            blocks=layers[2],\n            stride=2,\n            groups=groups,\n            reduction=reduction,\n            downsample_kernel_size=downsample_kernel_size,\n            downsample_padding=downsample_padding\n        )\n        self.layer4 = self._make_layer(\n            block,\n            planes=512,\n            blocks=layers[3],\n            stride=last_stride,\n            groups=groups,\n            reduction=reduction,\n            downsample_kernel_size=downsample_kernel_size,\n            downsample_padding=downsample_padding\n        )\n\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.fc = self._construct_fc_layer(\n            fc_dims, 512 * block.expansion, dropout_p\n        )\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n    def _make_layer(\n        self,\n        block,\n        planes,\n        blocks,\n        groups,\n        reduction,\n        stride=1,\n        downsample_kernel_size=1,\n        downsample_padding=0\n    ):\n        downsample = None\n        if stride != 1 or self.inplanes != planes * block.expansion:\n            downsample = nn.Sequential(\n                nn.Conv2d(\n                    self.inplanes,\n                    planes * block.expansion,\n                    kernel_size=downsample_kernel_size,\n                    stride=stride,\n                    padding=downsample_padding,\n                    bias=False\n      
          ),\n                nn.BatchNorm2d(planes * block.expansion),\n            )\n\n        layers = []\n        layers.append(\n            block(\n                self.inplanes, planes, groups, reduction, stride, downsample\n            )\n        )\n        self.inplanes = planes * block.expansion\n        for i in range(1, blocks):\n            layers.append(block(self.inplanes, planes, groups, reduction))\n\n        return nn.Sequential(*layers)\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"\n        Construct fully connected layer\n\n        - fc_dims (list or tuple): dimensions of fc layers, if None,\n                                   no fc layers are constructed\n        - input_dim (int): input dimension\n        - dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def featuremaps(self, x):\n        x = self.layer0(x)\n        x = self.layer1(x)\n        x = self.layer2(x)\n        x = self.layer3(x)\n        x = self.layer4(x)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if self.fc is not None:\n            v = self.fc(v)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef senet154(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = SENet(\n        num_classes=num_classes,\n        loss=loss,\n        block=SEBottleneck,\n        layers=[3, 8, 36, 3],\n        groups=64,\n        reduction=16,\n        dropout_p=0.2,\n        last_stride=2,\n        fc_dims=None,\n        **kwargs\n    )\n    if pretrained:\n        model_url = pretrained_settings['senet154']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n\n\ndef se_resnet50(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = SENet(\n        num_classes=num_classes,\n        loss=loss,\n        block=SEResNetBottleneck,\n        layers=[3, 4, 6, 3],\n        groups=1,\n        reduction=16,\n        
dropout_p=None,\n        inplanes=64,\n        input_3x3=False,\n        downsample_kernel_size=1,\n        downsample_padding=0,\n        last_stride=2,\n        fc_dims=None,\n        **kwargs\n    )\n    if pretrained:\n        model_url = pretrained_settings['se_resnet50']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n\n\ndef se_resnet50_fc512(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = SENet(\n        num_classes=num_classes,\n        loss=loss,\n        block=SEResNetBottleneck,\n        layers=[3, 4, 6, 3],\n        groups=1,\n        reduction=16,\n        dropout_p=None,\n        inplanes=64,\n        input_3x3=False,\n        downsample_kernel_size=1,\n        downsample_padding=0,\n        last_stride=1,\n        fc_dims=[512],\n        **kwargs\n    )\n    if pretrained:\n        model_url = pretrained_settings['se_resnet50']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n\n\ndef se_resnet101(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = SENet(\n        num_classes=num_classes,\n        loss=loss,\n        block=SEResNetBottleneck,\n        layers=[3, 4, 23, 3],\n        groups=1,\n        reduction=16,\n        dropout_p=None,\n        inplanes=64,\n        input_3x3=False,\n        downsample_kernel_size=1,\n        downsample_padding=0,\n        last_stride=2,\n        fc_dims=None,\n        **kwargs\n    )\n    if pretrained:\n        model_url = pretrained_settings['se_resnet101']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n\n\ndef se_resnet152(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = SENet(\n        num_classes=num_classes,\n        loss=loss,\n        block=SEResNetBottleneck,\n        layers=[3, 8, 36, 3],\n        groups=1,\n        reduction=16,\n        dropout_p=None,\n        inplanes=64,\n        input_3x3=False,\n        downsample_kernel_size=1,\n        downsample_padding=0,\n        last_stride=2,\n        fc_dims=None,\n        **kwargs\n    )\n    if pretrained:\n        model_url = pretrained_settings['se_resnet152']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n\n\ndef se_resnext50_32x4d(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = SENet(\n        num_classes=num_classes,\n        loss=loss,\n        block=SEResNeXtBottleneck,\n        layers=[3, 4, 6, 3],\n        groups=32,\n        reduction=16,\n        dropout_p=None,\n        inplanes=64,\n        input_3x3=False,\n        downsample_kernel_size=1,\n        downsample_padding=0,\n        last_stride=2,\n        fc_dims=None,\n        **kwargs\n    )\n    if pretrained:\n        model_url = pretrained_settings['se_resnext50_32x4d']['imagenet']['url'\n                                                                          ]\n        init_pretrained_weights(model, model_url)\n    return model\n\n\ndef se_resnext101_32x4d(\n    num_classes, loss='softmax', pretrained=True, **kwargs\n):\n    model = SENet(\n        num_classes=num_classes,\n        loss=loss,\n        block=SEResNeXtBottleneck,\n        layers=[3, 4, 23, 3],\n        groups=32,\n        reduction=16,\n        dropout_p=None,\n        inplanes=64,\n        input_3x3=False,\n        downsample_kernel_size=1,\n        downsample_padding=0,\n        last_stride=2,\n        fc_dims=None,\n        **kwargs\n    )\n    if pretrained:\n        model_url = 
pretrained_settings['se_resnext101_32x4d']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/shufflenet.py",
    "content": "from __future__ import division, absolute_import\nimport torch\nimport torch.utils.model_zoo as model_zoo\nfrom torch import nn\nfrom torch.nn import functional as F\n\n__all__ = ['shufflenet']\n\nmodel_urls = {\n    # training epoch = 90, top1 = 61.8\n    'imagenet':\n    'https://mega.nz/#!RDpUlQCY!tr_5xBEkelzDjveIYBBcGcovNCOrgfiJO9kiidz9fZM',\n}\n\n\nclass ChannelShuffle(nn.Module):\n\n    def __init__(self, num_groups):\n        super(ChannelShuffle, self).__init__()\n        self.g = num_groups\n\n    def forward(self, x):\n        b, c, h, w = x.size()\n        n = c // self.g\n        # reshape\n        x = x.view(b, self.g, n, h, w)\n        # transpose\n        x = x.permute(0, 2, 1, 3, 4).contiguous()\n        # flatten\n        x = x.view(b, c, h, w)\n        return x\n\n\nclass Bottleneck(nn.Module):\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        stride,\n        num_groups,\n        group_conv1x1=True\n    ):\n        super(Bottleneck, self).__init__()\n        assert stride in [1, 2], 'Warning: stride must be either 1 or 2'\n        self.stride = stride\n        mid_channels = out_channels // 4\n        if stride == 2:\n            out_channels -= in_channels\n        # group conv is not applied to first conv1x1 at stage 2\n        num_groups_conv1x1 = num_groups if group_conv1x1 else 1\n        self.conv1 = nn.Conv2d(\n            in_channels,\n            mid_channels,\n            1,\n            groups=num_groups_conv1x1,\n            bias=False\n        )\n        self.bn1 = nn.BatchNorm2d(mid_channels)\n        self.shuffle1 = ChannelShuffle(num_groups)\n        self.conv2 = nn.Conv2d(\n            mid_channels,\n            mid_channels,\n            3,\n            stride=stride,\n            padding=1,\n            groups=mid_channels,\n            bias=False\n        )\n        self.bn2 = nn.BatchNorm2d(mid_channels)\n        self.conv3 = nn.Conv2d(\n            mid_channels, out_channels, 1, groups=num_groups, bias=False\n        )\n        self.bn3 = nn.BatchNorm2d(out_channels)\n        if stride == 2:\n            self.shortcut = nn.AvgPool2d(3, stride=2, padding=1)\n\n    def forward(self, x):\n        out = F.relu(self.bn1(self.conv1(x)))\n        out = self.shuffle1(out)\n        out = self.bn2(self.conv2(out))\n        out = self.bn3(self.conv3(out))\n        if self.stride == 2:\n            res = self.shortcut(x)\n            out = F.relu(torch.cat([res, out], 1))\n        else:\n            out = F.relu(x + out)\n        return out\n\n\n# configuration of (num_groups: #out_channels) based on Table 1 in the paper\ncfg = {\n    1: [144, 288, 576],\n    2: [200, 400, 800],\n    3: [240, 480, 960],\n    4: [272, 544, 1088],\n    8: [384, 768, 1536],\n}\n\n\nclass ShuffleNet(nn.Module):\n    \"\"\"ShuffleNet.\n\n    Reference:\n        Zhang et al. ShuffleNet: An Extremely Efficient Convolutional Neural\n        Network for Mobile Devices. 
CVPR 2018.\n\n    Public keys:\n        - ``shufflenet``: ShuffleNet (groups=3).\n    \"\"\"\n\n    def __init__(self, num_classes, loss='softmax', num_groups=3, **kwargs):\n        super(ShuffleNet, self).__init__()\n        self.loss = loss\n\n        self.conv1 = nn.Sequential(\n            nn.Conv2d(3, 24, 3, stride=2, padding=1, bias=False),\n            nn.BatchNorm2d(24),\n            nn.ReLU(),\n            nn.MaxPool2d(3, stride=2, padding=1),\n        )\n\n        self.stage2 = nn.Sequential(\n            Bottleneck(\n                24, cfg[num_groups][0], 2, num_groups, group_conv1x1=False\n            ),\n            Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),\n            Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),\n            Bottleneck(cfg[num_groups][0], cfg[num_groups][0], 1, num_groups),\n        )\n\n        self.stage3 = nn.Sequential(\n            Bottleneck(cfg[num_groups][0], cfg[num_groups][1], 2, num_groups),\n            Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),\n            Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),\n            Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),\n            Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),\n            Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),\n            Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),\n            Bottleneck(cfg[num_groups][1], cfg[num_groups][1], 1, num_groups),\n        )\n\n        self.stage4 = nn.Sequential(\n            Bottleneck(cfg[num_groups][1], cfg[num_groups][2], 2, num_groups),\n            Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),\n            Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),\n            Bottleneck(cfg[num_groups][2], cfg[num_groups][2], 1, num_groups),\n        )\n\n        self.classifier = nn.Linear(cfg[num_groups][2], num_classes)\n        self.feat_dim = cfg[num_groups][2]\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.stage2(x)\n        x = self.stage3(x)\n        x = self.stage4(x)\n        x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), -1)\n\n        if not self.training:\n            return x\n\n        y = self.classifier(x)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, x\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef shufflenet(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ShuffleNet(num_classes, loss, **kwargs)\n    if pretrained:\n        # init_pretrained_weights(model, model_urls['imagenet'])\n        import warnings\n        warnings.warn(\n            'The imagenet pretrained weights need to be manually downloaded from {}'\n            .format(model_urls['imagenet'])\n        )\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/shufflenetv2.py",
    "content": "\"\"\"\nCode source: https://github.com/pytorch/vision\n\"\"\"\nfrom __future__ import division, absolute_import\nimport torch\nimport torch.utils.model_zoo as model_zoo\nfrom torch import nn\n\n__all__ = [\n    'shufflenet_v2_x0_5', 'shufflenet_v2_x1_0', 'shufflenet_v2_x1_5',\n    'shufflenet_v2_x2_0'\n]\n\nmodel_urls = {\n    'shufflenetv2_x0.5':\n    'https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth',\n    'shufflenetv2_x1.0':\n    'https://download.pytorch.org/models/shufflenetv2_x1-5666bf0f80.pth',\n    'shufflenetv2_x1.5': None,\n    'shufflenetv2_x2.0': None,\n}\n\n\ndef channel_shuffle(x, groups):\n    batchsize, num_channels, height, width = x.data.size()\n    channels_per_group = num_channels // groups\n\n    # reshape\n    x = x.view(batchsize, groups, channels_per_group, height, width)\n\n    x = torch.transpose(x, 1, 2).contiguous()\n\n    # flatten\n    x = x.view(batchsize, -1, height, width)\n\n    return x\n\n\nclass InvertedResidual(nn.Module):\n\n    def __init__(self, inp, oup, stride):\n        super(InvertedResidual, self).__init__()\n\n        if not (1 <= stride <= 3):\n            raise ValueError('illegal stride value')\n        self.stride = stride\n\n        branch_features = oup // 2\n        assert (self.stride != 1) or (inp == branch_features << 1)\n\n        if self.stride > 1:\n            self.branch1 = nn.Sequential(\n                self.depthwise_conv(\n                    inp, inp, kernel_size=3, stride=self.stride, padding=1\n                ),\n                nn.BatchNorm2d(inp),\n                nn.Conv2d(\n                    inp,\n                    branch_features,\n                    kernel_size=1,\n                    stride=1,\n                    padding=0,\n                    bias=False\n                ),\n                nn.BatchNorm2d(branch_features),\n                nn.ReLU(inplace=True),\n            )\n\n        self.branch2 = nn.Sequential(\n            nn.Conv2d(\n                inp if (self.stride > 1) else branch_features,\n                branch_features,\n                kernel_size=1,\n                stride=1,\n                padding=0,\n                bias=False\n            ),\n            nn.BatchNorm2d(branch_features),\n            nn.ReLU(inplace=True),\n            self.depthwise_conv(\n                branch_features,\n                branch_features,\n                kernel_size=3,\n                stride=self.stride,\n                padding=1\n            ),\n            nn.BatchNorm2d(branch_features),\n            nn.Conv2d(\n                branch_features,\n                branch_features,\n                kernel_size=1,\n                stride=1,\n                padding=0,\n                bias=False\n            ),\n            nn.BatchNorm2d(branch_features),\n            nn.ReLU(inplace=True),\n        )\n\n    @staticmethod\n    def depthwise_conv(i, o, kernel_size, stride=1, padding=0, bias=False):\n        return nn.Conv2d(\n            i, o, kernel_size, stride, padding, bias=bias, groups=i\n        )\n\n    def forward(self, x):\n        if self.stride == 1:\n            x1, x2 = x.chunk(2, dim=1)\n            out = torch.cat((x1, self.branch2(x2)), dim=1)\n        else:\n            out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)\n\n        out = channel_shuffle(out, 2)\n\n        return out\n\n\nclass ShuffleNetV2(nn.Module):\n    \"\"\"ShuffleNetV2.\n    \n    Reference:\n        Ma et al. 
ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design. ECCV 2018.\n\n    Public keys:\n        - ``shufflenet_v2_x0_5``: ShuffleNetV2 x0.5.\n        - ``shufflenet_v2_x1_0``: ShuffleNetV2 x1.0.\n        - ``shufflenet_v2_x1_5``: ShuffleNetV2 x1.5.\n        - ``shufflenet_v2_x2_0``: ShuffleNetV2 x2.0.\n    \"\"\"\n\n    def __init__(\n        self, num_classes, loss, stages_repeats, stages_out_channels, **kwargs\n    ):\n        super(ShuffleNetV2, self).__init__()\n        self.loss = loss\n\n        if len(stages_repeats) != 3:\n            raise ValueError(\n                'expected stages_repeats as list of 3 positive ints'\n            )\n        if len(stages_out_channels) != 5:\n            raise ValueError(\n                'expected stages_out_channels as list of 5 positive ints'\n            )\n        self._stage_out_channels = stages_out_channels\n\n        input_channels = 3\n        output_channels = self._stage_out_channels[0]\n        self.conv1 = nn.Sequential(\n            nn.Conv2d(input_channels, output_channels, 3, 2, 1, bias=False),\n            nn.BatchNorm2d(output_channels),\n            nn.ReLU(inplace=True),\n        )\n        input_channels = output_channels\n\n        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)\n\n        stage_names = ['stage{}'.format(i) for i in [2, 3, 4]]\n        for name, repeats, output_channels in zip(\n            stage_names, stages_repeats, self._stage_out_channels[1:]\n        ):\n            seq = [InvertedResidual(input_channels, output_channels, 2)]\n            for i in range(repeats - 1):\n                seq.append(\n                    InvertedResidual(output_channels, output_channels, 1)\n                )\n            setattr(self, name, nn.Sequential(*seq))\n            input_channels = output_channels\n\n        output_channels = self._stage_out_channels[-1]\n        self.conv5 = nn.Sequential(\n            nn.Conv2d(input_channels, output_channels, 1, 1, 0, bias=False),\n            nn.BatchNorm2d(output_channels),\n            nn.ReLU(inplace=True),\n        )\n        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))\n\n        self.classifier = nn.Linear(output_channels, num_classes)\n\n    def featuremaps(self, x):\n        x = self.conv1(x)\n        x = self.maxpool(x)\n        x = self.stage2(x)\n        x = self.stage3(x)\n        x = self.stage4(x)\n        x = self.conv5(x)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError(\"Unsupported loss: {}\".format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    if model_url is None:\n        import warnings\n        warnings.warn(\n            'ImageNet pretrained weights are unavailable for this model'\n        )\n        return\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n 
   model.load_state_dict(model_dict)\n\n\ndef shufflenet_v2_x0_5(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ShuffleNetV2(\n        num_classes, loss, [4, 8, 4], [24, 48, 96, 192, 1024], **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['shufflenetv2_x0.5'])\n    return model\n\n\ndef shufflenet_v2_x1_0(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ShuffleNetV2(\n        num_classes, loss, [4, 8, 4], [24, 116, 232, 464, 1024], **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['shufflenetv2_x1.0'])\n    return model\n\n\ndef shufflenet_v2_x1_5(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ShuffleNetV2(\n        num_classes, loss, [4, 8, 4], [24, 176, 352, 704, 1024], **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['shufflenetv2_x1.5'])\n    return model\n\n\ndef shufflenet_v2_x2_0(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = ShuffleNetV2(\n        num_classes, loss, [4, 8, 4], [24, 244, 488, 976, 2048], **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['shufflenetv2_x2.0'])\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/squeezenet.py",
    "content": "\"\"\"\nCode source: https://github.com/pytorch/vision\n\"\"\"\nfrom __future__ import division, absolute_import\nimport torch\nimport torch.nn as nn\nimport torch.utils.model_zoo as model_zoo\n\n__all__ = ['squeezenet1_0', 'squeezenet1_1', 'squeezenet1_0_fc512']\n\nmodel_urls = {\n    'squeezenet1_0':\n    'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',\n    'squeezenet1_1':\n    'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',\n}\n\n\nclass Fire(nn.Module):\n\n    def __init__(\n        self, inplanes, squeeze_planes, expand1x1_planes, expand3x3_planes\n    ):\n        super(Fire, self).__init__()\n        self.inplanes = inplanes\n        self.squeeze = nn.Conv2d(inplanes, squeeze_planes, kernel_size=1)\n        self.squeeze_activation = nn.ReLU(inplace=True)\n        self.expand1x1 = nn.Conv2d(\n            squeeze_planes, expand1x1_planes, kernel_size=1\n        )\n        self.expand1x1_activation = nn.ReLU(inplace=True)\n        self.expand3x3 = nn.Conv2d(\n            squeeze_planes, expand3x3_planes, kernel_size=3, padding=1\n        )\n        self.expand3x3_activation = nn.ReLU(inplace=True)\n\n    def forward(self, x):\n        x = self.squeeze_activation(self.squeeze(x))\n        return torch.cat(\n            [\n                self.expand1x1_activation(self.expand1x1(x)),\n                self.expand3x3_activation(self.expand3x3(x))\n            ], 1\n        )\n\n\nclass SqueezeNet(nn.Module):\n    \"\"\"SqueezeNet.\n\n    Reference:\n        Iandola et al. SqueezeNet: AlexNet-level accuracy with 50x fewer parameters\n        and< 0.5 MB model size. arXiv:1602.07360.\n\n    Public keys:\n        - ``squeezenet1_0``: SqueezeNet (version=1.0).\n        - ``squeezenet1_1``: SqueezeNet (version=1.1).\n        - ``squeezenet1_0_fc512``: SqueezeNet (version=1.0) + FC.\n    \"\"\"\n\n    def __init__(\n        self,\n        num_classes,\n        loss,\n        version=1.0,\n        fc_dims=None,\n        dropout_p=None,\n        **kwargs\n    ):\n        super(SqueezeNet, self).__init__()\n        self.loss = loss\n        self.feature_dim = 512\n\n        if version not in [1.0, 1.1]:\n            raise ValueError(\n                'Unsupported SqueezeNet version {version}:'\n                '1.0 or 1.1 expected'.format(version=version)\n            )\n\n        if version == 1.0:\n            self.features = nn.Sequential(\n                nn.Conv2d(3, 96, kernel_size=7, stride=2),\n                nn.ReLU(inplace=True),\n                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),\n                Fire(96, 16, 64, 64),\n                Fire(128, 16, 64, 64),\n                Fire(128, 32, 128, 128),\n                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),\n                Fire(256, 32, 128, 128),\n                Fire(256, 48, 192, 192),\n                Fire(384, 48, 192, 192),\n                Fire(384, 64, 256, 256),\n                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),\n                Fire(512, 64, 256, 256),\n            )\n        else:\n            self.features = nn.Sequential(\n                nn.Conv2d(3, 64, kernel_size=3, stride=2),\n                nn.ReLU(inplace=True),\n                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),\n                Fire(64, 16, 64, 64),\n                Fire(128, 16, 64, 64),\n                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),\n                Fire(128, 32, 128, 128),\n                Fire(256, 32, 128, 
128),\n                nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True),\n                Fire(256, 48, 192, 192),\n                Fire(384, 48, 192, 192),\n                Fire(384, 64, 256, 256),\n                Fire(512, 64, 256, 256),\n            )\n\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.fc = self._construct_fc_layer(fc_dims, 512, dropout_p)\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        self._init_params()\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"Constructs fully connected layer\n\n        Args:\n            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed\n            input_dim (int): input dimension\n            dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def forward(self, x):\n        f = self.features(x)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if self.fc is not None:\n            v = self.fc(v)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initializes model with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url, map_location=None)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef squeezenet1_0(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = SqueezeNet(\n        num_classes, loss, version=1.0, fc_dims=None, 
dropout_p=None, **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['squeezenet1_0'])\n    return model\n\n\ndef squeezenet1_0_fc512(\n    num_classes, loss='softmax', pretrained=True, **kwargs\n):\n    model = SqueezeNet(\n        num_classes,\n        loss,\n        version=1.0,\n        fc_dims=[512],\n        dropout_p=None,\n        **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['squeezenet1_0'])\n    return model\n\n\ndef squeezenet1_1(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = SqueezeNet(\n        num_classes, loss, version=1.1, fc_dims=None, dropout_p=None, **kwargs\n    )\n    if pretrained:\n        init_pretrained_weights(model, model_urls['squeezenet1_1'])\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/models/xception.py",
    "content": "from __future__ import division, absolute_import\nimport torch.nn as nn\nimport torch.nn.functional as F\nimport torch.utils.model_zoo as model_zoo\n\n__all__ = ['xception']\n\npretrained_settings = {\n    'xception': {\n        'imagenet': {\n            'url':\n            'http://data.lip6.fr/cadene/pretrainedmodels/xception-43020ad28.pth',\n            'input_space': 'RGB',\n            'input_size': [3, 299, 299],\n            'input_range': [0, 1],\n            'mean': [0.5, 0.5, 0.5],\n            'std': [0.5, 0.5, 0.5],\n            'num_classes': 1000,\n            'scale':\n            0.8975 # The resize parameter of the validation transform should be 333, and make sure to center crop at 299x299\n        }\n    }\n}\n\n\nclass SeparableConv2d(nn.Module):\n\n    def __init__(\n        self,\n        in_channels,\n        out_channels,\n        kernel_size=1,\n        stride=1,\n        padding=0,\n        dilation=1,\n        bias=False\n    ):\n        super(SeparableConv2d, self).__init__()\n\n        self.conv1 = nn.Conv2d(\n            in_channels,\n            in_channels,\n            kernel_size,\n            stride,\n            padding,\n            dilation,\n            groups=in_channels,\n            bias=bias\n        )\n        self.pointwise = nn.Conv2d(\n            in_channels, out_channels, 1, 1, 0, 1, 1, bias=bias\n        )\n\n    def forward(self, x):\n        x = self.conv1(x)\n        x = self.pointwise(x)\n        return x\n\n\nclass Block(nn.Module):\n\n    def __init__(\n        self,\n        in_filters,\n        out_filters,\n        reps,\n        strides=1,\n        start_with_relu=True,\n        grow_first=True\n    ):\n        super(Block, self).__init__()\n\n        if out_filters != in_filters or strides != 1:\n            self.skip = nn.Conv2d(\n                in_filters, out_filters, 1, stride=strides, bias=False\n            )\n            self.skipbn = nn.BatchNorm2d(out_filters)\n        else:\n            self.skip = None\n\n        self.relu = nn.ReLU(inplace=True)\n        rep = []\n\n        filters = in_filters\n        if grow_first:\n            rep.append(self.relu)\n            rep.append(\n                SeparableConv2d(\n                    in_filters,\n                    out_filters,\n                    3,\n                    stride=1,\n                    padding=1,\n                    bias=False\n                )\n            )\n            rep.append(nn.BatchNorm2d(out_filters))\n            filters = out_filters\n\n        for i in range(reps - 1):\n            rep.append(self.relu)\n            rep.append(\n                SeparableConv2d(\n                    filters, filters, 3, stride=1, padding=1, bias=False\n                )\n            )\n            rep.append(nn.BatchNorm2d(filters))\n\n        if not grow_first:\n            rep.append(self.relu)\n            rep.append(\n                SeparableConv2d(\n                    in_filters,\n                    out_filters,\n                    3,\n                    stride=1,\n                    padding=1,\n                    bias=False\n                )\n            )\n            rep.append(nn.BatchNorm2d(out_filters))\n\n        if not start_with_relu:\n            rep = rep[1:]\n        else:\n            rep[0] = nn.ReLU(inplace=False)\n\n        if strides != 1:\n            rep.append(nn.MaxPool2d(3, strides, 1))\n        self.rep = nn.Sequential(*rep)\n\n    def forward(self, inp):\n        x = self.rep(inp)\n\n        if self.skip 
is not None:\n            skip = self.skip(inp)\n            skip = self.skipbn(skip)\n        else:\n            skip = inp\n\n        x += skip\n        return x\n\n\nclass Xception(nn.Module):\n    \"\"\"Xception.\n    \n    Reference:\n        Chollet. Xception: Deep Learning with Depthwise\n        Separable Convolutions. CVPR 2017.\n\n    Public keys:\n        - ``xception``: Xception.\n    \"\"\"\n\n    def __init__(\n        self, num_classes, loss, fc_dims=None, dropout_p=None, **kwargs\n    ):\n        super(Xception, self).__init__()\n        self.loss = loss\n\n        self.conv1 = nn.Conv2d(3, 32, 3, 2, 0, bias=False)\n        self.bn1 = nn.BatchNorm2d(32)\n\n        self.conv2 = nn.Conv2d(32, 64, 3, bias=False)\n        self.bn2 = nn.BatchNorm2d(64)\n\n        self.block1 = Block(\n            64, 128, 2, 2, start_with_relu=False, grow_first=True\n        )\n        self.block2 = Block(\n            128, 256, 2, 2, start_with_relu=True, grow_first=True\n        )\n        self.block3 = Block(\n            256, 728, 2, 2, start_with_relu=True, grow_first=True\n        )\n\n        self.block4 = Block(\n            728, 728, 3, 1, start_with_relu=True, grow_first=True\n        )\n        self.block5 = Block(\n            728, 728, 3, 1, start_with_relu=True, grow_first=True\n        )\n        self.block6 = Block(\n            728, 728, 3, 1, start_with_relu=True, grow_first=True\n        )\n        self.block7 = Block(\n            728, 728, 3, 1, start_with_relu=True, grow_first=True\n        )\n\n        self.block8 = Block(\n            728, 728, 3, 1, start_with_relu=True, grow_first=True\n        )\n        self.block9 = Block(\n            728, 728, 3, 1, start_with_relu=True, grow_first=True\n        )\n        self.block10 = Block(\n            728, 728, 3, 1, start_with_relu=True, grow_first=True\n        )\n        self.block11 = Block(\n            728, 728, 3, 1, start_with_relu=True, grow_first=True\n        )\n\n        self.block12 = Block(\n            728, 1024, 2, 2, start_with_relu=True, grow_first=False\n        )\n\n        self.conv3 = SeparableConv2d(1024, 1536, 3, 1, 1)\n        self.bn3 = nn.BatchNorm2d(1536)\n\n        self.conv4 = SeparableConv2d(1536, 2048, 3, 1, 1)\n        self.bn4 = nn.BatchNorm2d(2048)\n\n        self.global_avgpool = nn.AdaptiveAvgPool2d(1)\n        self.feature_dim = 2048\n        self.fc = self._construct_fc_layer(fc_dims, 2048, dropout_p)\n        self.classifier = nn.Linear(self.feature_dim, num_classes)\n\n        self._init_params()\n\n    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):\n        \"\"\"Constructs fully connected layer.\n\n        Args:\n            fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed\n            input_dim (int): input dimension\n            dropout_p (float): dropout probability, if None, dropout is unused\n        \"\"\"\n        if fc_dims is None:\n            self.feature_dim = input_dim\n            return None\n\n        assert isinstance(\n            fc_dims, (list, tuple)\n        ), 'fc_dims must be either list or tuple, but got {}'.format(\n            type(fc_dims)\n        )\n\n        layers = []\n        for dim in fc_dims:\n            layers.append(nn.Linear(input_dim, dim))\n            layers.append(nn.BatchNorm1d(dim))\n            layers.append(nn.ReLU(inplace=True))\n            if dropout_p is not None:\n                layers.append(nn.Dropout(p=dropout_p))\n            input_dim = dim\n\n        
self.feature_dim = fc_dims[-1]\n\n        return nn.Sequential(*layers)\n\n    def _init_params(self):\n        for m in self.modules():\n            if isinstance(m, nn.Conv2d):\n                nn.init.kaiming_normal_(\n                    m.weight, mode='fan_out', nonlinearity='relu'\n                )\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm2d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.BatchNorm1d):\n                nn.init.constant_(m.weight, 1)\n                nn.init.constant_(m.bias, 0)\n            elif isinstance(m, nn.Linear):\n                nn.init.normal_(m.weight, 0, 0.01)\n                if m.bias is not None:\n                    nn.init.constant_(m.bias, 0)\n\n    def featuremaps(self, input):\n        x = self.conv1(input)\n        x = self.bn1(x)\n        x = F.relu(x, inplace=True)\n\n        x = self.conv2(x)\n        x = self.bn2(x)\n        x = F.relu(x, inplace=True)\n\n        x = self.block1(x)\n        x = self.block2(x)\n        x = self.block3(x)\n        x = self.block4(x)\n        x = self.block5(x)\n        x = self.block6(x)\n        x = self.block7(x)\n        x = self.block8(x)\n        x = self.block9(x)\n        x = self.block10(x)\n        x = self.block11(x)\n        x = self.block12(x)\n\n        x = self.conv3(x)\n        x = self.bn3(x)\n        x = F.relu(x, inplace=True)\n\n        x = self.conv4(x)\n        x = self.bn4(x)\n        x = F.relu(x, inplace=True)\n        return x\n\n    def forward(self, x):\n        f = self.featuremaps(x)\n        v = self.global_avgpool(f)\n        v = v.view(v.size(0), -1)\n\n        if self.fc is not None:\n            v = self.fc(v)\n\n        if not self.training:\n            return v\n\n        y = self.classifier(v)\n\n        if self.loss == 'softmax':\n            return y\n        elif self.loss == 'triplet':\n            return y, v\n        else:\n            raise KeyError('Unsupported loss: {}'.format(self.loss))\n\n\ndef init_pretrained_weights(model, model_url):\n    \"\"\"Initialize models with pretrained weights.\n    \n    Layers that don't match with pretrained layers in name or size are kept unchanged.\n    \"\"\"\n    pretrain_dict = model_zoo.load_url(model_url)\n    model_dict = model.state_dict()\n    pretrain_dict = {\n        k: v\n        for k, v in pretrain_dict.items()\n        if k in model_dict and model_dict[k].size() == v.size()\n    }\n    model_dict.update(pretrain_dict)\n    model.load_state_dict(model_dict)\n\n\ndef xception(num_classes, loss='softmax', pretrained=True, **kwargs):\n    model = Xception(num_classes, loss, fc_dims=None, dropout_p=None, **kwargs)\n    if pretrained:\n        model_url = pretrained_settings['xception']['imagenet']['url']\n        init_pretrained_weights(model, model_url)\n    return model\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/deep/reid_model_factory.py",
    "content": "import torch\nfrom collections import OrderedDict\n\n\n\n__model_types = [\n    'resnet50', 'mlfn', 'hacnn', 'mobilenetv2_x1_0', 'mobilenetv2_x1_4',\n    'osnet_x1_0', 'osnet_x0_75', 'osnet_x0_5', 'osnet_x0_25',\n    'osnet_ibn_x1_0', 'osnet_ain_x1_0']\n\n__trained_urls = {\n\n    # market1501 models ########################################################\n    'resnet50_market1501.pt':\n    'https://drive.google.com/uc?id=1dUUZ4rHDWohmsQXCRe2C_HbYkzz94iBV',\n    'resnet50_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=17ymnLglnc64NRvGOitY3BqMRS9UWd1wg',\n    'resnet50_msmt17.pt':\n    'https://drive.google.com/uc?id=1ep7RypVDOthCRIAqDnn4_N-UhkkFHJsj',\n\n    'resnet50_fc512_market1501.pt':\n    'https://drive.google.com/uc?id=1kv8l5laX_YCdIGVCetjlNdzKIA3NvsSt',\n    'resnet50_fc512_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=13QN8Mp3XH81GK4BPGXobKHKyTGH50Rtx',\n    'resnet50_fc512_msmt17.pt':\n    'https://drive.google.com/uc?id=1fDJLcz4O5wxNSUvImIIjoaIF9u1Rwaud',\n\n    'mlfn_market1501.pt':\n    'https://drive.google.com/uc?id=1wXcvhA_b1kpDfrt9s2Pma-MHxtj9pmvS',\n    'mlfn_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=1rExgrTNb0VCIcOnXfMsbwSUW1h2L1Bum',\n    'mlfn_msmt17.pt':\n    'https://drive.google.com/uc?id=18JzsZlJb3Wm7irCbZbZ07TN4IFKvR6p-',\n\n    'hacnn_market1501.pt':\n    'https://drive.google.com/uc?id=1LRKIQduThwGxMDQMiVkTScBwR7WidmYF',\n    'hacnn_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=1zNm6tP4ozFUCUQ7Sv1Z98EAJWXJEhtYH',\n    'hacnn_msmt17.pt':\n    'https://drive.google.com/uc?id=1MsKRtPM5WJ3_Tk2xC0aGOO7pM3VaFDNZ',\n\n    'mobilenetv2_x1_0_market1501.pt':\n    'https://drive.google.com/uc?id=18DgHC2ZJkjekVoqBWszD8_Xiikz-fewp',\n    'mobilenetv2_x1_0_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=1q1WU2FETRJ3BXcpVtfJUuqq4z3psetds',\n    'mobilenetv2_x1_0_msmt17.pt':\n    'https://drive.google.com/uc?id=1j50Hv14NOUAg7ZeB3frzfX-WYLi7SrhZ',\n\n    'mobilenetv2_x1_4_market1501.pt':\n    'https://drive.google.com/uc?id=1t6JCqphJG-fwwPVkRLmGGyEBhGOf2GO5',\n    'mobilenetv2_x1_4_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=12uD5FeVqLg9-AFDju2L7SQxjmPb4zpBN',\n    'mobilenetv2_x1_4_msmt17.pt':\n    'https://drive.google.com/uc?id=1ZY5P2Zgm-3RbDpbXM0kIBMPvspeNIbXz',\n\n    'osnet_x1_0_market1501.pt':\n    'https://drive.google.com/uc?id=1vduhq5DpN2q1g4fYEZfPI17MJeh9qyrA',\n    'osnet_x1_0_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=1QZO_4sNf4hdOKKKzKc-TZU9WW1v6zQbq',\n    'osnet_x1_0_msmt17.pt':\n    'https://drive.google.com/uc?id=112EMUfBPYeYg70w-syK6V6Mx8-Qb9Q1M',\n\n    'osnet_x0_75_market1501.pt':\n    'https://drive.google.com/uc?id=1ozRaDSQw_EQ8_93OUmjDbvLXw9TnfPer',\n    'osnet_x0_75_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=1IE3KRaTPp4OUa6PGTFL_d5_KQSJbP0Or',\n    'osnet_x0_75_msmt17.pt':\n    'https://drive.google.com/uc?id=1QEGO6WnJ-BmUzVPd3q9NoaO_GsPNlmWc',\n\n    'osnet_x0_5_market1501.pt':\n    'https://drive.google.com/uc?id=1PLB9rgqrUM7blWrg4QlprCuPT7ILYGKT',\n    'osnet_x0_5_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=1KoUVqmiST175hnkALg9XuTi1oYpqcyTu',\n    'osnet_x0_5_msmt17.pt':\n    'https://drive.google.com/uc?id=1UT3AxIaDvS2PdxzZmbkLmjtiqq7AIKCv',\n\n    'osnet_x0_25_market1501.pt':\n    'https://drive.google.com/uc?id=1z1UghYvOTtjx7kEoRfmqSMu-z62J6MAj',\n    'osnet_x0_25_dukemtmcreid.pt':\n    'https://drive.google.com/uc?id=1eumrtiXT4NOspjyEV4j8cHmlOaaCGk5l',\n    'osnet_x0_25_msmt17.pt':\n    
'https://drive.google.com/uc?id=1sSwXSUlj4_tHZequ_iZ8w_Jh0VaRQMqF',\n\n    ####### market1501 models ##################################################\n    'resnet50_msmt17.pt':\n    'https://drive.google.com/uc?id=1yiBteqgIZoOeywE8AhGmEQl7FTVwrQmf',\n    'osnet_x1_0_msmt17.pt':\n    'https://drive.google.com/uc?id=1IosIFlLiulGIjwW3H8uMRmx3MzPwf86x',\n    'osnet_x0_75_msmt17.pt':\n    'https://drive.google.com/uc?id=1fhjSS_7SUGCioIf2SWXaRGPqIY9j7-uw',\n\n    'osnet_x0_5_msmt17.pt':\n    'https://drive.google.com/uc?id=1DHgmb6XV4fwG3n-CnCM0zdL9nMsZ9_RF',\n    'osnet_x0_25_msmt17.pt':\n    'https://drive.google.com/uc?id=1Kkx2zW89jq_NETu4u42CFZTMVD5Hwm6e',\n    'osnet_ibn_x1_0_msmt17.pt':\n    'https://drive.google.com/uc?id=1q3Sj2ii34NlfxA4LvmHdWO_75NDRmECJ',\n    'osnet_ain_x1_0_msmt17.pt':\n    'https://drive.google.com/uc?id=1SigwBE6mPdqiJMqhuIY4aqC7--5CsMal',\n}\n\n\ndef show_downloadeable_models():\n    print('\\nAvailable .pt ReID models for automatic download')\n    print(list(__trained_urls.keys()))\n\n\ndef get_model_url(model):\n    if model.name in __trained_urls:\n        return __trained_urls[model.name]\n    else:\n        None\n\n\ndef is_model_in_model_types(model):\n    if model.name in __model_types:\n        return True\n    else:\n        return False\n\n\ndef get_model_name(model):\n    for x in __model_types:\n        if x in model.name:\n            return x\n    return None\n\n\ndef download_url(url, dst):\n    \"\"\"Downloads file from a url to a destination.\n\n    Args:\n        url (str): url to download file.\n        dst (str): destination path.\n    \"\"\"\n    from six.moves import urllib\n    print('* url=\"{}\"'.format(url))\n    print('* destination=\"{}\"'.format(dst))\n\n    def _reporthook(count, block_size, total_size):\n        global start_time\n        if count == 0:\n            start_time = time.time()\n            return\n        duration = time.time() - start_time\n        progress_size = int(count * block_size)\n        speed = int(progress_size / (1024*duration))\n        percent = int(count * block_size * 100 / total_size)\n        sys.stdout.write(\n            '\\r...%d%%, %d MB, %d KB/s, %d seconds passed' %\n            (percent, progress_size / (1024*1024), speed, duration)\n        )\n        sys.stdout.flush()\n\n    urllib.request.urlretrieve(url, dst, _reporthook)\n    sys.stdout.write('\\n')\n\n\ndef load_pretrained_weights(model, weight_path):\n    r\"\"\"Loads pretrianed weights to model.\n\n    Features::\n        - Incompatible layers (unmatched in name or size) will be ignored.\n        - Can automatically deal with keys containing \"module.\".\n\n    Args:\n        model (nn.Module): network model.\n        weight_path (str): path to pretrained weights.\n\n    Examples::\n        >>> from torchreid.utils import load_pretrained_weights\n        >>> weight_path = 'log/my_model/model-best.pth.tar'\n        >>> load_pretrained_weights(model, weight_path)\n    \"\"\"\n    checkpoint = torch.load(weight_path)\n    if 'state_dict' in checkpoint:\n        state_dict = checkpoint['state_dict']\n    else:\n        state_dict = checkpoint\n\n    model_dict = model.state_dict()\n    new_state_dict = OrderedDict()\n    matched_layers, discarded_layers = [], []\n\n    for k, v in state_dict.items():\n        if k.startswith('module.'):\n            k = k[7:] # discard module.\n\n        if k in model_dict and model_dict[k].size() == v.size():\n            new_state_dict[k] = v\n            matched_layers.append(k)\n        else:\n          
  discarded_layers.append(k)\n\n    model_dict.update(new_state_dict)\n    model.load_state_dict(model_dict)\n\n    if len(matched_layers) == 0:\n        import warnings\n        warnings.warn(\n            'The pretrained weights \"{}\" cannot be loaded, '\n            'please check the key names manually '\n            '(** ignored and continue **)'.format(weight_path)\n        )\n    else:\n        print(\n            'Successfully loaded pretrained weights from \"{}\"'.\n            format(weight_path)\n        )\n        if len(discarded_layers) > 0:\n            print(\n                '** The following layers are discarded '\n                'due to unmatched keys or layer size: {}'.\n                format(discarded_layers)\n            )\n\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/reid_multibackend.py",
    "content": "import torch.nn as nn\nimport torch\nfrom pathlib import Path\nimport numpy as np\nfrom itertools import islice\nimport torchvision.transforms as transforms\nimport cv2\nimport sys\nimport torchvision.transforms as T\nfrom collections import OrderedDict, namedtuple\nimport gdown\nfrom os.path import exists as file_exists\n\n\nfrom ultralytics.yolo.utils.checks import check_requirements, check_version\nfrom ultralytics.yolo.utils import LOGGER\nfrom trackers.strongsort.deep.reid_model_factory import (show_downloadeable_models, get_model_url, get_model_name,\n                                                          download_url, load_pretrained_weights)\nfrom trackers.strongsort.deep.models import build_model\n\n\ndef check_suffix(file='yolov5s.pt', suffix=('.pt',), msg=''):\n    # Check file(s) for acceptable suffix\n    if file and suffix:\n        if isinstance(suffix, str):\n            suffix = [suffix]\n        for f in file if isinstance(file, (list, tuple)) else [file]:\n            s = Path(f).suffix.lower()  # file suffix\n            if len(s):\n                assert s in suffix, f\"{msg}{f} acceptable suffix is {suffix}\"\n\n\nclass ReIDDetectMultiBackend(nn.Module):\n    # ReID models MultiBackend class for python inference on various backends\n    def __init__(self, weights='osnet_x0_25_msmt17.pt', device=torch.device('cpu'), fp16=False):\n        super().__init__()\n\n        w = weights[0] if isinstance(weights, list) else weights\n        self.pt, self.jit, self.onnx, self.xml, self.engine, self.tflite = self.model_type(w)  # get backend\n        self.fp16 = fp16\n        self.fp16 &= self.pt or self.jit or self.engine  # FP16\n\n        # Build transform functions\n        self.device = device\n        self.image_size=(256, 128)\n        self.pixel_mean=[0.485, 0.456, 0.406]\n        self.pixel_std=[0.229, 0.224, 0.225]\n        self.transforms = []\n        self.transforms += [T.Resize(self.image_size)]\n        self.transforms += [T.ToTensor()]\n        self.transforms += [T.Normalize(mean=self.pixel_mean, std=self.pixel_std)]\n        self.preprocess = T.Compose(self.transforms)\n        self.to_pil = T.ToPILImage()\n\n        model_name = get_model_name(w)\n\n        if w.suffix == '.pt':\n            model_url = get_model_url(w)\n            if not file_exists(w) and model_url is not None:\n                gdown.download(model_url, str(w), quiet=False)\n            elif file_exists(w):\n                pass\n            else:\n                print(f'No URL associated to the chosen StrongSORT weights ({w}). 
Choose between:')\n                show_downloadeable_models()\n                exit()\n\n        # Build model\n        self.model = build_model(\n            model_name,\n            num_classes=1,\n            pretrained=not (w and w.is_file()),\n            use_gpu=device\n        )\n\n        if self.pt:  # PyTorch\n            # populate model arch with weights\n            if w and w.is_file() and w.suffix == '.pt':\n                load_pretrained_weights(self.model, w)\n                \n            self.model.to(device).eval()\n            self.model.half() if self.fp16 else  self.model.float()\n        elif self.jit:\n            LOGGER.info(f'Loading {w} for TorchScript inference...')\n            self.model = torch.jit.load(w)\n            self.model.half() if self.fp16 else self.model.float()\n        elif self.onnx:  # ONNX Runtime\n            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')\n            cuda = torch.cuda.is_available() and device.type != 'cpu'\n            #check_requirements(('onnx', 'onnxruntime-gpu' if cuda else 'onnxruntime'))\n            import onnxruntime\n            providers = ['CUDAExecutionProvider', 'CPUExecutionProvider'] if cuda else ['CPUExecutionProvider']\n            self.session = onnxruntime.InferenceSession(str(w), providers=providers)\n        elif self.engine:  # TensorRT\n            LOGGER.info(f'Loading {w} for TensorRT inference...')\n            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download\n            check_version(trt.__version__, '7.0.0', hard=True)  # require tensorrt>=7.0.0\n            if device.type == 'cpu':\n                device = torch.device('cuda:0')\n            Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))\n            logger = trt.Logger(trt.Logger.INFO)\n            with open(w, 'rb') as f, trt.Runtime(logger) as runtime:\n                self.model_ = runtime.deserialize_cuda_engine(f.read())\n            self.context = self.model_.create_execution_context()\n            self.bindings = OrderedDict()\n            self.fp16 = False  # default updated below\n            dynamic = False\n            for index in range(self.model_.num_bindings):\n                name = self.model_.get_binding_name(index)\n                dtype = trt.nptype(self.model_.get_binding_dtype(index))\n                if self.model_.binding_is_input(index):\n                    if -1 in tuple(self.model_.get_binding_shape(index)):  # dynamic\n                        dynamic = True\n                        self.context.set_binding_shape(index, tuple(self.model_.get_profile_shape(0, index)[2]))\n                    if dtype == np.float16:\n                        self.fp16 = True\n                shape = tuple(self.context.get_binding_shape(index))\n                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)\n                self.bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))\n            self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())\n            batch_size = self.bindings['images'].shape[0]  # if dynamic, this is instead max batch size\n        elif self.xml:  # OpenVINO\n            LOGGER.info(f'Loading {w} for OpenVINO inference...')\n            check_requirements(('openvino',))  # requires openvino-dev: https://pypi.org/project/openvino-dev/\n            from openvino.runtime import Core, Layout, get_batch\n            ie = Core()\n            if not Path(w).is_file():  # if not *.xml\n    
            w = next(Path(w).glob('*.xml'))  # get *.xml file from *_openvino_model dir\n            network = ie.read_model(model=w, weights=Path(w).with_suffix('.bin'))\n            if network.get_parameters()[0].get_layout().empty:\n                network.get_parameters()[0].set_layout(Layout(\"NCWH\"))\n            batch_dim = get_batch(network)\n            if batch_dim.is_static:\n                batch_size = batch_dim.get_length()\n            self.executable_network = ie.compile_model(network, device_name=\"CPU\")  # device_name=\"MYRIAD\" for Intel NCS2\n            self.output_layer = next(iter(self.executable_network.outputs))\n        \n        elif self.tflite:\n            LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')\n            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu\n                from tflite_runtime.interpreter import Interpreter, load_delegate\n            except ImportError:\n                import tensorflow as tf\n                Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate,\n            self.interpreter = Interpreter(model_path=w)  # Interpreter resolved above (tflite_runtime or tf.lite)\n            self.interpreter.allocate_tensors()\n            # Get input and output tensors.\n            self.input_details = self.interpreter.get_input_details()\n            self.output_details = self.interpreter.get_output_details()\n            \n            # Test model on random input data.\n            input_data = np.array(np.random.random_sample((1,256,128,3)), dtype=np.float32)\n            self.interpreter.set_tensor(self.input_details[0]['index'], input_data)\n            \n            self.interpreter.invoke()\n\n            # The function `get_tensor()` returns a copy of the tensor data.\n            output_data = self.interpreter.get_tensor(self.output_details[0]['index'])\n        else:\n            print('This model framework is not supported yet!')\n            exit()\n        \n        \n    @staticmethod\n    def model_type(p='path/to/model.pt'):\n        # Return model type from model path, i.e. 
path='path/to/model.onnx' -> type=onnx\n        from trackers.reid_export import export_formats\n        sf = list(export_formats().Suffix)  # export suffixes\n        check_suffix(p, sf)  # checks\n        types = [s in Path(p).name for s in sf]\n        return types\n\n    def _preprocess(self, im_batch):\n\n        images = []\n        for element in im_batch:\n            image = self.to_pil(element)\n            image = self.preprocess(image)\n            images.append(image)\n\n        images = torch.stack(images, dim=0)\n        images = images.to(self.device)\n\n        return images\n    \n    \n    def forward(self, im_batch):\n        \n        # preprocess batch\n        im_batch = self._preprocess(im_batch)\n\n        # batch to half\n        if self.fp16 and im_batch.dtype != torch.float16:\n           im_batch = im_batch.half()\n\n        # batch processing\n        features = []\n        if self.pt:\n            features = self.model(im_batch)\n        elif self.jit:  # TorchScript\n            features = self.model(im_batch)\n        elif self.onnx:  # ONNX Runtime\n            im_batch = im_batch.cpu().numpy()  # torch to numpy\n            features = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im_batch})[0]\n        elif self.engine:  # TensorRT\n            if True and im_batch.shape != self.bindings['images'].shape:\n                i_in, i_out = (self.model_.get_binding_index(x) for x in ('images', 'output'))\n                self.context.set_binding_shape(i_in, im_batch.shape)  # reshape if dynamic\n                self.bindings['images'] = self.bindings['images']._replace(shape=im_batch.shape)\n                self.bindings['output'].data.resize_(tuple(self.context.get_binding_shape(i_out)))\n            s = self.bindings['images'].shape\n            assert im_batch.shape == s, f\"input size {im_batch.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}\"\n            self.binding_addrs['images'] = int(im_batch.data_ptr())\n            self.context.execute_v2(list(self.binding_addrs.values()))\n            features = self.bindings['output'].data\n        elif self.xml:  # OpenVINO\n            im_batch = im_batch.cpu().numpy()  # FP32\n            features = self.executable_network([im_batch])[self.output_layer]\n        else:\n            print('Framework not supported at the moment, we are working on it...')\n            exit()\n\n        if isinstance(features, (list, tuple)):\n            return self.from_numpy(features[0]) if len(features) == 1 else [self.from_numpy(x) for x in features]\n        else:\n            return self.from_numpy(features)\n\n    def from_numpy(self, x):\n        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x\n\n    def warmup(self, imgsz=[(256, 128, 3)]):\n        # Warmup model by running inference once\n        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.tflite\n        if any(warmup_types) and self.device.type != 'cpu':\n            im = [np.empty(*imgsz).astype(np.uint8)]  # input\n            for _ in range(2 if self.jit else 1):  #\n                self.forward(im)  # warmup"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/__init__.py",
    "content": ""
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/detection.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\n\n\nclass Detection(object):\n    \"\"\"\n    This class represents a bounding box detection in a single image.\n\n    Parameters\n    ----------\n    tlwh : array_like\n        Bounding box in format `(x, y, w, h)`.\n    confidence : float\n        Detector confidence score.\n    feature : array_like\n        A feature vector that describes the object contained in this image.\n\n    Attributes\n    ----------\n    tlwh : ndarray\n        Bounding box in format `(top left x, top left y, width, height)`.\n    confidence : ndarray\n        Detector confidence score.\n    feature : ndarray | NoneType\n        A feature vector that describes the object contained in this image.\n\n    \"\"\"\n\n    def __init__(self, tlwh, confidence, feature):\n        self.tlwh = np.asarray(tlwh, dtype=np.float32)\n        self.confidence = float(confidence)\n        self.feature = np.asarray(feature.cpu(), dtype=np.float32)\n\n    def to_tlbr(self):\n        \"\"\"Convert bounding box to format `(min x, min y, max x, max y)`, i.e.,\n        `(top left, bottom right)`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[2:] += ret[:2]\n        return ret\n\n    def to_xyah(self):\n        \"\"\"Convert bounding box to format `(center x, center y, aspect ratio,\n        height)`, where the aspect ratio is `width / height`.\n        \"\"\"\n        ret = self.tlwh.copy()\n        ret[:2] += ret[2:] / 2\n        ret[2] /= ret[3]\n        return ret\n    \ndef to_xyah_ext(bbox):\n    \"\"\"Convert bounding box to format `(center x, center y, aspect ratio,\n    height)`, where the aspect ratio is `width / height`.\n    \"\"\"\n    ret = bbox.copy()\n    ret[:2] += ret[2:] / 2\n    ret[2] /= ret[3]\n    return ret\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/iou_matching.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nfrom . import linear_assignment\n\n\ndef iou(bbox, candidates):\n    \"\"\"Computer intersection over union.\n\n    Parameters\n    ----------\n    bbox : ndarray\n        A bounding box in format `(top left x, top left y, width, height)`.\n    candidates : ndarray\n        A matrix of candidate bounding boxes (one per row) in the same format\n        as `bbox`.\n\n    Returns\n    -------\n    ndarray\n        The intersection over union in [0, 1] between the `bbox` and each\n        candidate. A higher score means a larger fraction of the `bbox` is\n        occluded by the candidate.\n\n    \"\"\"\n    bbox_tl, bbox_br = bbox[:2], bbox[:2] + bbox[2:]\n    candidates_tl = candidates[:, :2]\n    candidates_br = candidates[:, :2] + candidates[:, 2:]\n\n    tl = np.c_[np.maximum(bbox_tl[0], candidates_tl[:, 0])[:, np.newaxis],\n               np.maximum(bbox_tl[1], candidates_tl[:, 1])[:, np.newaxis]]\n    br = np.c_[np.minimum(bbox_br[0], candidates_br[:, 0])[:, np.newaxis],\n               np.minimum(bbox_br[1], candidates_br[:, 1])[:, np.newaxis]]\n    wh = np.maximum(0., br - tl)\n\n    area_intersection = wh.prod(axis=1)\n    area_bbox = bbox[2:].prod()\n    area_candidates = candidates[:, 2:].prod(axis=1)\n    return area_intersection / (area_bbox + area_candidates - area_intersection)\n\n\ndef iou_cost(tracks, detections, track_indices=None,\n             detection_indices=None):\n    \"\"\"An intersection over union distance metric.\n\n    Parameters\n    ----------\n    tracks : List[deep_sort.track.Track]\n        A list of tracks.\n    detections : List[deep_sort.detection.Detection]\n        A list of detections.\n    track_indices : Optional[List[int]]\n        A list of indices to tracks that should be matched. Defaults to\n        all `tracks`.\n    detection_indices : Optional[List[int]]\n        A list of indices to detections that should be matched. Defaults\n        to all `detections`.\n\n    Returns\n    -------\n    ndarray\n        Returns a cost matrix of shape\n        len(track_indices), len(detection_indices) where entry (i, j) is\n        `1 - iou(tracks[track_indices[i]], detections[detection_indices[j]])`.\n\n    \"\"\"\n    if track_indices is None:\n        track_indices = np.arange(len(tracks))\n    if detection_indices is None:\n        detection_indices = np.arange(len(detections))\n\n    cost_matrix = np.zeros((len(track_indices), len(detection_indices)))\n    for row, track_idx in enumerate(track_indices):\n        if tracks[track_idx].time_since_update > 1:\n            cost_matrix[row, :] = linear_assignment.INFTY_COST\n            continue\n\n        bbox = tracks[track_idx].to_tlwh()\n        candidates = np.asarray(\n            [detections[i].tlwh for i in detection_indices])\n        cost_matrix[row, :] = 1. - iou(bbox, candidates)\n    return cost_matrix\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/kalman_filter.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport scipy.linalg\n\"\"\"\nTable for the 0.95 quantile of the chi-square distribution with N degrees of\nfreedom (contains values for N=1, ..., 9). Taken from MATLAB/Octave's chi2inv\nfunction and used as Mahalanobis gating threshold.\n\"\"\"\nchi2inv95 = {\n    1: 3.8415,\n    2: 5.9915,\n    3: 7.8147,\n    4: 9.4877,\n    5: 11.070,\n    6: 12.592,\n    7: 14.067,\n    8: 15.507,\n    9: 16.919}\n\n\nclass KalmanFilter(object):\n    \"\"\"\n    A simple Kalman filter for tracking bounding boxes in image space.\n    The 8-dimensional state space\n        x, y, a, h, vx, vy, va, vh\n    contains the bounding box center position (x, y), aspect ratio a, height h,\n    and their respective velocities.\n    Object motion follows a constant velocity model. The bounding box location\n    (x, y, a, h) is taken as direct observation of the state space (linear\n    observation model).\n    \"\"\"\n\n    def __init__(self):\n        ndim, dt = 4, 1.\n\n        # Create Kalman filter model matrices.\n        self._motion_mat = np.eye(2 * ndim, 2 * ndim)\n        for i in range(ndim):\n            self._motion_mat[i, ndim + i] = dt\n\n        self._update_mat = np.eye(ndim, 2 * ndim)\n\n        # Motion and observation uncertainty are chosen relative to the current\n        # state estimate. These weights control the amount of uncertainty in\n        # the model. This is a bit hacky.\n        self._std_weight_position = 1. / 20\n        self._std_weight_velocity = 1. / 160\n\n    def initiate(self, measurement):\n        \"\"\"Create track from unassociated measurement.\n        Parameters\n        ----------\n        measurement : ndarray\n            Bounding box coordinates (x, y, a, h) with center position (x, y),\n            aspect ratio a, and height h.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector (8 dimensional) and covariance matrix (8x8\n            dimensional) of the new track. Unobserved velocities are initialized\n            to 0 mean.\n        \"\"\"\n        mean_pos = measurement\n        mean_vel = np.zeros_like(mean_pos)\n        mean = np.r_[mean_pos, mean_vel]\n\n        std = [\n            2 * self._std_weight_position * measurement[0],   # the center point x\n            2 * self._std_weight_position * measurement[1],   # the center point y\n            1 * measurement[2],                               # the ratio of width/height\n            2 * self._std_weight_position * measurement[3],   # the height\n            10 * self._std_weight_velocity * measurement[0],\n            10 * self._std_weight_velocity * measurement[1],\n            0.1 * measurement[2],\n            10 * self._std_weight_velocity * measurement[3]]\n        covariance = np.diag(np.square(std))\n        return mean, covariance\n\n    def predict(self, mean, covariance):\n        \"\"\"Run Kalman filter prediction step.\n        Parameters\n        ----------\n        mean : ndarray\n            The 8 dimensional mean vector of the object state at the previous\n            time step.\n        covariance : ndarray\n            The 8x8 dimensional covariance matrix of the object state at the\n            previous time step.\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the mean vector and covariance matrix of the predicted\n            state. 
Unobserved velocities are initialized to 0 mean.\n        \"\"\"\n        std_pos = [\n            self._std_weight_position * mean[0],\n            self._std_weight_position * mean[1],\n            1 * mean[2],\n            self._std_weight_position * mean[3]]\n        std_vel = [\n            self._std_weight_velocity * mean[0],\n            self._std_weight_velocity * mean[1],\n            0.1 * mean[2],\n            self._std_weight_velocity * mean[3]]\n        motion_cov = np.diag(np.square(np.r_[std_pos, std_vel]))\n\n        mean = np.dot(self._motion_mat, mean)\n        covariance = np.linalg.multi_dot((\n            self._motion_mat, covariance, self._motion_mat.T)) + motion_cov\n\n        return mean, covariance\n\n    def project(self, mean, covariance, confidence=.0):\n        \"\"\"Project state distribution to measurement space.\n        Parameters\n        ----------\n        mean : ndarray\n            The state's mean vector (8 dimensional array).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n        confidence: (dyh) detection box confidence score\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the projected mean and covariance matrix of the given state\n            estimate.\n        \"\"\"\n        std = [\n            self._std_weight_position * mean[3],\n            self._std_weight_position * mean[3],\n            1e-1,\n            self._std_weight_position * mean[3]]\n\n\n        std = [(1 - confidence) * x for x in std]\n\n        innovation_cov = np.diag(np.square(std))\n\n        mean = np.dot(self._update_mat, mean)\n        covariance = np.linalg.multi_dot((\n            self._update_mat, covariance, self._update_mat.T))\n        return mean, covariance + innovation_cov\n\n    def update(self, mean, covariance, measurement, confidence=.0):\n        \"\"\"Run Kalman filter correction step.\n        Parameters\n        ----------\n        mean : ndarray\n            The predicted state's mean vector (8 dimensional).\n        covariance : ndarray\n            The state's covariance matrix (8x8 dimensional).\n        measurement : ndarray\n            The 4 dimensional measurement vector (x, y, a, h), where (x, y)\n            is the center position, a the aspect ratio, and h the height of the\n            bounding box.\n        confidence: (dyh) detection box confidence score\n        Returns\n        -------\n        (ndarray, ndarray)\n            Returns the measurement-corrected state distribution.\n        \"\"\"\n        projected_mean, projected_cov = self.project(mean, covariance, confidence)\n\n        chol_factor, lower = scipy.linalg.cho_factor(\n            projected_cov, lower=True, check_finite=False)\n        kalman_gain = scipy.linalg.cho_solve(\n            (chol_factor, lower), np.dot(covariance, self._update_mat.T).T,\n            check_finite=False).T\n        innovation = measurement - projected_mean\n\n        new_mean = mean + np.dot(innovation, kalman_gain.T)\n        new_covariance = covariance - np.linalg.multi_dot((\n            kalman_gain, projected_cov, kalman_gain.T))\n        return new_mean, new_covariance\n\n    def gating_distance(self, mean, covariance, measurements,\n                        only_position=False):\n        \"\"\"Compute gating distance between state distribution and measurements.\n        A suitable distance threshold can be obtained from `chi2inv95`. If\n        `only_position` is False, the chi-square distribution has 4 degrees of\n        freedom, otherwise 2.\n        Parameters\n        ----------\n        mean : ndarray\n            Mean vector over the state distribution (8 dimensional).\n        covariance : ndarray\n            Covariance of the state distribution (8x8 dimensional).\n        measurements : ndarray\n            An Nx4 dimensional matrix of N measurements, each in\n            format (x, y, a, h) where (x, y) is the bounding box center\n            position, a the aspect ratio, and h the height.\n        only_position : Optional[bool]\n            If True, distance computation is done with respect to the bounding\n            box center position only.\n        Returns\n        -------\n        ndarray\n            Returns an array of length N, where the i-th element contains the\n            squared Mahalanobis distance between (mean, covariance) and\n            `measurements[i]`.\n        \"\"\"\n        mean, covariance = self.project(mean, covariance)\n\n        if only_position:\n            mean, covariance = mean[:2], covariance[:2, :2]\n            measurements = measurements[:, :2]\n\n        cholesky_factor = np.linalg.cholesky(covariance)\n        d = measurements - mean\n        z = scipy.linalg.solve_triangular(\n            cholesky_factor, d.T, lower=True, check_finite=False,\n            overwrite_b=True)\n        squared_maha = np.sum(z * z, axis=0)\n        return squared_maha"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/linear_assignment.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nfrom scipy.optimize import linear_sum_assignment\nfrom . import kalman_filter\n\n\nINFTY_COST = 1e+5\n\n\ndef min_cost_matching(\n        distance_metric, max_distance, tracks, detections, track_indices=None,\n        detection_indices=None):\n    \"\"\"Solve linear assignment problem.\n    Parameters\n    ----------\n    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray\n        The distance metric is given a list of tracks and detections as well as\n        a list of N track indices and M detection indices. The metric should\n        return the NxM dimensional cost matrix, where element (i, j) is the\n        association cost between the i-th track in the given track indices and\n        the j-th detection in the given detection_indices.\n    max_distance : float\n        Gating threshold. Associations with cost larger than this value are\n        disregarded.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : List[int]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above).\n    detection_indices : List[int]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above).\n    Returns\n    -------\n    (List[(int, int)], List[int], List[int])\n        Returns a tuple with the following three entries:\n        * A list of matched track and detection indices.\n        * A list of unmatched track indices.\n        * A list of unmatched detection indices.\n    \"\"\"\n    if track_indices is None:\n        track_indices = np.arange(len(tracks))\n    if detection_indices is None:\n        detection_indices = np.arange(len(detections))\n\n    if len(detection_indices) == 0 or len(track_indices) == 0:\n        return [], track_indices, detection_indices  # Nothing to match.\n\n    cost_matrix = distance_metric(\n        tracks, detections, track_indices, detection_indices)\n    cost_matrix[cost_matrix > max_distance] = max_distance + 1e-5\n    row_indices, col_indices = linear_sum_assignment(cost_matrix)\n\n    matches, unmatched_tracks, unmatched_detections = [], [], []\n    for col, detection_idx in enumerate(detection_indices):\n        if col not in col_indices:\n            unmatched_detections.append(detection_idx)\n    for row, track_idx in enumerate(track_indices):\n        if row not in row_indices:\n            unmatched_tracks.append(track_idx)\n    for row, col in zip(row_indices, col_indices):\n        track_idx = track_indices[row]\n        detection_idx = detection_indices[col]\n        if cost_matrix[row, col] > max_distance:\n            unmatched_tracks.append(track_idx)\n            unmatched_detections.append(detection_idx)\n        else:\n            matches.append((track_idx, detection_idx))\n    return matches, unmatched_tracks, unmatched_detections\n\n\ndef matching_cascade(\n        distance_metric, max_distance, cascade_depth, tracks, detections,\n        track_indices=None, detection_indices=None):\n    \"\"\"Run matching cascade.\n    Parameters\n    ----------\n    distance_metric : Callable[List[Track], List[Detection], List[int], List[int]) -> ndarray\n        The distance metric is given a list of tracks and detections as well 
as\n        a list of N track indices and M detection indices. The metric should\n        return the NxM dimensional cost matrix, where element (i, j) is the\n        association cost between the i-th track in the given track indices and\n        the j-th detection in the given detection indices.\n    max_distance : float\n        Gating threshold. Associations with cost larger than this value are\n        disregarded.\n    cascade_depth: int\n        The cascade depth, should be se to the maximum track age.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : Optional[List[int]]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above). Defaults to all tracks.\n    detection_indices : Optional[List[int]]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above). Defaults to all\n        detections.\n    Returns\n    -------\n    (List[(int, int)], List[int], List[int])\n        Returns a tuple with the following three entries:\n        * A list of matched track and detection indices.\n        * A list of unmatched track indices.\n        * A list of unmatched detection indices.\n    \"\"\"\n    if track_indices is None:\n        track_indices = list(range(len(tracks)))\n    if detection_indices is None:\n        detection_indices = list(range(len(detections)))\n\n    unmatched_detections = detection_indices\n    matches = []\n    track_indices_l = [\n        k for k in track_indices\n        # if tracks[k].time_since_update == 1 + level\n    ]\n    matches_l, _, unmatched_detections = \\\n        min_cost_matching(\n            distance_metric, max_distance, tracks, detections,\n            track_indices_l, unmatched_detections)\n    matches += matches_l\n    unmatched_tracks = list(set(track_indices) - set(k for k, _ in matches))\n    return matches, unmatched_tracks, unmatched_detections\n\n\ndef gate_cost_matrix(\n        cost_matrix, tracks, detections, track_indices, detection_indices, mc_lambda,\n        gated_cost=INFTY_COST, only_position=False):\n    \"\"\"Invalidate infeasible entries in cost matrix based on the state\n    distributions obtained by Kalman filtering.\n    Parameters\n    ----------\n    kf : The Kalman filter.\n    cost_matrix : ndarray\n        The NxM dimensional cost matrix, where N is the number of track indices\n        and M is the number of detection indices, such that entry (i, j) is the\n        association cost between `tracks[track_indices[i]]` and\n        `detections[detection_indices[j]]`.\n    tracks : List[track.Track]\n        A list of predicted tracks at the current time step.\n    detections : List[detection.Detection]\n        A list of detections at the current time step.\n    track_indices : List[int]\n        List of track indices that maps rows in `cost_matrix` to tracks in\n        `tracks` (see description above).\n    detection_indices : List[int]\n        List of detection indices that maps columns in `cost_matrix` to\n        detections in `detections` (see description above).\n    gated_cost : Optional[float]\n        Entries in the cost matrix corresponding to infeasible associations are\n        set this value. 
Defaults to a very large value.\n    only_position : Optional[bool]\n        If True, only the x, y position of the state distribution is considered\n        during gating. Defaults to False.\n    Returns\n    -------\n    ndarray\n        Returns the modified cost matrix.\n    \"\"\"\n    gating_dim = 2 if only_position else 4\n    gating_threshold = kalman_filter.chi2inv95[gating_dim]\n    measurements = np.asarray(\n        [detections[i].to_xyah() for i in detection_indices])\n    for row, track_idx in enumerate(track_indices):\n        track = tracks[track_idx]\n        gating_distance = track.kf.gating_distance(track.mean, track.covariance, measurements, only_position)\n        cost_matrix[row, gating_distance > gating_threshold] = gated_cost\n        cost_matrix[row] = mc_lambda * cost_matrix[row] + (1 - mc_lambda) *  gating_distance\n    return cost_matrix\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/nn_matching.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport sys\nimport torch\n\n\ndef _pdist(a, b):\n    \"\"\"Compute pair-wise squared distance between points in `a` and `b`.\n    Parameters\n    ----------\n    a : array_like\n        An NxM matrix of N samples of dimensionality M.\n    b : array_like\n        An LxM matrix of L samples of dimensionality M.\n    Returns\n    -------\n    ndarray\n        Returns a matrix of size len(a), len(b) such that eleement (i, j)\n        contains the squared distance between `a[i]` and `b[j]`.\n    \"\"\"\n    a, b = np.asarray(a), np.asarray(b)\n    if len(a) == 0 or len(b) == 0:\n        return np.zeros((len(a), len(b)))\n    a2, b2 = np.square(a).sum(axis=1), np.square(b).sum(axis=1)\n    r2 = -2. * np.dot(a, b.T) + a2[:, None] + b2[None, :]\n    r2 = np.clip(r2, 0., float(np.inf))\n    return r2\n\n\ndef _cosine_distance(a, b, data_is_normalized=False):\n    \"\"\"Compute pair-wise cosine distance between points in `a` and `b`.\n    Parameters\n    ----------\n    a : array_like\n        An NxM matrix of N samples of dimensionality M.\n    b : array_like\n        An LxM matrix of L samples of dimensionality M.\n    data_is_normalized : Optional[bool]\n        If True, assumes rows in a and b are unit length vectors.\n        Otherwise, a and b are explicitly normalized to lenght 1.\n    Returns\n    -------\n    ndarray\n        Returns a matrix of size len(a), len(b) such that eleement (i, j)\n        contains the squared distance between `a[i]` and `b[j]`.\n    \"\"\"\n    if not data_is_normalized:\n        a = np.asarray(a) / np.linalg.norm(a, axis=1, keepdims=True)\n        b = np.asarray(b) / np.linalg.norm(b, axis=1, keepdims=True)\n    return 1. - np.dot(a, b.T)\n\n\ndef _nn_euclidean_distance(x, y):\n    \"\"\" Helper function for nearest neighbor distance metric (Euclidean).\n    Parameters\n    ----------\n    x : ndarray\n        A matrix of N row-vectors (sample points).\n    y : ndarray\n        A matrix of M row-vectors (query points).\n    Returns\n    -------\n    ndarray\n        A vector of length M that contains for each entry in `y` the\n        smallest Euclidean distance to a sample in `x`.\n    \"\"\"\n    # x_ = torch.from_numpy(np.asarray(x) / np.linalg.norm(x, axis=1, keepdims=True))\n    # y_ = torch.from_numpy(np.asarray(y) / np.linalg.norm(y, axis=1, keepdims=True))\n    distances = distances = _pdist(x, y)\n    return np.maximum(0.0, torch.min(distances, axis=0)[0].numpy())\n\n\ndef _nn_cosine_distance(x, y):\n    \"\"\" Helper function for nearest neighbor distance metric (cosine).\n    Parameters\n    ----------\n    x : ndarray\n        A matrix of N row-vectors (sample points).\n    y : ndarray\n        A matrix of M row-vectors (query points).\n    Returns\n    -------\n    ndarray\n        A vector of length M that contains for each entry in `y` the\n        smallest cosine distance to a sample in `x`.\n    \"\"\"\n    x_ = torch.from_numpy(np.asarray(x))\n    y_ = torch.from_numpy(np.asarray(y))\n    distances = _cosine_distance(x_, y_)\n    distances = distances\n    return distances.min(axis=0)\n\n\nclass NearestNeighborDistanceMetric(object):\n    \"\"\"\n    A nearest neighbor distance metric that, for each target, returns\n    the closest distance to any sample that has been observed so far.\n    Parameters\n    ----------\n    metric : str\n        Either \"euclidean\" or \"cosine\".\n    matching_threshold: float\n        The matching threshold. 
Samples with larger distance are considered an\n        invalid match.\n    budget : Optional[int]\n        If not None, fix samples per class to at most this number. Removes\n        the oldest samples when the budget is reached.\n    Attributes\n    ----------\n    samples : Dict[int -> List[ndarray]]\n        A dictionary that maps from target identities to the list of samples\n        that have been observed so far.\n    \"\"\"\n\n    def __init__(self, metric, matching_threshold, budget=None):\n        if metric == \"euclidean\":\n            self._metric = _nn_euclidean_distance\n        elif metric == \"cosine\":\n            self._metric = _nn_cosine_distance\n        else:\n            raise ValueError(\n                \"Invalid metric; must be either 'euclidean' or 'cosine'\")\n        self.matching_threshold = matching_threshold\n        self.budget = budget\n        self.samples = {}\n\n    def partial_fit(self, features, targets, active_targets):\n        \"\"\"Update the distance metric with new data.\n        Parameters\n        ----------\n        features : ndarray\n            An NxM matrix of N features of dimensionality M.\n        targets : ndarray\n            An integer array of associated target identities.\n        active_targets : List[int]\n            A list of targets that are currently present in the scene.\n        \"\"\"\n        for feature, target in zip(features, targets):\n            self.samples.setdefault(target, []).append(feature)\n            if self.budget is not None:\n                self.samples[target] = self.samples[target][-self.budget:]\n        self.samples = {k: self.samples[k] for k in active_targets}\n\n    def distance(self, features, targets):\n        \"\"\"Compute distance between features and targets.\n        Parameters\n        ----------\n        features : ndarray\n            An NxM matrix of N features of dimensionality M.\n        targets : List[int]\n            A list of targets to match the given `features` against.\n        Returns\n        -------\n        ndarray\n            Returns a cost matrix of shape len(targets), len(features), where\n            element (i, j) contains the closest squared distance between\n            `targets[i]` and `features[j]`.\n        \"\"\"\n        cost_matrix = np.zeros((len(targets), len(features)))\n        for i, target in enumerate(targets):\n            cost_matrix[i, :] = self._metric(self.samples[target], features)\n        return cost_matrix"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/preprocessing.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport numpy as np\nimport cv2\n\n\ndef non_max_suppression(boxes, max_bbox_overlap, scores=None):\n    \"\"\"Suppress overlapping detections.\n\n    Original code from [1]_ has been adapted to include confidence score.\n\n    .. [1] http://www.pyimagesearch.com/2015/02/16/\n           faster-non-maximum-suppression-python/\n\n    Examples\n    --------\n\n        >>> boxes = [d.roi for d in detections]\n        >>> scores = [d.confidence for d in detections]\n        >>> indices = non_max_suppression(boxes, max_bbox_overlap, scores)\n        >>> detections = [detections[i] for i in indices]\n\n    Parameters\n    ----------\n    boxes : ndarray\n        Array of ROIs (x, y, width, height).\n    max_bbox_overlap : float\n        ROIs that overlap more than this values are suppressed.\n    scores : Optional[array_like]\n        Detector confidence score.\n\n    Returns\n    -------\n    List[int]\n        Returns indices of detections that have survived non-maxima suppression.\n\n    \"\"\"\n    if len(boxes) == 0:\n        return []\n\n    boxes = boxes.astype(np.float)\n    pick = []\n\n    x1 = boxes[:, 0]\n    y1 = boxes[:, 1]\n    x2 = boxes[:, 2] + boxes[:, 0]\n    y2 = boxes[:, 3] + boxes[:, 1]\n\n    area = (x2 - x1 + 1) * (y2 - y1 + 1)\n    if scores is not None:\n        idxs = np.argsort(scores)\n    else:\n        idxs = np.argsort(y2)\n\n    while len(idxs) > 0:\n        last = len(idxs) - 1\n        i = idxs[last]\n        pick.append(i)\n\n        xx1 = np.maximum(x1[i], x1[idxs[:last]])\n        yy1 = np.maximum(y1[i], y1[idxs[:last]])\n        xx2 = np.minimum(x2[i], x2[idxs[:last]])\n        yy2 = np.minimum(y2[i], y2[idxs[:last]])\n\n        w = np.maximum(0, xx2 - xx1 + 1)\n        h = np.maximum(0, yy2 - yy1 + 1)\n\n        overlap = (w * h) / area[idxs[:last]]\n\n        idxs = np.delete(\n            idxs, np.concatenate(\n                ([last], np.where(overlap > max_bbox_overlap)[0])))\n\n    return pick\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/track.py",
    "content": "# vim: expandtab:ts=4:sw=4\nimport cv2\nimport numpy as np\nfrom trackers.strongsort.sort.kalman_filter import KalmanFilter\nfrom collections import deque\n\n\nclass TrackState:\n    \"\"\"\n    Enumeration type for the single target track state. Newly created tracks are\n    classified as `tentative` until enough evidence has been collected. Then,\n    the track state is changed to `confirmed`. Tracks that are no longer alive\n    are classified as `deleted` to mark them for removal from the set of active\n    tracks.\n\n    \"\"\"\n\n    Tentative = 1\n    Confirmed = 2\n    Deleted = 3\n\n\nclass Track:\n    \"\"\"\n    A single target track with state space `(x, y, a, h)` and associated\n    velocities, where `(x, y)` is the center of the bounding box, `a` is the\n    aspect ratio and `h` is the height.\n\n    Parameters\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n    max_age : int\n        The maximum number of consecutive misses before the track state is\n        set to `Deleted`.\n    feature : Optional[ndarray]\n        Feature vector of the detection this track originates from. If not None,\n        this feature is added to the `features` cache.\n\n    Attributes\n    ----------\n    mean : ndarray\n        Mean vector of the initial state distribution.\n    covariance : ndarray\n        Covariance matrix of the initial state distribution.\n    track_id : int\n        A unique track identifier.\n    hits : int\n        Total number of measurement updates.\n    age : int\n        Total number of frames since first occurance.\n    time_since_update : int\n        Total number of frames since last measurement update.\n    state : TrackState\n        The current track state.\n    features : List[ndarray]\n        A cache of features. 
On each measurement update, the associated feature\n        vector is added to this list.\n\n    \"\"\"\n\n    def __init__(self, detection, track_id, class_id, conf, n_init, max_age, ema_alpha,\n                 feature=None):\n        self.track_id = track_id\n        self.class_id = int(class_id)\n        self.hits = 1\n        self.age = 1\n        self.time_since_update = 0\n        self.max_num_updates_wo_assignment = 7\n        self.updates_wo_assignment = 0\n        self.ema_alpha = ema_alpha\n\n        self.state = TrackState.Tentative\n        self.features = []\n        if feature is not None:\n            feature /= np.linalg.norm(feature)\n            self.features.append(feature)\n\n        self.conf = conf\n        self._n_init = n_init\n        self._max_age = max_age\n\n        self.kf = KalmanFilter()\n        self.mean, self.covariance = self.kf.initiate(detection)\n        \n        # Initializing trajectory queue\n        self.q = deque(maxlen=25)\n\n    def to_tlwh(self):\n        \"\"\"Get current position in bounding box format `(top left x, top left y,\n        width, height)`.\n\n        Returns\n        -------\n        ndarray\n            The bounding box.\n\n        \"\"\"\n        ret = self.mean[:4].copy()\n        ret[2] *= ret[3]\n        ret[:2] -= ret[2:] / 2\n        return ret\n\n    def to_tlbr(self):\n        \"\"\"Get kf estimated current position in bounding box format `(min x, miny, max x,\n        max y)`.\n\n        Returns\n        -------\n        ndarray\n            The predicted kf bounding box.\n\n        \"\"\"\n        ret = self.to_tlwh()\n        ret[2:] = ret[:2] + ret[2:]\n        return ret\n\n\n    def ECC(self, src, dst, warp_mode = cv2.MOTION_EUCLIDEAN, eps = 1e-5,\n        max_iter = 100, scale = 0.1, align = False):\n        \"\"\"Compute the warp matrix from src to dst.\n        Parameters\n        ----------\n        src : ndarray \n            An NxM matrix of source img(BGR or Gray), it must be the same format as dst.\n        dst : ndarray\n            An NxM matrix of target img(BGR or Gray).\n        warp_mode: flags of opencv\n            translation: cv2.MOTION_TRANSLATION\n            rotated and shifted: cv2.MOTION_EUCLIDEAN\n            affine(shift,rotated,shear): cv2.MOTION_AFFINE\n            homography(3d): cv2.MOTION_HOMOGRAPHY\n        eps: float\n            the threshold of the increment in the correlation coefficient between two iterations\n        max_iter: int\n            the number of iterations.\n        scale: float or [int, int]\n            scale_ratio: float\n            scale_size: [W, H]\n        align: bool\n            whether to warp affine or perspective transforms to the source image\n        Returns\n        -------\n        warp matrix : ndarray\n            Returns the warp matrix from src to dst.\n            if motion models is homography, the warp matrix will be 3x3, otherwise 2x3\n        src_aligned: ndarray\n            aligned source image of gray\n        \"\"\"\n\n        # BGR2GRAY\n        if src.ndim == 3:\n            # Convert images to grayscale\n            src = cv2.cvtColor(src, cv2.COLOR_BGR2GRAY)\n            dst = cv2.cvtColor(dst, cv2.COLOR_BGR2GRAY)\n\n        # make the imgs smaller to speed up\n        if scale is not None:\n            if isinstance(scale, float) or isinstance(scale, int):\n                if scale != 1:\n                    src_r = cv2.resize(src, (0, 0), fx = scale, fy = scale,interpolation =  cv2.INTER_LINEAR)\n                    dst_r = 
cv2.resize(dst, (0, 0), fx = scale, fy = scale,interpolation =  cv2.INTER_LINEAR)\n                    scale = [scale, scale]\n                else:\n                    src_r, dst_r = src, dst\n                    scale = None\n            else:\n                if scale[0] != src.shape[1] and scale[1] != src.shape[0]:\n                    src_r = cv2.resize(src, (scale[0], scale[1]), interpolation = cv2.INTER_LINEAR)\n                    dst_r = cv2.resize(dst, (scale[0], scale[1]), interpolation=cv2.INTER_LINEAR)\n                    scale = [scale[0] / src.shape[1], scale[1] / src.shape[0]]\n                else:\n                    src_r, dst_r = src, dst\n                    scale = None\n        else:\n            src_r, dst_r = src, dst\n\n        # Define 2x3 or 3x3 matrices and initialize the matrix to identity\n        if warp_mode == cv2.MOTION_HOMOGRAPHY :\n            warp_matrix = np.eye(3, 3, dtype=np.float32)\n        else :\n            warp_matrix = np.eye(2, 3, dtype=np.float32)\n\n        # Define termination criteria\n        criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, max_iter, eps)\n\n        # Run the ECC algorithm. The results are stored in warp_matrix.\n        try:\n            (cc, warp_matrix) = cv2.findTransformECC (src_r, dst_r, warp_matrix, warp_mode, criteria, None, 1)\n        except cv2.error as e:\n            print('ecc transform failed')\n            return None, None\n        \n        if scale is not None:\n            warp_matrix[0, 2] = warp_matrix[0, 2] / scale[0]\n            warp_matrix[1, 2] = warp_matrix[1, 2] / scale[1]\n\n        if align:\n            sz = src.shape\n            if warp_mode == cv2.MOTION_HOMOGRAPHY:\n                # Use warpPerspective for Homography\n                src_aligned = cv2.warpPerspective(src, warp_matrix, (sz[1],sz[0]), flags=cv2.INTER_LINEAR)\n            else :\n                # Use warpAffine for Translation, Euclidean and Affine\n                src_aligned = cv2.warpAffine(src, warp_matrix, (sz[1],sz[0]), flags=cv2.INTER_LINEAR)\n            return warp_matrix, src_aligned\n        else:\n            return warp_matrix, None\n\n\n    def get_matrix(self, matrix):\n        eye = np.eye(3)\n        dist = np.linalg.norm(eye - matrix)\n        if dist < 100:\n            return matrix\n        else:\n            return eye\n\n    def camera_update(self, previous_frame, next_frame):\n        warp_matrix, src_aligned = self.ECC(previous_frame, next_frame)\n        if warp_matrix is None and src_aligned is None:\n            return\n        [a,b] = warp_matrix\n        warp_matrix=np.array([a,b,[0,0,1]])\n        warp_matrix = warp_matrix.tolist()\n        matrix = self.get_matrix(warp_matrix)\n\n        x1, y1, x2, y2 = self.to_tlbr()\n        x1_, y1_, _ = matrix @ np.array([x1, y1, 1]).T\n        x2_, y2_, _ = matrix @ np.array([x2, y2, 1]).T\n        w, h = x2_ - x1_, y2_ - y1_\n        cx, cy = x1_ + w / 2, y1_ + h / 2\n        self.mean[:4] = [cx, cy, w / h, h]\n\n\n    def increment_age(self):\n        self.age += 1\n        self.time_since_update += 1\n\n    def predict(self, kf):\n        \"\"\"Propagate the state distribution to the current time step using a\n        Kalman filter prediction step.\n\n        Parameters\n        ----------\n        kf : kalman_filter.KalmanFilter\n            The Kalman filter.\n\n        \"\"\"\n        self.mean, self.covariance = self.kf.predict(self.mean, self.covariance)\n        self.age += 1\n        self.time_since_update += 1\n        \n  
  def update_kf(self, bbox, confidence=0.5):\n        self.updates_wo_assignment = self.updates_wo_assignment + 1\n        self.mean, self.covariance = self.kf.update(self.mean, self.covariance, bbox, confidence)\n        tlbr = self.to_tlbr()\n        x_c = int((tlbr[0] + tlbr[2]) / 2)\n        y_c = int((tlbr[1] + tlbr[3]) / 2)\n        self.q.append(('predupdate', (x_c, y_c)))\n\n    def update(self, detection, class_id, conf):\n        \"\"\"Perform Kalman filter measurement update step and update the feature\n        cache.\n        Parameters\n        ----------\n        detection : Detection\n            The associated detection.\n        \"\"\"\n        self.conf = conf\n        self.class_id = class_id.int()\n        self.mean, self.covariance = self.kf.update(self.mean, self.covariance, detection.to_xyah(), detection.confidence)\n\n        feature = detection.feature / np.linalg.norm(detection.feature)\n\n        smooth_feat = self.ema_alpha * self.features[-1] + (1 - self.ema_alpha) * feature\n        smooth_feat /= np.linalg.norm(smooth_feat)\n        self.features = [smooth_feat]\n\n        self.hits += 1\n        self.time_since_update = 0\n        if self.state == TrackState.Tentative and self.hits >= self._n_init:\n            self.state = TrackState.Confirmed\n        \n        tlbr = self.to_tlbr()\n        x_c = int((tlbr[0] + tlbr[2]) / 2)\n        y_c = int((tlbr[1] + tlbr[3]) / 2)\n        self.q.append(('observationupdate', (x_c, y_c)))\n\n    def mark_missed(self):\n        \"\"\"Mark this track as missed (no association at the current time step).\n        \"\"\"\n        if self.state == TrackState.Tentative:\n            self.state = TrackState.Deleted\n        elif self.time_since_update > self._max_age:\n            self.state = TrackState.Deleted\n\n    def is_tentative(self):\n        \"\"\"Returns True if this track is tentative (unconfirmed).\n        \"\"\"\n        return self.state == TrackState.Tentative\n\n    def is_confirmed(self):\n        \"\"\"Returns True if this track is confirmed.\"\"\"\n        return self.state == TrackState.Confirmed\n\n    def is_deleted(self):\n        \"\"\"Returns True if this track is dead and should be deleted.\"\"\"\n        return self.state == TrackState.Deleted\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/sort/tracker.py",
    "content": "# vim: expandtab:ts=4:sw=4\nfrom __future__ import absolute_import\nimport numpy as np\nfrom . import kalman_filter\nfrom . import linear_assignment\nfrom . import iou_matching\nfrom . import detection\nfrom .track import Track\n\n\nclass Tracker:\n    \"\"\"\n    This is the multi-target tracker.\n    Parameters\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        A distance metric for measurement-to-track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of consecutive detections before the track is confirmed. The\n        track state is set to `Deleted` if a miss occurs within the first\n        `n_init` frames.\n    Attributes\n    ----------\n    metric : nn_matching.NearestNeighborDistanceMetric\n        The distance metric used for measurement to track association.\n    max_age : int\n        Maximum number of missed misses before a track is deleted.\n    n_init : int\n        Number of frames that a track remains in initialization phase.\n    kf : kalman_filter.KalmanFilter\n        A Kalman filter to filter target trajectories in image space.\n    tracks : List[Track]\n        The list of active tracks at the current time step.\n    \"\"\"\n    GATING_THRESHOLD = np.sqrt(kalman_filter.chi2inv95[4])\n\n    def __init__(self, metric, max_iou_dist=0.9, max_age=30, max_unmatched_preds=7, n_init=3, _lambda=0, ema_alpha=0.9, mc_lambda=0.995):\n        self.metric = metric\n        self.max_iou_dist = max_iou_dist\n        self.max_age = max_age\n        self.n_init = n_init\n        self._lambda = _lambda\n        self.ema_alpha = ema_alpha\n        self.mc_lambda = mc_lambda\n        self.max_unmatched_preds = max_unmatched_preds\n        \n        self.kf = kalman_filter.KalmanFilter()\n        self.tracks = []\n        self._next_id = 1\n\n    def predict(self):\n        \"\"\"Propagate track state distributions one time step forward.\n\n        This function should be called once every time step, before `update`.\n        \"\"\"\n        for track in self.tracks:\n            track.predict(self.kf)\n\n    def increment_ages(self):\n        for track in self.tracks:\n            track.increment_age()\n            track.mark_missed()\n\n    def camera_update(self, previous_img, current_img):\n        for track in self.tracks:\n            track.camera_update(previous_img, current_img)\n            \n    def pred_n_update_all_tracks(self):\n        \"\"\"Perform predictions and updates for all tracks by its own predicted state.\n\n        \"\"\"\n        self.predict()\n        for t in self.tracks:\n            if self.max_unmatched_preds != 0 and t.updates_wo_assignment < t.max_num_updates_wo_assignment:\n                bbox = t.to_tlwh()\n                t.update_kf(detection.to_xyah_ext(bbox))\n\n    def update(self, detections, classes, confidences):\n        \"\"\"Perform measurement update and track management.\n\n        Parameters\n        ----------\n        detections : List[deep_sort.detection.Detection]\n            A list of detections at the current time step.\n\n        \"\"\"\n        # Run matching cascade.\n        matches, unmatched_tracks, unmatched_detections = \\\n            self._match(detections)\n\n        # Update track set.\n        for track_idx, detection_idx in matches:\n            self.tracks[track_idx].update(\n                detections[detection_idx], classes[detection_idx], confidences[detection_idx])\n        for 
track_idx in unmatched_tracks:\n            self.tracks[track_idx].mark_missed()\n            if self.max_unmatched_preds != 0 and self.tracks[track_idx].updates_wo_assignment < self.tracks[track_idx].max_num_updates_wo_assignment:\n                bbox = self.tracks[track_idx].to_tlwh()\n                self.tracks[track_idx].update_kf(detection.to_xyah_ext(bbox))\n        for detection_idx in unmatched_detections:\n            self._initiate_track(detections[detection_idx], classes[detection_idx].item(), confidences[detection_idx].item())\n        self.tracks = [t for t in self.tracks if not t.is_deleted()]\n\n        # Update distance metric.\n        active_targets = [t.track_id for t in self.tracks if t.is_confirmed()]\n        features, targets = [], []\n        for track in self.tracks:\n            if not track.is_confirmed():\n                continue\n            features += track.features\n            targets += [track.track_id for _ in track.features]\n        self.metric.partial_fit(np.asarray(features), np.asarray(targets), active_targets)\n\n    def _full_cost_metric(self, tracks, dets, track_indices, detection_indices):\n        \"\"\"\n        This implements the full lambda-based cost-metric. However, in doing so, it disregards\n        the possibility to gate the position only which is provided by\n        linear_assignment.gate_cost_matrix(). Instead, I gate by everything.\n        Note that the Mahalanobis distance is itself an unnormalised metric. Given the cosine\n        distance being normalised, we employ a quick and dirty normalisation based on the\n        threshold: that is, we divide the positional-cost by the gating threshold, thus ensuring\n        that the valid values range 0-1.\n        Note also that the authors work with the squared distance. 
I also sqrt this, so that it\n        is more intuitive in terms of values.\n        \"\"\"\n        # Compute First the Position-based Cost Matrix\n        pos_cost = np.empty([len(track_indices), len(detection_indices)])\n        msrs = np.asarray([dets[i].to_xyah() for i in detection_indices])\n        for row, track_idx in enumerate(track_indices):\n            pos_cost[row, :] = np.sqrt(\n                self.kf.gating_distance(\n                    tracks[track_idx].mean, tracks[track_idx].covariance, msrs, False\n                )\n            ) / self.GATING_THRESHOLD\n        pos_gate = pos_cost > 1.0\n        # Now Compute the Appearance-based Cost Matrix\n        app_cost = self.metric.distance(\n            np.array([dets[i].feature for i in detection_indices]),\n            np.array([tracks[i].track_id for i in track_indices]),\n        )\n        app_gate = app_cost > self.metric.matching_threshold\n        # Now combine and threshold\n        cost_matrix = self._lambda * pos_cost + (1 - self._lambda) * app_cost\n        cost_matrix[np.logical_or(pos_gate, app_gate)] = linear_assignment.INFTY_COST\n        # Return Matrix\n        return cost_matrix\n\n    def _match(self, detections):\n\n        def gated_metric(tracks, dets, track_indices, detection_indices):\n            features = np.array([dets[i].feature for i in detection_indices])\n            targets = np.array([tracks[i].track_id for i in track_indices])\n            cost_matrix = self.metric.distance(features, targets)\n            cost_matrix = linear_assignment.gate_cost_matrix(cost_matrix, tracks, dets, track_indices, detection_indices, self.mc_lambda)\n\n            return cost_matrix\n\n        # Split track set into confirmed and unconfirmed tracks.\n        confirmed_tracks = [\n            i for i, t in enumerate(self.tracks) if t.is_confirmed()]\n        unconfirmed_tracks = [\n            i for i, t in enumerate(self.tracks) if not t.is_confirmed()]\n\n        # Associate confirmed tracks using appearance features.\n        matches_a, unmatched_tracks_a, unmatched_detections = \\\n            linear_assignment.matching_cascade(\n                gated_metric, self.metric.matching_threshold, self.max_age,\n                self.tracks, detections, confirmed_tracks)\n\n        # Associate remaining tracks together with unconfirmed tracks using IOU.\n        iou_track_candidates = unconfirmed_tracks + [\n            k for k in unmatched_tracks_a if\n            self.tracks[k].time_since_update == 1]\n        unmatched_tracks_a = [\n            k for k in unmatched_tracks_a if\n            self.tracks[k].time_since_update != 1]\n        matches_b, unmatched_tracks_b, unmatched_detections = \\\n            linear_assignment.min_cost_matching(\n                iou_matching.iou_cost, self.max_iou_dist, self.tracks,\n                detections, iou_track_candidates, unmatched_detections)\n\n        matches = matches_a + matches_b\n        unmatched_tracks = list(set(unmatched_tracks_a + unmatched_tracks_b))\n        return matches, unmatched_tracks, unmatched_detections\n\n    def _initiate_track(self, detection, class_id, conf):\n        self.tracks.append(Track(\n            detection.to_xyah(), self._next_id, class_id, conf, self.n_init, self.max_age, self.ema_alpha,\n            detection.feature))\n        self._next_id += 1\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/strong_sort.py",
    "content": "import numpy as np\nimport torch\nimport sys\nimport cv2\nimport gdown\nfrom os.path import exists as file_exists, join\nimport torchvision.transforms as transforms\n\nfrom .sort.nn_matching import NearestNeighborDistanceMetric\nfrom .sort.detection import Detection\nfrom .sort.tracker import Tracker\n\nfrom .reid_multibackend import ReIDDetectMultiBackend\n\nfrom ultralytics.yolo.utils.ops import xyxy2xywh\n\n\nclass StrongSORT(object):\n    def __init__(self, \n                 model_weights,\n                 device,\n                 fp16,\n                 max_dist=0.2,\n                 max_iou_dist=0.7,\n                 max_age=70,\n                 max_unmatched_preds=7,\n                 n_init=3,\n                 nn_budget=100,\n                 mc_lambda=0.995,\n                 ema_alpha=0.9\n                ):\n\n        self.model = ReIDDetectMultiBackend(weights=model_weights, device=device, fp16=fp16)\n        \n        self.max_dist = max_dist\n        metric = NearestNeighborDistanceMetric(\n            \"cosine\", self.max_dist, nn_budget)\n        self.tracker = Tracker(\n            metric, max_iou_dist=max_iou_dist, max_age=max_age, n_init=n_init, max_unmatched_preds=max_unmatched_preds, mc_lambda=mc_lambda, ema_alpha=ema_alpha)\n\n    def update(self, dets,  ori_img):\n        \n        xyxys = dets[:, 0:4]\n        confs = dets[:, 4]\n        clss = dets[:, 5]\n        \n        classes = clss.numpy()\n        xywhs = xyxy2xywh(xyxys.numpy())\n        confs = confs.numpy()\n        self.height, self.width = ori_img.shape[:2]\n        \n        # generate detections\n        features = self._get_features(xywhs, ori_img)\n        bbox_tlwh = self._xywh_to_tlwh(xywhs)\n        detections = [Detection(bbox_tlwh[i], conf, features[i]) for i, conf in enumerate(\n            confs)]\n\n        # run on non-maximum supression\n        boxes = np.array([d.tlwh for d in detections])\n        scores = np.array([d.confidence for d in detections])\n\n        # update tracker\n        self.tracker.predict()\n        self.tracker.update(detections, clss, confs)\n\n        # output bbox identities\n        outputs = []\n        for track in self.tracker.tracks:\n            if not track.is_confirmed() or track.time_since_update > 1:\n                continue\n\n            box = track.to_tlwh()\n            x1, y1, x2, y2 = self._tlwh_to_xyxy(box)\n            \n            track_id = track.track_id\n            class_id = track.class_id\n            conf = track.conf\n            queue = track.q\n            outputs.append(np.array([x1, y1, x2, y2, track_id, class_id, conf, queue], dtype=object))\n        if len(outputs) > 0:\n            outputs = np.stack(outputs, axis=0)\n        return outputs\n\n    \"\"\"\n    TODO:\n        Convert bbox from xc_yc_w_h to xtl_ytl_w_h\n    Thanks JieChen91@github.com for reporting this bug!\n    \"\"\"\n    @staticmethod\n    def _xywh_to_tlwh(bbox_xywh):\n        if isinstance(bbox_xywh, np.ndarray):\n            bbox_tlwh = bbox_xywh.copy()\n        elif isinstance(bbox_xywh, torch.Tensor):\n            bbox_tlwh = bbox_xywh.clone()\n        bbox_tlwh[:, 0] = bbox_xywh[:, 0] - bbox_xywh[:, 2] / 2.\n        bbox_tlwh[:, 1] = bbox_xywh[:, 1] - bbox_xywh[:, 3] / 2.\n        return bbox_tlwh\n\n    def _xywh_to_xyxy(self, bbox_xywh):\n        x, y, w, h = bbox_xywh\n        x1 = max(int(x - w / 2), 0)\n        x2 = min(int(x + w / 2), self.width - 1)\n        y1 = max(int(y - h / 2), 0)\n        y2 = min(int(y + h / 2), 
self.height - 1)\n        return x1, y1, x2, y2\n\n    def _tlwh_to_xyxy(self, bbox_tlwh):\n        \"\"\"\n        TODO:\n            Convert bbox from xtl_ytl_w_h to xc_yc_w_h\n        Thanks JieChen91@github.com for reporting this bug!\n        \"\"\"\n        x, y, w, h = bbox_tlwh\n        x1 = max(int(x), 0)\n        x2 = min(int(x+w), self.width - 1)\n        y1 = max(int(y), 0)\n        y2 = min(int(y+h), self.height - 1)\n        return x1, y1, x2, y2\n\n    def increment_ages(self):\n        self.tracker.increment_ages()\n\n    def _xyxy_to_tlwh(self, bbox_xyxy):\n        x1, y1, x2, y2 = bbox_xyxy\n\n        t = x1\n        l = y1\n        w = int(x2 - x1)\n        h = int(y2 - y1)\n        return t, l, w, h\n\n    def _get_features(self, bbox_xywh, ori_img):\n        im_crops = []\n        for box in bbox_xywh:\n            x1, y1, x2, y2 = self._xywh_to_xyxy(box)\n            im = ori_img[y1:y2, x1:x2]\n            im_crops.append(im)\n        if im_crops:\n            features = self.model(im_crops)\n        else:\n            features = np.array([])\n        return features\n    \n    def trajectory(self, im0, q, color):\n        # Add rectangle to image (PIL-only)\n        for i, p in enumerate(q):\n            thickness = int(np.sqrt(float (i + 1)) * 1.5)\n            if p[0] == 'observationupdate': \n                cv2.circle(im0, p[1], 2, color=color, thickness=thickness)\n            else:\n                cv2.circle(im0, p[1], 2, color=(255,255,255), thickness=thickness)\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/__init__.py",
    "content": ""
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/asserts.py",
    "content": "from os import environ\n\n\ndef assert_in(file, files_to_check):\n    if file not in files_to_check:\n        raise AssertionError(\"{} does not exist in the list\".format(str(file)))\n    return True\n\n\ndef assert_in_env(check_list: list):\n    for item in check_list:\n        assert_in(item, environ.keys())\n    return True\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/draw.py",
    "content": "import numpy as np\nimport cv2\n\npalette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)\n\n\ndef compute_color_for_labels(label):\n    \"\"\"\n    Simple function that adds fixed color depending on the class\n    \"\"\"\n    color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]\n    return tuple(color)\n\n\ndef draw_boxes(img, bbox, identities=None, offset=(0,0)):\n    for i,box in enumerate(bbox):\n        x1,y1,x2,y2 = [int(i) for i in box]\n        x1 += offset[0]\n        x2 += offset[0]\n        y1 += offset[1]\n        y2 += offset[1]\n        # box text and bar\n        id = int(identities[i]) if identities is not None else 0    \n        color = compute_color_for_labels(id)\n        label = '{}{:d}'.format(\"\", id)\n        t_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_PLAIN, 2 , 2)[0]\n        cv2.rectangle(img,(x1, y1),(x2,y2),color,3)\n        cv2.rectangle(img,(x1, y1),(x1+t_size[0]+3,y1+t_size[1]+4), color,-1)\n        cv2.putText(img,label,(x1,y1+t_size[1]+4), cv2.FONT_HERSHEY_PLAIN, 2, [255,255,255], 2)\n    return img\n\n\n\nif __name__ == '__main__':\n    for i in range(82):\n        print(compute_color_for_labels(i))\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/evaluation.py",
    "content": "import os\nimport numpy as np\nimport copy\nimport motmetrics as mm\nmm.lap.default_solver = 'lap'\nfrom utils.io import read_results, unzip_objs\n\n\nclass Evaluator(object):\n\n    def __init__(self, data_root, seq_name, data_type):\n        self.data_root = data_root\n        self.seq_name = seq_name\n        self.data_type = data_type\n\n        self.load_annotations()\n        self.reset_accumulator()\n\n    def load_annotations(self):\n        assert self.data_type == 'mot'\n\n        gt_filename = os.path.join(self.data_root, self.seq_name, 'gt', 'gt.txt')\n        self.gt_frame_dict = read_results(gt_filename, self.data_type, is_gt=True)\n        self.gt_ignore_frame_dict = read_results(gt_filename, self.data_type, is_ignore=True)\n\n    def reset_accumulator(self):\n        self.acc = mm.MOTAccumulator(auto_id=True)\n\n    def eval_frame(self, frame_id, trk_tlwhs, trk_ids, rtn_events=False):\n        # results\n        trk_tlwhs = np.copy(trk_tlwhs)\n        trk_ids = np.copy(trk_ids)\n\n        # gts\n        gt_objs = self.gt_frame_dict.get(frame_id, [])\n        gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]\n\n        # ignore boxes\n        ignore_objs = self.gt_ignore_frame_dict.get(frame_id, [])\n        ignore_tlwhs = unzip_objs(ignore_objs)[0]\n\n\n        # remove ignored results\n        keep = np.ones(len(trk_tlwhs), dtype=bool)\n        iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)\n        if len(iou_distance) > 0:\n            match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)\n            match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])\n            match_ious = iou_distance[match_is, match_js]\n\n            match_js = np.asarray(match_js, dtype=int)\n            match_js = match_js[np.logical_not(np.isnan(match_ious))]\n            keep[match_js] = False\n            trk_tlwhs = trk_tlwhs[keep]\n            trk_ids = trk_ids[keep]\n\n        # get distance matrix\n        iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)\n\n        # acc\n        self.acc.update(gt_ids, trk_ids, iou_distance)\n\n        if rtn_events and iou_distance.size > 0 and hasattr(self.acc, 'last_mot_events'):\n            events = self.acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics\n        else:\n            events = None\n        return events\n\n    def eval_file(self, filename):\n        self.reset_accumulator()\n\n        result_frame_dict = read_results(filename, self.data_type, is_gt=False)\n        frames = sorted(list(set(self.gt_frame_dict.keys()) | set(result_frame_dict.keys())))\n        for frame_id in frames:\n            trk_objs = result_frame_dict.get(frame_id, [])\n            trk_tlwhs, trk_ids = unzip_objs(trk_objs)[:2]\n            self.eval_frame(frame_id, trk_tlwhs, trk_ids, rtn_events=False)\n\n        return self.acc\n\n    @staticmethod\n    def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):\n        names = copy.deepcopy(names)\n        if metrics is None:\n            metrics = mm.metrics.motchallenge_metrics\n        metrics = copy.deepcopy(metrics)\n\n        mh = mm.metrics.create()\n        summary = mh.compute_many(\n            accs,\n            metrics=metrics,\n            names=names,\n            generate_overall=True\n        )\n\n        return summary\n\n    @staticmethod\n    def save_summary(summary, filename):\n        import pandas as pd\n    
    writer = pd.ExcelWriter(filename)\n        summary.to_excel(writer)\n        writer.close()\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/io.py",
    "content": "import os\nfrom typing import Dict\nimport numpy as np\n\n# from utils.log import get_logger\n\n\ndef write_results(filename, results, data_type):\n    if data_type == 'mot':\n        save_format = '{frame},{id},{x1},{y1},{w},{h},-1,-1,-1,-1\\n'\n    elif data_type == 'kitti':\n        save_format = '{frame} {id} pedestrian 0 0 -10 {x1} {y1} {x2} {y2} -10 -10 -10 -1000 -1000 -1000 -10\\n'\n    else:\n        raise ValueError(data_type)\n\n    with open(filename, 'w') as f:\n        for frame_id, tlwhs, track_ids in results:\n            if data_type == 'kitti':\n                frame_id -= 1\n            for tlwh, track_id in zip(tlwhs, track_ids):\n                if track_id < 0:\n                    continue\n                x1, y1, w, h = tlwh\n                x2, y2 = x1 + w, y1 + h\n                line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h)\n                f.write(line)\n\n\n# def write_results(filename, results_dict: Dict, data_type: str):\n#     if not filename:\n#         return\n#     path = os.path.dirname(filename)\n#     if not os.path.exists(path):\n#         os.makedirs(path)\n\n#     if data_type in ('mot', 'mcmot', 'lab'):\n#         save_format = '{frame},{id},{x1},{y1},{w},{h},1,-1,-1,-1\\n'\n#     elif data_type == 'kitti':\n#         save_format = '{frame} {id} pedestrian -1 -1 -10 {x1} {y1} {x2} {y2} -1 -1 -1 -1000 -1000 -1000 -10 {score}\\n'\n#     else:\n#         raise ValueError(data_type)\n\n#     with open(filename, 'w') as f:\n#         for frame_id, frame_data in results_dict.items():\n#             if data_type == 'kitti':\n#                 frame_id -= 1\n#             for tlwh, track_id in frame_data:\n#                 if track_id < 0:\n#                     continue\n#                 x1, y1, w, h = tlwh\n#                 x2, y2 = x1 + w, y1 + h\n#                 line = save_format.format(frame=frame_id, id=track_id, x1=x1, y1=y1, x2=x2, y2=y2, w=w, h=h, score=1.0)\n#                 f.write(line)\n#     logger.info('Save results to {}'.format(filename))\n\n\ndef read_results(filename, data_type: str, is_gt=False, is_ignore=False):\n    if data_type in ('mot', 'lab'):\n        read_fun = read_mot_results\n    else:\n        raise ValueError('Unknown data type: {}'.format(data_type))\n\n    return read_fun(filename, is_gt, is_ignore)\n\n\n\"\"\"\nlabels={'ped', ...\t\t\t% 1\n'person_on_vhcl', ...\t% 2\n'car', ...\t\t\t\t% 3\n'bicycle', ...\t\t\t% 4\n'mbike', ...\t\t\t% 5\n'non_mot_vhcl', ...\t\t% 6\n'static_person', ...\t% 7\n'distractor', ...\t\t% 8\n'occluder', ...\t\t\t% 9\n'occluder_on_grnd', ...\t\t%10\n'occluder_full', ...\t\t% 11\n'reflection', ...\t\t% 12\n'crowd' ...\t\t\t% 13\n};\n\"\"\"\n\n\ndef read_mot_results(filename, is_gt, is_ignore):\n    valid_labels = {1}\n    ignore_labels = {2, 7, 8, 12}\n    results_dict = dict()\n    if os.path.isfile(filename):\n        with open(filename, 'r') as f:\n            for line in f.readlines():\n                linelist = line.split(',')\n                if len(linelist) < 7:\n                    continue\n                fid = int(linelist[0])\n                if fid < 1:\n                    continue\n                results_dict.setdefault(fid, list())\n\n                if is_gt:\n                    if 'MOT16-' in filename or 'MOT17-' in filename:\n                        label = int(float(linelist[7]))\n                        mark = int(float(linelist[6]))\n                        if mark == 0 or label not in valid_labels:\n     
                       continue\n                    score = 1\n                elif is_ignore:\n                    if 'MOT16-' in filename or 'MOT17-' in filename:\n                        label = int(float(linelist[7]))\n                        vis_ratio = float(linelist[8])\n                        if label not in ignore_labels and vis_ratio >= 0:\n                            continue\n                    else:\n                        continue\n                    score = 1\n                else:\n                    score = float(linelist[6])\n\n                tlwh = tuple(map(float, linelist[2:6]))\n                target_id = int(linelist[1])\n\n                results_dict[fid].append((tlwh, target_id, score))\n\n    return results_dict\n\n\ndef unzip_objs(objs):\n    if len(objs) > 0:\n        tlwhs, ids, scores = zip(*objs)\n    else:\n        tlwhs, ids, scores = [], [], []\n    tlwhs = np.asarray(tlwhs, dtype=float).reshape(-1, 4)\n\n    return tlwhs, ids, scores"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/json_logger.py",
    "content": "\"\"\"\nReferences:\n    https://medium.com/analytics-vidhya/creating-a-custom-logging-mechanism-for-real-time-object-detection-using-tdd-4ca2cfcd0a2f\n\"\"\"\nimport json\nfrom os import makedirs\nfrom os.path import exists, join\nfrom datetime import datetime\n\n\nclass JsonMeta(object):\n    HOURS = 3\n    MINUTES = 59\n    SECONDS = 59\n    PATH_TO_SAVE = 'LOGS'\n    DEFAULT_FILE_NAME = 'remaining'\n\n\nclass BaseJsonLogger(object):\n    \"\"\"\n    This is the base class that returns __dict__ of its own\n    it also returns the dicts of objects in the attributes that are list instances\n\n    \"\"\"\n\n    def dic(self):\n        # returns dicts of objects\n        out = {}\n        for k, v in self.__dict__.items():\n            if hasattr(v, 'dic'):\n                out[k] = v.dic()\n            elif isinstance(v, list):\n                out[k] = self.list(v)\n            else:\n                out[k] = v\n        return out\n\n    @staticmethod\n    def list(values):\n        # applies the dic method on items in the list\n        return [v.dic() if hasattr(v, 'dic') else v for v in values]\n\n\nclass Label(BaseJsonLogger):\n    \"\"\"\n    For each bounding box there are various categories with confidences. Label class keeps track of that information.\n    \"\"\"\n\n    def __init__(self, category: str, confidence: float):\n        self.category = category\n        self.confidence = confidence\n\n\nclass Bbox(BaseJsonLogger):\n    \"\"\"\n    This module stores the information for each frame and use them in JsonParser\n    Attributes:\n        labels (list): List of label module.\n        top (int):\n        left (int):\n        width (int):\n        height (int):\n\n    Args:\n        bbox_id (float):\n        top (int):\n        left (int):\n        width (int):\n        height (int):\n\n    References:\n        Check Label module for better understanding.\n\n\n    \"\"\"\n\n    def __init__(self, bbox_id, top, left, width, height):\n        self.labels = []\n        self.bbox_id = bbox_id\n        self.top = top\n        self.left = left\n        self.width = width\n        self.height = height\n\n    def add_label(self, category, confidence):\n        # adds category and confidence only if top_k is not exceeded.\n        self.labels.append(Label(category, confidence))\n\n    def labels_full(self, value):\n        return len(self.labels) == value\n\n\nclass Frame(BaseJsonLogger):\n    \"\"\"\n    This module stores the information for each frame and use them in JsonParser\n    Attributes:\n        timestamp (float): The elapsed time of captured frame\n        frame_id (int): The frame number of the captured video\n        bboxes (list of Bbox objects): Stores the list of bbox objects.\n\n    References:\n        Check Bbox class for better information\n\n    Args:\n        timestamp (float):\n        frame_id (int):\n\n    \"\"\"\n\n    def __init__(self, frame_id: int, timestamp: float = None):\n        self.frame_id = frame_id\n        self.timestamp = timestamp\n        self.bboxes = []\n\n    def add_bbox(self, bbox_id: int, top: int, left: int, width: int, height: int):\n        bboxes_ids = [bbox.bbox_id for bbox in self.bboxes]\n        if bbox_id not in bboxes_ids:\n            self.bboxes.append(Bbox(bbox_id, top, left, width, height))\n        else:\n            raise ValueError(\"Frame with id: {} already has a Bbox with id: {}\".format(self.frame_id, bbox_id))\n\n    def add_label_to_bbox(self, bbox_id: int, category: str, confidence: float):\n        
bboxes = {bbox.bbox_id: bbox for bbox in self.bboxes}\n        if bbox_id in bboxes.keys():\n            res = bboxes.get(bbox_id)\n            res.add_label(category, confidence)\n        else:\n            raise ValueError('the bbox with id: {} does not exist!'.format(bbox_id))\n\n\nclass BboxToJsonLogger(BaseJsonLogger):\n    \"\"\"\n    This module is designed to automate the task of logging jsons. An example json is used\n    to show the contents of the json file briefly\n    Example:\n          {\n          \"video_details\": {\n            \"frame_width\": 1920,\n            \"frame_height\": 1080,\n            \"frame_rate\": 20,\n            \"video_name\": \"/home/gpu/codes/MSD/pedestrian_2/project/public/camera1.avi\"\n          },\n          \"frames\": [\n            {\n              \"frame_id\": 329,\n              \"timestamp\": 3365.1254,\n              \"bboxes\": [\n                {\n                  \"labels\": [\n                    {\n                      \"category\": \"pedestrian\",\n                      \"confidence\": 0.9\n                    }\n                  ],\n                  \"bbox_id\": 0,\n                  \"top\": 1257,\n                  \"left\": 138,\n                  \"width\": 68,\n                  \"height\": 109\n                }\n              ]\n            }],\n\n    Attributes:\n        frames (dict): It's a dictionary that maps each frame_id to json attributes.\n        video_details (dict): information about the video file.\n        top_k_labels (int): shows the allowed number of labels\n        start_time (datetime object): we use it to automate the json output by time.\n\n    Args:\n        top_k_labels (int): shows the allowed number of labels\n\n    \"\"\"\n\n    def __init__(self, top_k_labels: int = 1):\n        self.frames = {}\n        self.video_details = dict(frame_width=None, frame_height=None, frame_rate=None, video_name=None)\n        self.top_k_labels = top_k_labels\n        self.start_time = datetime.now()\n\n    def set_top_k(self, value):\n        self.top_k_labels = value\n\n    def frame_exists(self, frame_id: int) -> bool:\n        \"\"\"\n        Args:\n            frame_id (int):\n\n        Returns:\n            bool: true if frame_id is recognized\n        \"\"\"\n        return frame_id in self.frames.keys()\n\n    def add_frame(self, frame_id: int, timestamp: float = None) -> None:\n        \"\"\"\n        Args:\n            frame_id (int):\n            timestamp (float): opencv captured frame time property\n\n        Raises:\n             ValueError: if frame_id already exists in the frames attribute\n\n        Returns:\n            None\n\n        \"\"\"\n        if not self.frame_exists(frame_id):\n            self.frames[frame_id] = Frame(frame_id, timestamp)\n        else:\n            raise ValueError(\"Frame id: {} already exists\".format(frame_id))\n\n    def bbox_exists(self, frame_id: int, bbox_id: int) -> bool:\n        \"\"\"\n        Args:\n            frame_id:\n            bbox_id:\n\n        Returns:\n            bool: if bbox exists in frame bboxes list\n        \"\"\"\n        bboxes = []\n        if self.frame_exists(frame_id=frame_id):\n            bboxes = [bbox.bbox_id for bbox in self.frames[frame_id].bboxes]\n        return bbox_id in bboxes\n\n    def find_bbox(self, frame_id: int, bbox_id: int):\n        \"\"\"\n\n        Args:\n            frame_id:\n            bbox_id:\n\n        Returns:\n            bbox_id 
(int):\n\n        Raises:\n            ValueError: if bbox_id does not exist in the bbox list of specific frame.\n        \"\"\"\n        if not self.bbox_exists(frame_id, bbox_id):\n            raise ValueError(\"frame with id: {} does not contain bbox with id: {}\".format(frame_id, bbox_id))\n        bboxes = {bbox.bbox_id: bbox for bbox in self.frames[frame_id].bboxes}\n        return bboxes.get(bbox_id)\n\n    def add_bbox_to_frame(self, frame_id: int, bbox_id: int, top: int, left: int, width: int, height: int) -> None:\n        \"\"\"\n\n        Args:\n            frame_id (int):\n            bbox_id (int):\n            top (int):\n            left (int):\n            width (int):\n            height (int):\n\n        Returns:\n            None\n\n        Raises:\n            ValueError: if bbox_id already exist in frame information with frame_id\n            ValueError: if frame_id does not exist in frames attribute\n        \"\"\"\n        if self.frame_exists(frame_id):\n            frame = self.frames[frame_id]\n            if not self.bbox_exists(frame_id, bbox_id):\n                frame.add_bbox(bbox_id, top, left, width, height)\n            else:\n                raise ValueError(\n                    \"frame with frame_id: {} already contains the bbox with id: {} \".format(frame_id, bbox_id))\n        else:\n            raise ValueError(\"frame with frame_id: {} does not exist\".format(frame_id))\n\n    def add_label_to_bbox(self, frame_id: int, bbox_id: int, category: str, confidence: float):\n        \"\"\"\n        Args:\n            frame_id:\n            bbox_id:\n            category:\n            confidence: the confidence value returned from yolo detection\n\n        Returns:\n            None\n\n        Raises:\n            ValueError: if labels quota (top_k_labels) exceeds.\n        \"\"\"\n        bbox = self.find_bbox(frame_id, bbox_id)\n        if not bbox.labels_full(self.top_k_labels):\n            bbox.add_label(category, confidence)\n        else:\n            raise ValueError(\"labels in frame_id: {}, bbox_id: {} is fulled\".format(frame_id, bbox_id))\n\n    def add_video_details(self, frame_width: int = None, frame_height: int = None, frame_rate: int = None,\n                          video_name: str = None):\n        self.video_details['frame_width'] = frame_width\n        self.video_details['frame_height'] = frame_height\n        self.video_details['frame_rate'] = frame_rate\n        self.video_details['video_name'] = video_name\n\n    def output(self):\n        output = {'video_details': self.video_details}\n        result = list(self.frames.values())\n        output['frames'] = [item.dic() for item in result]\n        return output\n\n    def json_output(self, output_name):\n        \"\"\"\n        Args:\n            output_name:\n\n        Returns:\n            None\n\n        Notes:\n            It creates the json output with `output_name` name.\n        \"\"\"\n        if not output_name.endswith('.json'):\n            output_name += '.json'\n        with open(output_name, 'w') as file:\n            json.dump(self.output(), file)\n        file.close()\n\n    def set_start(self):\n        self.start_time = datetime.now()\n\n    def schedule_output_by_time(self, output_dir=JsonMeta.PATH_TO_SAVE, hours: int = 0, minutes: int = 0,\n                                seconds: int = 60) -> None:\n        \"\"\"\n        Notes:\n            Creates folder and then periodically stores the jsons on that address.\n\n        Args:\n            output_dir (str): 
the directory where output files will be stored\n            hours (int):\n            minutes (int):\n            seconds (int):\n\n        Returns:\n            None\n\n        \"\"\"\n        end = datetime.now()\n        interval = 0\n        interval += abs(min([hours, JsonMeta.HOURS]) * 3600)\n        interval += abs(min([minutes, JsonMeta.MINUTES]) * 60)\n        interval += abs(min([seconds, JsonMeta.SECONDS]))\n        diff = (end - self.start_time).seconds\n\n        if diff > interval:\n            output_name = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '.json'\n            if not exists(output_dir):\n                makedirs(output_dir)\n            output = join(output_dir, output_name)\n            self.json_output(output_name=output)\n            self.frames = {}\n            self.start_time = datetime.now()\n\n    def schedule_output_by_frames(self, frames_quota, frame_counter, output_dir=JsonMeta.PATH_TO_SAVE):\n        \"\"\"\n        saves as the number of frames quota increases higher.\n        :param frames_quota:\n        :param frame_counter:\n        :param output_dir:\n        :return:\n        \"\"\"\n        pass\n\n    def flush(self, output_dir):\n        \"\"\"\n        Notes:\n            We use this function to output jsons whenever possible.\n            like the time that we exit the while loop of opencv.\n\n        Args:\n            output_dir:\n\n        Returns:\n            None\n\n        \"\"\"\n        filename = self.start_time.strftime('%Y-%m-%d %H-%M-%S') + '-remaining.json'\n        output = join(output_dir, filename)\n        self.json_output(output_name=output)\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/log.py",
    "content": "import logging\n\n\ndef get_logger(name='root'):\n    formatter = logging.Formatter(\n        # fmt='%(asctime)s [%(levelname)s]: %(filename)s(%(funcName)s:%(lineno)s) >> %(message)s')\n        fmt='%(asctime)s [%(levelname)s]: %(message)s', datefmt='%Y-%m-%d %H:%M:%S')\n\n    handler = logging.StreamHandler()\n    handler.setFormatter(formatter)\n\n    logger = logging.getLogger(name)\n    logger.setLevel(logging.INFO)\n    logger.addHandler(handler)\n    return logger\n\n\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/parser.py",
    "content": "import os\nimport yaml\nfrom easydict import EasyDict as edict\n\n\nclass YamlParser(edict):\n    \"\"\"\n    This is yaml parser based on EasyDict.\n    \"\"\"\n\n    def __init__(self, cfg_dict=None, config_file=None):\n        if cfg_dict is None:\n            cfg_dict = {}\n\n        if config_file is not None:\n            assert(os.path.isfile(config_file))\n            with open(config_file, 'r') as fo:\n                yaml_ = yaml.load(fo.read(), Loader=yaml.FullLoader)\n                cfg_dict.update(yaml_)\n\n        super(YamlParser, self).__init__(cfg_dict)\n\n    def merge_from_file(self, config_file):\n        with open(config_file, 'r') as fo:\n            yaml_ = yaml.load(fo.read(), Loader=yaml.FullLoader)\n            self.update(yaml_)\n\n    def merge_from_dict(self, config_dict):\n        self.update(config_dict)\n\n\ndef get_config(config_file=None):\n    return YamlParser(config_file=config_file)\n\n\nif __name__ == \"__main__\":\n    cfg = YamlParser(config_file=\"../configs/yolov3.yaml\")\n    cfg.merge_from_file(\"../configs/strong_sort.yaml\")\n\n    import ipdb\n    ipdb.set_trace()\n"
  },
  {
    "path": "DLTA_AI_app/trackers/strongsort/utils/tools.py",
    "content": "from functools import wraps\nfrom time import time\n\n\ndef is_video(ext: str):\n    \"\"\"\n    Returns true if ext exists in\n    allowed_exts for video files.\n\n    Args:\n        ext:\n\n    Returns:\n\n    \"\"\"\n\n    allowed_exts = ('.mp4', '.webm', '.ogg', '.avi', '.wmv', '.mkv', '.3gp')\n    return any((ext.endswith(x) for x in allowed_exts))\n\n\ndef tik_tok(func):\n    \"\"\"\n    keep track of time for each process.\n    Args:\n        func:\n\n    Returns:\n\n    \"\"\"\n    @wraps(func)\n    def _time_it(*args, **kwargs):\n        start = time()\n        try:\n            return func(*args, **kwargs)\n        finally:\n            end_ = time()\n            print(\"time: {:.03f}s, fps: {:.03f}\".format(end_ - start, 1 / (end_ - start)))\n\n    return _time_it\n"
  },
  {
    "path": "LICENSE",
    "content": "                    GNU GENERAL PUBLIC LICENSE\n                       Version 3, 29 June 2007\n\n Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>\n Everyone is permitted to copy and distribute verbatim copies\n of this license document, but changing it is not allowed.\n\n                            Preamble\n\n  The GNU General Public License is a free, copyleft license for\nsoftware and other kinds of works.\n\n  The licenses for most software and other practical works are designed\nto take away your freedom to share and change the works.  By contrast,\nthe GNU General Public License is intended to guarantee your freedom to\nshare and change all versions of a program--to make sure it remains free\nsoftware for all its users.  We, the Free Software Foundation, use the\nGNU General Public License for most of our software; it applies also to\nany other work released this way by its authors.  You can apply it to\nyour programs, too.\n\n  When we speak of free software, we are referring to freedom, not\nprice.  Our General Public Licenses are designed to make sure that you\nhave the freedom to distribute copies of free software (and charge for\nthem if you wish), that you receive source code or can get it if you\nwant it, that you can change the software or use pieces of it in new\nfree programs, and that you know you can do these things.\n\n  To protect your rights, we need to prevent others from denying you\nthese rights or asking you to surrender the rights.  Therefore, you have\ncertain responsibilities if you distribute copies of the software, or if\nyou modify it: responsibilities to respect the freedom of others.\n\n  For example, if you distribute copies of such a program, whether\ngratis or for a fee, you must pass on to the recipients the same\nfreedoms that you received.  You must make sure that they, too, receive\nor can get the source code.  And you must show them these terms so they\nknow their rights.\n\n  Developers that use the GNU GPL protect your rights with two steps:\n(1) assert copyright on the software, and (2) offer you this License\ngiving you legal permission to copy, distribute and/or modify it.\n\n  For the developers' and authors' protection, the GPL clearly explains\nthat there is no warranty for this free software.  For both users' and\nauthors' sake, the GPL requires that modified versions be marked as\nchanged, so that their problems will not be attributed erroneously to\nauthors of previous versions.\n\n  Some devices are designed to deny users access to install or run\nmodified versions of the software inside them, although the manufacturer\ncan do so.  This is fundamentally incompatible with the aim of\nprotecting users' freedom to change the software.  The systematic\npattern of such abuse occurs in the area of products for individuals to\nuse, which is precisely where it is most unacceptable.  Therefore, we\nhave designed this version of the GPL to prohibit the practice for those\nproducts.  If such problems arise substantially in other domains, we\nstand ready to extend this provision to those domains in future versions\nof the GPL, as needed to protect the freedom of users.\n\n  Finally, every program is threatened constantly by software patents.\nStates should not allow patents to restrict development and use of\nsoftware on general-purpose computers, but in those that do, we wish to\navoid the special danger that patents applied to a free program could\nmake it effectively proprietary.  
To prevent this, the GPL assures that\npatents cannot be used to render the program non-free.\n\n  The precise terms and conditions for copying, distribution and\nmodification follow.\n\n                       TERMS AND CONDITIONS\n\n  0. Definitions.\n\n  \"This License\" refers to version 3 of the GNU General Public License.\n\n  \"Copyright\" also means copyright-like laws that apply to other kinds of\nworks, such as semiconductor masks.\n\n  \"The Program\" refers to any copyrightable work licensed under this\nLicense.  Each licensee is addressed as \"you\".  \"Licensees\" and\n\"recipients\" may be individuals or organizations.\n\n  To \"modify\" a work means to copy from or adapt all or part of the work\nin a fashion requiring copyright permission, other than the making of an\nexact copy.  The resulting work is called a \"modified version\" of the\nearlier work or a work \"based on\" the earlier work.\n\n  A \"covered work\" means either the unmodified Program or a work based\non the Program.\n\n  To \"propagate\" a work means to do anything with it that, without\npermission, would make you directly or secondarily liable for\ninfringement under applicable copyright law, except executing it on a\ncomputer or modifying a private copy.  Propagation includes copying,\ndistribution (with or without modification), making available to the\npublic, and in some countries other activities as well.\n\n  To \"convey\" a work means any kind of propagation that enables other\nparties to make or receive copies.  Mere interaction with a user through\na computer network, with no transfer of a copy, is not conveying.\n\n  An interactive user interface displays \"Appropriate Legal Notices\"\nto the extent that it includes a convenient and prominently visible\nfeature that (1) displays an appropriate copyright notice, and (2)\ntells the user that there is no warranty for the work (except to the\nextent that warranties are provided), that licensees may convey the\nwork under this License, and how to view a copy of this License.  If\nthe interface presents a list of user commands or options, such as a\nmenu, a prominent item in the list meets this criterion.\n\n  1. Source Code.\n\n  The \"source code\" for a work means the preferred form of the work\nfor making modifications to it.  \"Object code\" means any non-source\nform of a work.\n\n  A \"Standard Interface\" means an interface that either is an official\nstandard defined by a recognized standards body, or, in the case of\ninterfaces specified for a particular programming language, one that\nis widely used among developers working in that language.\n\n  The \"System Libraries\" of an executable work include anything, other\nthan the work as a whole, that (a) is included in the normal form of\npackaging a Major Component, but which is not part of that Major\nComponent, and (b) serves only to enable use of the work with that\nMajor Component, or to implement a Standard Interface for which an\nimplementation is available to the public in source code form.  
A\n\"Major Component\", in this context, means a major essential component\n(kernel, window system, and so on) of the specific operating system\n(if any) on which the executable work runs, or a compiler used to\nproduce the work, or an object code interpreter used to run it.\n\n  The \"Corresponding Source\" for a work in object code form means all\nthe source code needed to generate, install, and (for an executable\nwork) run the object code and to modify the work, including scripts to\ncontrol those activities.  However, it does not include the work's\nSystem Libraries, or general-purpose tools or generally available free\nprograms which are used unmodified in performing those activities but\nwhich are not part of the work.  For example, Corresponding Source\nincludes interface definition files associated with source files for\nthe work, and the source code for shared libraries and dynamically\nlinked subprograms that the work is specifically designed to require,\nsuch as by intimate data communication or control flow between those\nsubprograms and other parts of the work.\n\n  The Corresponding Source need not include anything that users\ncan regenerate automatically from other parts of the Corresponding\nSource.\n\n  The Corresponding Source for a work in source code form is that\nsame work.\n\n  2. Basic Permissions.\n\n  All rights granted under this License are granted for the term of\ncopyright on the Program, and are irrevocable provided the stated\nconditions are met.  This License explicitly affirms your unlimited\npermission to run the unmodified Program.  The output from running a\ncovered work is covered by this License only if the output, given its\ncontent, constitutes a covered work.  This License acknowledges your\nrights of fair use or other equivalent, as provided by copyright law.\n\n  You may make, run and propagate covered works that you do not\nconvey, without conditions so long as your license otherwise remains\nin force.  You may convey covered works to others for the sole purpose\nof having them make modifications exclusively for you, or provide you\nwith facilities for running those works, provided that you comply with\nthe terms of this License in conveying all material for which you do\nnot control copyright.  Those thus making or running the covered works\nfor you must do so exclusively on your behalf, under your direction\nand control, on terms that prohibit them from making any copies of\nyour copyrighted material outside their relationship with you.\n\n  Conveying under any other circumstances is permitted solely under\nthe conditions stated below.  Sublicensing is not allowed; section 10\nmakes it unnecessary.\n\n  3. Protecting Users' Legal Rights From Anti-Circumvention Law.\n\n  No covered work shall be deemed part of an effective technological\nmeasure under any applicable law fulfilling obligations under article\n11 of the WIPO copyright treaty adopted on 20 December 1996, or\nsimilar laws prohibiting or restricting circumvention of such\nmeasures.\n\n  When you convey a covered work, you waive any legal power to forbid\ncircumvention of technological measures to the extent such circumvention\nis effected by exercising rights under this License with respect to\nthe covered work, and you disclaim any intention to limit operation or\nmodification of the work as a means of enforcing, against the work's\nusers, your or third parties' legal rights to forbid circumvention of\ntechnological measures.\n\n  4. 
Conveying Verbatim Copies.\n\n  You may convey verbatim copies of the Program's source code as you\nreceive it, in any medium, provided that you conspicuously and\nappropriately publish on each copy an appropriate copyright notice;\nkeep intact all notices stating that this License and any\nnon-permissive terms added in accord with section 7 apply to the code;\nkeep intact all notices of the absence of any warranty; and give all\nrecipients a copy of this License along with the Program.\n\n  You may charge any price or no price for each copy that you convey,\nand you may offer support or warranty protection for a fee.\n\n  5. Conveying Modified Source Versions.\n\n  You may convey a work based on the Program, or the modifications to\nproduce it from the Program, in the form of source code under the\nterms of section 4, provided that you also meet all of these conditions:\n\n    a) The work must carry prominent notices stating that you modified\n    it, and giving a relevant date.\n\n    b) The work must carry prominent notices stating that it is\n    released under this License and any conditions added under section\n    7.  This requirement modifies the requirement in section 4 to\n    \"keep intact all notices\".\n\n    c) You must license the entire work, as a whole, under this\n    License to anyone who comes into possession of a copy.  This\n    License will therefore apply, along with any applicable section 7\n    additional terms, to the whole of the work, and all its parts,\n    regardless of how they are packaged.  This License gives no\n    permission to license the work in any other way, but it does not\n    invalidate such permission if you have separately received it.\n\n    d) If the work has interactive user interfaces, each must display\n    Appropriate Legal Notices; however, if the Program has interactive\n    interfaces that do not display Appropriate Legal Notices, your\n    work need not make them do so.\n\n  A compilation of a covered work with other separate and independent\nworks, which are not by their nature extensions of the covered work,\nand which are not combined with it such as to form a larger program,\nin or on a volume of a storage or distribution medium, is called an\n\"aggregate\" if the compilation and its resulting copyright are not\nused to limit the access or legal rights of the compilation's users\nbeyond what the individual works permit.  Inclusion of a covered work\nin an aggregate does not cause this License to apply to the other\nparts of the aggregate.\n\n  6. 
Conveying Non-Source Forms.\n\n  You may convey a covered work in object code form under the terms\nof sections 4 and 5, provided that you also convey the\nmachine-readable Corresponding Source under the terms of this License,\nin one of these ways:\n\n    a) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by the\n    Corresponding Source fixed on a durable physical medium\n    customarily used for software interchange.\n\n    b) Convey the object code in, or embodied in, a physical product\n    (including a physical distribution medium), accompanied by a\n    written offer, valid for at least three years and valid for as\n    long as you offer spare parts or customer support for that product\n    model, to give anyone who possesses the object code either (1) a\n    copy of the Corresponding Source for all the software in the\n    product that is covered by this License, on a durable physical\n    medium customarily used for software interchange, for a price no\n    more than your reasonable cost of physically performing this\n    conveying of source, or (2) access to copy the\n    Corresponding Source from a network server at no charge.\n\n    c) Convey individual copies of the object code with a copy of the\n    written offer to provide the Corresponding Source.  This\n    alternative is allowed only occasionally and noncommercially, and\n    only if you received the object code with such an offer, in accord\n    with subsection 6b.\n\n    d) Convey the object code by offering access from a designated\n    place (gratis or for a charge), and offer equivalent access to the\n    Corresponding Source in the same way through the same place at no\n    further charge.  You need not require recipients to copy the\n    Corresponding Source along with the object code.  If the place to\n    copy the object code is a network server, the Corresponding Source\n    may be on a different server (operated by you or a third party)\n    that supports equivalent copying facilities, provided you maintain\n    clear directions next to the object code saying where to find the\n    Corresponding Source.  Regardless of what server hosts the\n    Corresponding Source, you remain obligated to ensure that it is\n    available for as long as needed to satisfy these requirements.\n\n    e) Convey the object code using peer-to-peer transmission, provided\n    you inform other peers where the object code and Corresponding\n    Source of the work are being offered to the general public at no\n    charge under subsection 6d.\n\n  A separable portion of the object code, whose source code is excluded\nfrom the Corresponding Source as a System Library, need not be\nincluded in conveying the object code work.\n\n  A \"User Product\" is either (1) a \"consumer product\", which means any\ntangible personal property which is normally used for personal, family,\nor household purposes, or (2) anything designed or sold for incorporation\ninto a dwelling.  In determining whether a product is a consumer product,\ndoubtful cases shall be resolved in favor of coverage.  For a particular\nproduct received by a particular user, \"normally used\" refers to a\ntypical or common use of that class of product, regardless of the status\nof the particular user or of the way in which the particular user\nactually uses, or expects or is expected to use, the product.  
A product\nis a consumer product regardless of whether the product has substantial\ncommercial, industrial or non-consumer uses, unless such uses represent\nthe only significant mode of use of the product.\n\n  \"Installation Information\" for a User Product means any methods,\nprocedures, authorization keys, or other information required to install\nand execute modified versions of a covered work in that User Product from\na modified version of its Corresponding Source.  The information must\nsuffice to ensure that the continued functioning of the modified object\ncode is in no case prevented or interfered with solely because\nmodification has been made.\n\n  If you convey an object code work under this section in, or with, or\nspecifically for use in, a User Product, and the conveying occurs as\npart of a transaction in which the right of possession and use of the\nUser Product is transferred to the recipient in perpetuity or for a\nfixed term (regardless of how the transaction is characterized), the\nCorresponding Source conveyed under this section must be accompanied\nby the Installation Information.  But this requirement does not apply\nif neither you nor any third party retains the ability to install\nmodified object code on the User Product (for example, the work has\nbeen installed in ROM).\n\n  The requirement to provide Installation Information does not include a\nrequirement to continue to provide support service, warranty, or updates\nfor a work that has been modified or installed by the recipient, or for\nthe User Product in which it has been modified or installed.  Access to a\nnetwork may be denied when the modification itself materially and\nadversely affects the operation of the network or violates the rules and\nprotocols for communication across the network.\n\n  Corresponding Source conveyed, and Installation Information provided,\nin accord with this section must be in a format that is publicly\ndocumented (and with an implementation available to the public in\nsource code form), and must require no special password or key for\nunpacking, reading or copying.\n\n  7. Additional Terms.\n\n  \"Additional permissions\" are terms that supplement the terms of this\nLicense by making exceptions from one or more of its conditions.\nAdditional permissions that are applicable to the entire Program shall\nbe treated as though they were included in this License, to the extent\nthat they are valid under applicable law.  If additional permissions\napply only to part of the Program, that part may be used separately\nunder those permissions, but the entire Program remains governed by\nthis License without regard to the additional permissions.\n\n  When you convey a copy of a covered work, you may at your option\nremove any additional permissions from that copy, or from any part of\nit.  (Additional permissions may be written to require their own\nremoval in certain cases when you modify the work.)  
You may place\nadditional permissions on material, added by you to a covered work,\nfor which you have or can give appropriate copyright permission.\n\n  Notwithstanding any other provision of this License, for material you\nadd to a covered work, you may (if authorized by the copyright holders of\nthat material) supplement the terms of this License with terms:\n\n    a) Disclaiming warranty or limiting liability differently from the\n    terms of sections 15 and 16 of this License; or\n\n    b) Requiring preservation of specified reasonable legal notices or\n    author attributions in that material or in the Appropriate Legal\n    Notices displayed by works containing it; or\n\n    c) Prohibiting misrepresentation of the origin of that material, or\n    requiring that modified versions of such material be marked in\n    reasonable ways as different from the original version; or\n\n    d) Limiting the use for publicity purposes of names of licensors or\n    authors of the material; or\n\n    e) Declining to grant rights under trademark law for use of some\n    trade names, trademarks, or service marks; or\n\n    f) Requiring indemnification of licensors and authors of that\n    material by anyone who conveys the material (or modified versions of\n    it) with contractual assumptions of liability to the recipient, for\n    any liability that these contractual assumptions directly impose on\n    those licensors and authors.\n\n  All other non-permissive additional terms are considered \"further\nrestrictions\" within the meaning of section 10.  If the Program as you\nreceived it, or any part of it, contains a notice stating that it is\ngoverned by this License along with a term that is a further\nrestriction, you may remove that term.  If a license document contains\na further restriction but permits relicensing or conveying under this\nLicense, you may add to a covered work material governed by the terms\nof that license document, provided that the further restriction does\nnot survive such relicensing or conveying.\n\n  If you add terms to a covered work in accord with this section, you\nmust place, in the relevant source files, a statement of the\nadditional terms that apply to those files, or a notice indicating\nwhere to find the applicable terms.\n\n  Additional terms, permissive or non-permissive, may be stated in the\nform of a separately written license, or stated as exceptions;\nthe above requirements apply either way.\n\n  8. Termination.\n\n  You may not propagate or modify a covered work except as expressly\nprovided under this License.  
Any attempt otherwise to propagate or\nmodify it is void, and will automatically terminate your rights under\nthis License (including any patent licenses granted under the third\nparagraph of section 11).\n\n  However, if you cease all violation of this License, then your\nlicense from a particular copyright holder is reinstated (a)\nprovisionally, unless and until the copyright holder explicitly and\nfinally terminates your license, and (b) permanently, if the copyright\nholder fails to notify you of the violation by some reasonable means\nprior to 60 days after the cessation.\n\n  Moreover, your license from a particular copyright holder is\nreinstated permanently if the copyright holder notifies you of the\nviolation by some reasonable means, this is the first time you have\nreceived notice of violation of this License (for any work) from that\ncopyright holder, and you cure the violation prior to 30 days after\nyour receipt of the notice.\n\n  Termination of your rights under this section does not terminate the\nlicenses of parties who have received copies or rights from you under\nthis License.  If your rights have been terminated and not permanently\nreinstated, you do not qualify to receive new licenses for the same\nmaterial under section 10.\n\n  9. Acceptance Not Required for Having Copies.\n\n  You are not required to accept this License in order to receive or\nrun a copy of the Program.  Ancillary propagation of a covered work\noccurring solely as a consequence of using peer-to-peer transmission\nto receive a copy likewise does not require acceptance.  However,\nnothing other than this License grants you permission to propagate or\nmodify any covered work.  These actions infringe copyright if you do\nnot accept this License.  Therefore, by modifying or propagating a\ncovered work, you indicate your acceptance of this License to do so.\n\n  10. Automatic Licensing of Downstream Recipients.\n\n  Each time you convey a covered work, the recipient automatically\nreceives a license from the original licensors, to run, modify and\npropagate that work, subject to this License.  You are not responsible\nfor enforcing compliance by third parties with this License.\n\n  An \"entity transaction\" is a transaction transferring control of an\norganization, or substantially all assets of one, or subdividing an\norganization, or merging organizations.  If propagation of a covered\nwork results from an entity transaction, each party to that\ntransaction who receives a copy of the work also receives whatever\nlicenses to the work the party's predecessor in interest had or could\ngive under the previous paragraph, plus a right to possession of the\nCorresponding Source of the work from the predecessor in interest, if\nthe predecessor has it or can get it with reasonable efforts.\n\n  You may not impose any further restrictions on the exercise of the\nrights granted or affirmed under this License.  For example, you may\nnot impose a license fee, royalty, or other charge for exercise of\nrights granted under this License, and you may not initiate litigation\n(including a cross-claim or counterclaim in a lawsuit) alleging that\nany patent claim is infringed by making, using, selling, offering for\nsale, or importing the Program or any portion of it.\n\n  11. Patents.\n\n  A \"contributor\" is a copyright holder who authorizes use under this\nLicense of the Program or a work on which the Program is based.  
The\nwork thus licensed is called the contributor's \"contributor version\".\n\n  A contributor's \"essential patent claims\" are all patent claims\nowned or controlled by the contributor, whether already acquired or\nhereafter acquired, that would be infringed by some manner, permitted\nby this License, of making, using, or selling its contributor version,\nbut do not include claims that would be infringed only as a\nconsequence of further modification of the contributor version.  For\npurposes of this definition, \"control\" includes the right to grant\npatent sublicenses in a manner consistent with the requirements of\nthis License.\n\n  Each contributor grants you a non-exclusive, worldwide, royalty-free\npatent license under the contributor's essential patent claims, to\nmake, use, sell, offer for sale, import and otherwise run, modify and\npropagate the contents of its contributor version.\n\n  In the following three paragraphs, a \"patent license\" is any express\nagreement or commitment, however denominated, not to enforce a patent\n(such as an express permission to practice a patent or covenant not to\nsue for patent infringement).  To \"grant\" such a patent license to a\nparty means to make such an agreement or commitment not to enforce a\npatent against the party.\n\n  If you convey a covered work, knowingly relying on a patent license,\nand the Corresponding Source of the work is not available for anyone\nto copy, free of charge and under the terms of this License, through a\npublicly available network server or other readily accessible means,\nthen you must either (1) cause the Corresponding Source to be so\navailable, or (2) arrange to deprive yourself of the benefit of the\npatent license for this particular work, or (3) arrange, in a manner\nconsistent with the requirements of this License, to extend the patent\nlicense to downstream recipients.  \"Knowingly relying\" means you have\nactual knowledge that, but for the patent license, your conveying the\ncovered work in a country, or your recipient's use of the covered work\nin a country, would infringe one or more identifiable patents in that\ncountry that you have reason to believe are valid.\n\n  If, pursuant to or in connection with a single transaction or\narrangement, you convey, or propagate by procuring conveyance of, a\ncovered work, and grant a patent license to some of the parties\nreceiving the covered work authorizing them to use, propagate, modify\nor convey a specific copy of the covered work, then the patent license\nyou grant is automatically extended to all recipients of the covered\nwork and works based on it.\n\n  A patent license is \"discriminatory\" if it does not include within\nthe scope of its coverage, prohibits the exercise of, or is\nconditioned on the non-exercise of one or more of the rights that are\nspecifically granted under this License.  
You may not convey a covered\nwork if you are a party to an arrangement with a third party that is\nin the business of distributing software, under which you make payment\nto the third party based on the extent of your activity of conveying\nthe work, and under which the third party grants, to any of the\nparties who would receive the covered work from you, a discriminatory\npatent license (a) in connection with copies of the covered work\nconveyed by you (or copies made from those copies), or (b) primarily\nfor and in connection with specific products or compilations that\ncontain the covered work, unless you entered into that arrangement,\nor that patent license was granted, prior to 28 March 2007.\n\n  Nothing in this License shall be construed as excluding or limiting\nany implied license or other defenses to infringement that may\notherwise be available to you under applicable patent law.\n\n  12. No Surrender of Others' Freedom.\n\n  If conditions are imposed on you (whether by court order, agreement or\notherwise) that contradict the conditions of this License, they do not\nexcuse you from the conditions of this License.  If you cannot convey a\ncovered work so as to satisfy simultaneously your obligations under this\nLicense and any other pertinent obligations, then as a consequence you may\nnot convey it at all.  For example, if you agree to terms that obligate you\nto collect a royalty for further conveying from those to whom you convey\nthe Program, the only way you could satisfy both those terms and this\nLicense would be to refrain entirely from conveying the Program.\n\n  13. Use with the GNU Affero General Public License.\n\n  Notwithstanding any other provision of this License, you have\npermission to link or combine any covered work with a work licensed\nunder version 3 of the GNU Affero General Public License into a single\ncombined work, and to convey the resulting work.  The terms of this\nLicense will continue to apply to the part which is the covered work,\nbut the special requirements of the GNU Affero General Public License,\nsection 13, concerning interaction through a network will apply to the\ncombination as such.\n\n  14. Revised Versions of this License.\n\n  The Free Software Foundation may publish revised and/or new versions of\nthe GNU General Public License from time to time.  Such new versions will\nbe similar in spirit to the present version, but may differ in detail to\naddress new problems or concerns.\n\n  Each version is given a distinguishing version number.  If the\nProgram specifies that a certain numbered version of the GNU General\nPublic License \"or any later version\" applies to it, you have the\noption of following the terms and conditions either of that numbered\nversion or of any later version published by the Free Software\nFoundation.  If the Program does not specify a version number of the\nGNU General Public License, you may choose any version ever published\nby the Free Software Foundation.\n\n  If the Program specifies that a proxy can decide which future\nversions of the GNU General Public License can be used, that proxy's\npublic statement of acceptance of a version permanently authorizes you\nto choose that version for the Program.\n\n  Later license versions may give you additional or different\npermissions.  However, no additional obligations are imposed on any\nauthor or copyright holder as a result of your choosing to follow a\nlater version.\n\n  15. 
Disclaimer of Warranty.\n\n  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY\nAPPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT\nHOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM \"AS IS\" WITHOUT WARRANTY\nOF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,\nTHE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\nPURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM\nIS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF\nALL NECESSARY SERVICING, REPAIR OR CORRECTION.\n\n  16. Limitation of Liability.\n\n  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING\nWILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS\nTHE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY\nGENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE\nUSE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF\nDATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD\nPARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),\nEVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF\nSUCH DAMAGES.\n\n  17. Interpretation of Sections 15 and 16.\n\n  If the disclaimer of warranty and limitation of liability provided\nabove cannot be given local legal effect according to their terms,\nreviewing courts shall apply local law that most closely approximates\nan absolute waiver of all civil liability in connection with the\nProgram, unless a warranty or assumption of liability accompanies a\ncopy of the Program in return for a fee.\n\n                     END OF TERMS AND CONDITIONS\n\n            How to Apply These Terms to Your New Programs\n\n  If you develop a new program, and you want it to be of the greatest\npossible use to the public, the best way to achieve this is to make it\nfree software which everyone can redistribute and change under these terms.\n\n  To do so, attach the following notices to the program.  It is safest\nto attach them to the start of each source file to most effectively\nstate the exclusion of warranty; and each file should have at least\nthe \"copyright\" line and a pointer to where the full notice is found.\n\n    <one line to give the program's name and a brief idea of what it does.>\n    Copyright (C) <year>  <name of author>\n\n    This program is free software: you can redistribute it and/or modify\n    it under the terms of the GNU General Public License as published by\n    the Free Software Foundation, either version 3 of the License, or\n    (at your option) any later version.\n\n    This program is distributed in the hope that it will be useful,\n    but WITHOUT ANY WARRANTY; without even the implied warranty of\n    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n    GNU General Public License for more details.\n\n    You should have received a copy of the GNU General Public License\n    along with this program.  
If not, see <https://www.gnu.org/licenses/>.\n\nAlso add information on how to contact you by electronic and paper mail.\n\n  If the program does terminal interaction, make it output a short\nnotice like this when it starts in an interactive mode:\n\n    <program>  Copyright (C) <year>  <name of author>\n    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.\n    This is free software, and you are welcome to redistribute it\n    under certain conditions; type `show c' for details.\n\nThe hypothetical commands `show w' and `show c' should show the appropriate\nparts of the General Public License.  Of course, your program's commands\nmight be different; for a GUI interface, you would use an \"about box\".\n\n  You should also get your employer (if you work as a programmer) or school,\nif any, to sign a \"copyright disclaimer\" for the program, if necessary.\nFor more information on this, and how to apply and follow the GNU GPL, see\n<https://www.gnu.org/licenses/>.\n\n  The GNU General Public License does not permit incorporating your program\ninto proprietary programs.  If your program is a subroutine library, you\nmay consider it more useful to permit linking proprietary applications with\nthe library.  If this is what you want to do, use the GNU Lesser General\nPublic License instead of this License.  But first, please read\n<https://www.gnu.org/licenses/why-not-lgpl.html>."
  },
  {
    "path": "MANIFEST.in",
    "content": "recursive-include . *"
  },
  {
    "path": "README.md",
    "content": "<div align = \"center\">\n<h1>\n    <img src = \"https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/icon.png?raw=true\" width = 200 height = 200>\n<br>\n\n</h1>\n\n<h3>\nData Labeling, Tracking and Annotation with AI\n</h3>\n\nDLTA-AI is the next generation of annotation tools, integrating the power of SOTA Computer Vision models into <a href = \"https://github.com/wkentaro/labelme\">Labelme</a> in a seamless experience and intuitive workflow to make creating image datasets easier than ever before\n\n\n[![User Guide](https://img.shields.io/badge/User%20Guide-blue)](https://0ssamaak0.github.io/DLTA-AI/)\n[![Youtube Channel](https://img.shields.io/youtube/channel/views/UCJi8OFF-fzwGSAC8BWJ0BhQ)](https://www.youtube.com/@DLTA-AI)\n[![Discord Server](https://img.shields.io/discord/1130938906616004638)](https://discord.gg/9Rmwcnc4)\n[![PyPI - Downloads](https://img.shields.io/pypi/dm/DLTA-AI)](https://pypi.org/project/DLTA-AI/)\n[![GitHub release (latest by date including pre-releases)](https://img.shields.io/github/v/release/0ssamaak0/DLTA-AI?include_prereleases)](https://github.com/0ssamaak0/DLTA-AI/releases)\n[![GitHub issues](https://img.shields.io/github/issues/0ssamaak0/DLTA-AI)](https://github.com/0ssamaak0/DLTA-AI/issues)\n[![GitHub last commit](https://img.shields.io/github/last-commit/0ssamaak0/DLTA-AI)](https://github.com/0ssamaak0/DLTA-AI/commits)\n[![GitHub License](https://img.shields.io/github/license/0ssamaak0/DLTA-AI)](https://github.com/0ssamaak0/DLTA-AI/blob/master/LICENSE)\n\n![gif_main](https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/gif_main.gif?raw=true)\n\n<!-- make p with larger font size -->\n[Installation](#installation-%EF%B8%8F)  🛠️ | [Segment Anything](#Segment-Anything-) 🪄 | [Model Selection](#model-selection-) 🤖 | [Segmentation](#segmentation-) 🎨 | [Object Tracking](#object-tracking-) 🚗 | [Export](#export-) 📤 | [Other Features](#other-features-) 🌟| [Contributing](#contributing-) 🤝| [Acknowledgements](#acknowledgements-)🙏| [Resources](#resources-) 🌐 | [License](#license-) 📜\n\n</div>\n\n\n\n# Installation 🛠️\nAfter creating a new environment and installing PyTorch into it, you can install DLTA-AI using pip\n```\npip install DLTA-AI\n```\nand run it using\n```\nDLTA-AI\n```\nCheck the [Installation section in the User Guide](https://0ssamaak0.github.io/DLTA-AI/installation/full-installation/) for more details, different installation options and solutions for common issues.\n# Segment Anything 🪄\nDLTA-AI takes annotation to the next level by integrating Meta's latest model [Segment Anything (SAM)](https://github.com/facebookresearch/segment-anything) to support zero-shot segmentation for any class\n\n**SAM** can also be used to improve the quality of segmentation; even an inaccurate polygon around the object is enough for it to be segmented correctly\n\n**SAM** doesn't only work for segmentation tasks; it's built into the video mode to support **Object Tracking** for any class as well\n\n<div align = \"center\">\n\n![Segment Anything](https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/SAM.gif?raw=true)\n</div>\n\n# Model Selection 🤖\nFor model selection, DLTA-AI provides the **Model Explorer** to utilize the power of the numerous models in [mmdetection](https://github.com/open-mmlab/mmdetection/tree/2.x) and [ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) as well as the models of [SAM](https://github.com/facebookresearch/segment-anything)\n\nto give the user the ability to compare, download and select from the library of models
# Segment Anything 🪄\nDLTA-AI takes annotation to the next level by integrating Meta's latest model [Segment Anything (SAM)](https://github.com/facebookresearch/segment-anything) to support zero-shot segmentation for any class\n\n**SAM** can also be used to improve the quality of segmentation; even an inaccurate polygon around the object is enough for it to be segmented correctly\n\n**SAM** doesn't only work for segmentation tasks; it's built into the video mode to support **Object Tracking** for any class as well\n\n<div align = \"center\">\n\n![Segment Anything](https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/SAM.gif?raw=true)\n</div>\n\n# Model Selection 🤖\nFor model selection, DLTA-AI provides the **Model Explorer** to utilize the power of the numerous models in [mmdetection](https://github.com/open-mmlab/mmdetection/tree/2.x) and [ultralytics YOLOv8](https://github.com/ultralytics/ultralytics) as well as the models of [SAM](https://github.com/facebookresearch/segment-anything)\n\nIt gives the user the ability to compare, download and select from the library of models\n<div align = \"center\">\n\n![Model Explorer](https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/model_explorer.png?raw=true)\n</div>\n\n# Segmentation 🎨\nUsing the models from the **Model Explorer**, DLTA-AI provides a seamless experience to annotate a single image or a batch of images, with options to select classes, modify thresholds, and full control to edit the segmentation results.\n\n<div align = \"center\">\n\n![Segmentation](https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/segmentation.png?raw=true)\n</div>\nAs mentioned before, **SAM** is fully integrated in DLTA-AI to provide zero-shot segmentation for any class, and to improve the quality of segmentation\n\n# Object Tracking 🚗\nBuilt on top of the segmentation and detection models, DLTA-AI provides a complete solution for Object Tracking, with 5 different models for tracking\n\nDLTA-AI has options for video navigation, tracking settings and different visualization options, with the ability to export the tracking results to a video file\n\nBesides this, DLTA-AI provides a completely new way to modify the tracking results, including edit and delete propagation across frames\n\n<div align = \"center\">\n\n![Object Tracking](https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/tracking.gif?raw=true)\n\n</div>\n\nBesides the automatic tracking models, DLTA-AI provides different methods of interpolation and filling gaps between frames to fix occlusions and unpredicted behaviors in a semi-automatic way\n\n<div align = \"center\">\n\n![Interpolation](https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/interpolation.png?raw=true)\n</div>\n\n# Export 📤\nFor Instance Segmentation, DLTA-AI provides the option to export the segmentation to the standard COCO format, the tracking results to the MOT format, and a video file for the tracking results with the desired visualization options, e.g., show ID, bbox, class name, etc.\n\n<div align = \"center\">\n\n![Export](https://github.com/0ssamaak0/DLTA-AI/blob/master/assets/Export.png?raw=true)\n\n</div>\n\nDLTA-AI also provides the ability to add user-defined or custom export formats that can be used for any purpose; once the user defines their own format, it will be available in the export menu.\n\n# Other Features 🌟\n\n- Threshold Selection (Confidence and IoU)\n- Select Classes (from 80 COCO classes) with the option to save default classes\n- Track assigned objects only\n- Merging models (Run both models and merge the results)\n- Show Runtime Type (CPU/GPU)\n- Show GPU Memory Usage\n- Video Navigation (Frame by Frame, Fast Forward, Fast Backward, Play/Pause)\n- Light / Dark Theme Support (syncs with OS theme)\n- Fully Customizable UI (drag and drop, show/hide)\n- OS Notifications (for long running tasks)\n- Using orjson for faster JSON serialization\n- Additional script (external) to evaluate the results of segmentation (COCO), as shown in the example after this list\n- Additional script (external) to extract frames from a video file for future use\n- User shortcuts and preferences settings\n\n
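For instance, the bundled evaluation script `additional_scripts/coco_eval.py` can be run from the command line; a minimal sketch, where `gt.json` and `pred.json` stand in for your own ground-truth and exported prediction files:\n```\n# hypothetical file names; evaluates segmentation results and prints the mAP\npython additional_scripts/coco_eval.py --gt_file gt.json --pred_file pred.json --task segm --evaluation_type mAP\n```\n\n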
# Contributing 🤝\nDLTA-AI is an open source project and contributions are very welcome, especially in this early stage of development.\n\nYou can contribute in many ways:\n- Create an [issue](https://github.com/0ssamaak0/DLTA-AI/issues) reporting bugs 🐞, suggesting new features 🌟, or just giving your feedback 📝\n\n- Create a [pull request](https://github.com/0ssamaak0/DLTA-AI/pulls) to fix bugs or add new features, or just to improve the code quality, optimize performance, documentation, or even just to fix typos\n\n- Review [pull requests](https://github.com/0ssamaak0/DLTA-AI/pulls) and help with the code review process\n\n- Spread the word about DLTA-AI and help us grow the community 🌎, by sharing the project on social media, or just by telling your friends about it\n\n# Acknowledgements 🙏\nThis tool is part of a Graduation Project at [Faculty of Engineering, Ain Shams University](https://eng.asu.edu.eg/) under the supervision of:\n\n- [Dr. Karim Ismail](https://carleton.ca/cee/profile/karim-ismail/)\n- [Dr. Ahmed Osama](mailto:ahmed.osama@eng.asu.edu.eg)\n- Dr. Watheq El-Kharashy\n- [Eng. Yousra El-Qattan](https://www.linkedin.com/in/youssra-elqattan/)\n\nWe also want to thank our friends who helped us with testing, feedback and suggestions:\n\n- [Eng. Houssam Siyoufi](https://www.linkedin.com/in/houssam-siyoufi-163627110/)\n- [Amin Mohamed](https://github.com/AminMohamed-3)\n- [Badr Mohamed](https://github.com/Badr-1)\n- [Ahmed Mahmoud](https://github.com/ahmedd-mahmoudd)\n- [Youssef Ashraf](https://github.com/0xNine9)\n- [Chadi Ashraf](https://github.com/Chady00)\n\n\n# Resources 🌐\n- [Labelme](https://github.com/wkentaro/labelme)\n- [Segment Anything (SAM)](https://github.com/facebookresearch/segment-anything)\n- [MMDetection](https://github.com/open-mmlab/mmdetection/tree/2.x)\n- [ultralytics YOLOv8](https://github.com/ultralytics/ultralytics)\n- [mikelbrostrom yolov8_tracking](https://github.com/mikel-brostrom/yolov8_tracking)\n- [orjson](https://github.com/ijl/orjson)\n- [icons8](https://icons8.com/)\n\n# License 📜\nDLTA-AI is released under the [GPLv3 license](https://github.com/0ssamaak0/DLTA-AI/blob/master/LICENSE). \n"
  },
  {
    "path": "additional_scripts/coco_eval.py",
    "content": "import sys\nimport os\nimport json\nimport argparse\nfrom pycocotools.coco import COCO\nfrom pycocotools.cocoeval import COCOeval\n\n# Define a class to suppress print statements\n\n\nclass HiddenPrints:\n    \"\"\"\n    A context manager to suppress print statements.\n    \"\"\"\n\n    def __enter__(self):\n        self._original_stdout = sys.stdout\n        sys.stdout = open(os.devnull, 'w')\n\n    def __exit__(self, exc_type, exc_val, exc_tb):\n        sys.stdout.close()\n        sys.stdout = self._original_stdout\n\n\n# Define the function to evaluate coco\ndef evaluate_coco(gt_file: str, pred_file: str, task: str = \"bbox\", evaluation_type: str = \"full\") -> None:\n    \"\"\"\n    Evaluates the performance of a COCO object detection model.\n\n    Args:\n        gt_file (str): Path to the ground truth file.\n        pred_file (str): Path to the prediction file.\n        task (str, optional): The type of task to evaluate (bbox or segm). Defaults to \"bbox\".\n        evaluation_type (str, optional): The type of evaluation to perform (full or mAP). Defaults to \"full\".\n    \"\"\"\n    # Use HiddenPrints to suppress print statements\n    with HiddenPrints():\n        # Load the ground truth file\n        coco_gt = COCO(gt_file)\n\n        # Load the prediction file\n        with open(pred_file, 'r') as f:\n            pred_file = json.load(f)\n            pred_file = pred_file[0]['annotations']  # type: ignore\n\n        coco_dt = coco_gt.loadRes(pred_file)\n\n        # Create a COCO evaluator object\n        coco_eval = COCOeval(coco_gt, coco_dt, task)  # type: ignore\n\n        # Evaluate the model\n        coco_eval.evaluate()\n        coco_eval.accumulate()\n\n        # Compute stats\n        coco_eval.summarize()\n\n    # Print the results based on the evaluation type\n    if evaluation_type == \"full\":\n        coco_eval.summarize()\n    elif evaluation_type == \"mAP\":\n        print(f\"{task} mAP: {coco_eval.stats[0]:.3f}\")\n\n\n# Create an argument parser\nparser = argparse.ArgumentParser()\nparser.add_argument(\"--gt_file\", required=True, help=\"ground truth file\")\nparser.add_argument(\"--pred_file\", required=True, help=\"prediction file\")\nparser.add_argument(\"--task\", default=\"bbox\",\n                    choices=[\"bbox\", \"segm\"], help=\"task (bbox or segm)\")\nparser.add_argument(\"--evaluation_type\", default=\"full\",\n                    choices=[\"full\", \"mAP\"], help=\"evaluation type (full or mAP)\")\n\n# Parse the arguments\nargs = parser.parse_args()\n\n# Run the function with the arguments\nevaluate_coco(args.gt_file, args.pred_file, args.task, args.evaluation_type)\n"
  },
  {
    "path": "docs/Installation/executable.md",
    "content": "---\nlabel: Executable (CPU Only) \norder: 2\nicon: \":package:\"\n---\nDLTA-AI is available as an executable, however it's CPU only, so it's not recommended for large datasets. It's currently available for windows and linux only\n\nyou can download the [lastest release](https://github.com/0ssamaak0/DLTA-AI/releases) Executable under **Assets** \n\nThe Executable doesn't require any installation, just download and run it from the executable file \n\n![Executable image in file explorer](https://github.com/0ssamaak0/DLTA-AI/raw/master/docs/assets/exe.png?raw=true)"
  },
  {
    "path": "docs/Installation/full installation.md",
    "content": "---\nlabel: full installation\nicon: \":inbox_tray:\"\norder: 3\n---\n\n# Full Installation\n## Create a Virtual Environment\nIt is highly recommended to install DTLA-AI in virtual environment using conda. This will ensure a clean and isolated environment for the installation process. use `python=3.8` to avoid any compatibility issues\n```\nconda create -n DLTA-AI python=3.8\nconda activate DLTA-AI\n```\n\n\n## Install Pytorch\n\nFirst, you need to install [pytorch](https://pytorch.org/get-started/locally/) according to your device and your OS, if you have GPU, choose CUDA version, otherwise choose CPU version\n\nExample:\n```\nconda install pytorch torchvision torchaudio .... -c pytorch>\n```\n\n!!!\nDue to inconsistency between the current versions of `pytorch` and `mmcv`, some users may face issues when using `pytorch 2.x`\nif you face any issues, try to install `pytorch 1.13.1` instead\n!!!\n\n## Option 1: Using pip\nInstallation using pip is more easier since it handles all dependencies\n```\npip install DLTA-AI\n```\nthen run it from anywhere using\n```\nDLTA-AI\n```\nnote that first time running DLTA-AI, it will download a required module, it may take some time\n\nyou can also use pip for updating DLTA-AI\n```\npip install DLTA-AI -U\n```\n\n\n\n## Option 2: Manual Installation\nDownload the lastest release from [here](https://github.com/0ssamaak0/DLTA-AI/releases)\n\ninstall requirements\n\n```\npip install -r requirements.txt\nmim install mmcv-full==1.7.0\n```\nthen \nRun the tool from `DLTA_AI_app` directory\n```\ncd DLTA_AI_app\npython __main__.py\n```\n\n\n\n\n\n\n"
  },
  {
    "path": "docs/Installation/index.yml",
    "content": "label: Installation\nicon: \":hammer_and_wrench:\"\nexpanded: true\norder: 50"
  },
  {
    "path": "docs/Installation/problems.md",
    "content": "---\nlabel: possible problems\norder: 1\nicon: \":interrobang:\"\n---\n\n# Solutions to possible problems\n\n## Qt Platform Plugin Error in OpenCV on Linux Machines 🐧\nsome linux machines may have this problem \n```\nCould not load the Qt platform plugin \"xcb\" in \"/home/<username>/miniconda3/envs/test/lib/python3.8/site-packages/cv2/qt/plugins\" even though it was found.\nThis application failed to start because no Qt platform plugin could be initialized. Reinstalling the application may fix this problem.\n\nAvailable platform plugins are: xcb, eglfs, linuxfb, minimal, minimalegl, offscreen, vnc, wayland-egl, wayland, wayland-xcomposite-egl, wayland-xcomposite-glx, webgl.\n```\nit can be solved simply be installing opencv-headless\n```\npip3 install opencv-python-headless\n```\n## Microsoft Visual C++ Build Tools Error When Installing MMDetection on Windows Machines 🪟\nsome windows machines may have this problem when installing **mmdet**\n```\nBuilding wheel for pycocotools (setup.py) ... error\n...\nerror: Microsoft Visual C++ 14.0 or greater is required. Get it with \"Microsoft C++ Build Tools\": https://visualstudio.microsoft.com/visual-cpp-build-tools/\n```\nYou can try\n```\nconda install -c conda-forge pycocotools\n```\nor just use Visual Studio installer to Install `MSVC v143 - VS 2022 C++ x64/x86 build tools (Latest)**`\n## Problem in installing mmcv-full\nyou may often stuck in installing `mmcv-full` with this message\n```\nBuilding wheels for collected packages: mmcv-full\n  Building wheel for mmcv-full (setup.py) ...\n```\nyou can try installing [pytorch 1.13.1](https://pytorch.org/get-started/previous-versions/#v1131), instead of the lastest version, you can also refer to [this issue](https://github.com/open-mmlab/mmcv/issues/1386)\n\n## Multiple copies of the OpenMP runtime have been linked into the program\nyou may encounter this problem\n```\nOMP: Error #15: Initializing libiomp5md.dll, but found libiomp5md.dll already initialized.\n\nOMP: Hint This means that multiple copies of the OpenMP runtime have been linked into the program.\nThat is dangerous, since it can degrade performance or cause incorrect results. The best thing to do is to\nensure that only a single OpenMP runtime is linked into the process, e.g. by avoiding static linking of the\nOpenMP runtime in any library. As an unsafe, unsupported, undocumented workaround you can set the environment\nvariable KMP_DUPLICATE_LIB_OK=TRUE to allow the program to continue to execute, but that may cause crashes or silently\nproduce incorrect results. For more information, please see http://www.intel.com/software/products/support/.\n\n```\nyou can solve this by upgrading numpy\n```\npip install numpy==1.23.3\n```\n\nThanks for [mohamedraafat96's issue](https://github.com/0ssamaak0/DLTA-AI/issues/52), you can check this [stackoverflow answer](https://stackoverflow.com/questions/64209238/error-15-initializing-libiomp5md-dll-but-found-libiomp5md-dll-already-initial) for more details"
  },
  {
    "path": "docs/index.md",
    "content": "---\nlabel: \"DLTA-AI User Guide\"\nicon: \"assets/icon.png\"\n---\n\n# DLTA-AI User Guide\n\n![DLTA-AI Preview](https://github.com/0ssamaak0/DLTA-AI/raw/master/assets/gif_main.gif?raw=true)\n\n<h3 align= \"center\">\nData Labeling, Tracking and Annotation with AI.\n</h3>\n\nDLTA-AI is the next generation of annotation tools, integrating the power of Computer Vision SOTA models to Labelme in a seamless expirence and intuitive workflow to make creating image datasets easier than ever before\n\n\n## Why DLTA-AI?\nOpen source and customizable annotation tool was created to fill a gap in annotation tools.\nThe customization and giving the user the full control was and will be our priority, from the model selection, input formats and inference parameters, to the export formats and even the User Interface itself. From these options, the goal was to extend the use cases of the concept of annotation tool to other use cases for end users beyond just preparing datasets to train models.\n\n## Features\n-\tEasy and straightforward Installation process, support for all Operating Systems\n-\tUser Guide with detailed tutorials for all the features\n-\tFull Support of Auto Annotation with different models.\n-\tDifferent annotation options and parameters (e.g., Thresholds)\n-\tExport to (literally) any format\n-\tModern and functional User Interface\n-\tDedicated Video Mode\n-   Object Tracking Support \n-   Completely free and open-source, and will always be.\n\n\n\n## Contributing\nDLTA-AI is an open source project and contributions are very welcome\n\nYou can contribute in many ways:\n\n- Create an [issue](https://github.com/0ssamaak0/DLTA-AI/issues) Reporting bugs 🐞 or suggesting new features 🌟 or just give your feedback 📝\n\n- Create a [pull request](https://github.com/0ssamaak0/DLTA-AI/pulls) to fix bugs or add new features, or just to improve the code quality, optimize performance, documentation, or even just to fix typos\n\n- Review [pull requests](https://github.com/0ssamaak0/DLTA-AI/pulls) and help with the code review process\n\n- Spread the word about DLTA-AI and help us grow the community 🌎, by sharing the project on social media, or just by telling your friends about it\n\n## Resources\n- [Labelme](https://github.com/wkentaro/labelme)\n- [Segment Anything (SAM)](https://github.com/facebookresearch/segment-anything)\n- [MMDetection](https://github.com/open-mmlab/mmdetection/tree/2.x)\n- [ultralytics YOLOv8](https://github.com/ultralytics/ultralytics)\n- [mikelbrostrom yolov8_tracking](https://github.com/mikel-brostrom/yolov8_tracking)\n- [orjson](https://github.com/ijl/orjson)\n- [icons8](https://icons8.com/)\n"
  },
  {
    "path": "docs/main_features/Export.md",
    "content": "---\nicon: \":outbox_tray:\"\norder: 3\n---\n\n# Export\nThis page is under construction 🚧, please check back later."
  },
  {
    "path": "docs/main_features/SAM.md",
    "content": "---\nicon: https://github.com/0ssamaak0/DLTA-AI/blob/master/DLTA_AI_app/labelme/icons/SAM.png?raw=true\norder: 10\n---\n\n# Segment Anything (SAM)\nMETA AI model [Segment Anything](https://segment-anything.com/) or `SAM` is integrated in DLTA-AI in many ways to increase the accuracy of the Annotation process, in a very native user expirience with almost zero effort to install.\n\n## Installation\nLike all other models, The [Model Explorer](\"../model_selection/model_explorer.md\") can be used to install the checkpoints directly with just a single click\n\n[!embed](https://youtu.be/8g15M9bE1uA?t=5)\n\n## Segmentation\nSegment Anything can be used to make Zero-Shot Segmentation of any object.\nDLTA-AI provides an expiernce similar to the [Original Demo](https://segment-anything.com/demo#) with the simple SAM toolbar that supports user-customized shortcuts, and runs locally on the user machine on any image or video.\n\n[!embed](https://youtu.be/8g15M9bE1uA?t=41)\n\n## Enhance Polygons\nBeside the usual functionality of Zero-Shot Segmentation, Segment Anything can be used to enhance the accuracy of any polygon, weather it was created by the user or by any other model, by simply selecting the polygon(s) and enhancing them from the toolbar or the context menu.\n\n\n[!embed](https://youtu.be/8g15M9bE1uA?t=84)\n\n## Interpolation Tracking\nDLTA-AI utilizes the power of Segment Anything to provide a very accurate interpolation tracking, that can be used to track any object in a video, and can be used to track multiple objects at the same time.\n\n[!embed](https://youtu.be/8g15M9bE1uA?t=116)"
  },
  {
    "path": "docs/main_features/index.yml",
    "content": "label: Main Features\nicon: \":star2:\"\nexpanded: true\norder: 40"
  },
  {
    "path": "docs/main_features/inputs.md",
    "content": "---\nicon: \":clapper:\"\norder: 8\n---\n\n# Input Modes\nDLTA-AI provides different options for inputs\n\n## Image Mode\nimage mode is very simple, just open an imgae and start annotating\n\n[!embed](https://youtu.be/zkm_GhX8OtM?t=6)\n## Directory Mode\nDirectory mode is used to annotate a directory of images, it's very useful when you have a dataset of images and you want to annotate them all at once.\n\nNote that it shows all images within the directory and all subdirectories.\n\n[!embed](https://youtu.be/zkm_GhX8OtM?t=17)\n## Video Mode\nVideo mode is used to annotate a video, and provides an integrated video player that allows you to naivgate, play, pause, forward, backward, and jump to a specific frame.\n\n[!embed](https://youtu.be/zkm_GhX8OtM?t=41)\n\n## Video as Frames\nyou can open a video as a directory of frames, this is useful when you want to just annotate some frames of a video. you have the option to set start and end frame, and also the sampling rate i.e., the step between frames.\n\n[!embed](https://youtu.be/zkm_GhX8OtM?t=71)\n"
  },
  {
    "path": "docs/main_features/segmentation.md",
    "content": "---\nicon: \":art:\"\norder: 9\n---\n\n# Segmentation\nInstance Segmentation is one of the major features in DLTA-AI, from the huge library to the different options and pramaters, to the ability to apply manual edits to the results, DLTA-AI proivdes a fully customizable and easy to use segmentation experience.\n## Model Selection\nThe model Selection can be done directly by selecting a segmentation model from the menu, or by selecting a model from the huge library of models in the [Model Explorer](../model_selection/model_explorer.md)\n\n[!embed](https://youtu.be/bYjy82Ug2wU?t=10)\n## Inferencing\nThe model can be run on the current image only (works in all [input modes](inputs.md)) or on all images (directory mode only) \n\n[!embed](https://youtu.be/bYjy82Ug2wU?t=28)\n## Visualization Options\nyou can select the visualization options from the menu, such as showing the segmentation mask, or just bounding box, and the class name and the cofidence scor as well\n\n[!embed](https://youtu.be/bYjy82Ug2wU?t=60)\n## Select Classes\nyou can select some classes among the 80 of [COCO classes](https://cocodataset.org/) you can select for just this use (forgotten when you close DLTA-AI) or set them as default classes (saved when you close DLTA-AI)\n\n\n[!embed](https://youtu.be/bYjy82Ug2wU?t=84)\n## Thresholds\nTo give the annotator the full control and the ability to choose the optimum point in the precision/recall tradeoff, DLTA-AI provides 2 thresholding options\n\n### Confidence Threshold\nConfidence threshold is very simple, by just typing the threshold value of setting it through the slider, all predictions with confidence less than the threshold will be ignored\n\n[!embed](https://youtu.be/bYjy82Ug2wU?t=112)\n\n### IOU Threshold (For Non Maximum Suppression)\nDLTA-AI internally applies Non Maximum Suppression (NMS) to the predictions, to remove the overlapping predictions, the IOU threshold is the threshold used in NMS.\n\n[!embed](https://youtu.be/bYjy82Ug2wU?t=150)"
  },
  {
    "path": "docs/main_features/tracking/index.yml",
    "content": "label: Tracking\nicon: https://github.com/0ssamaak0/DLTA-AI/blob/master/DLTA_AI_app/labelme/icons/tracking.png?raw=true\nexpanded: true\norder: 4"
  },
  {
    "path": "docs/main_features/tracking/interpolation.md",
    "content": "---\nicon: \norder: 1\n---\n\n# Interpolation Tracking\n\n## Interpolation Method\n\n### Linear Interpolation\n\n### SAM Interpolation\n\n## Interpolation Between\n\n### Selected Keyframes\n\n### Detected Frames\n\n"
  },
  {
    "path": "docs/main_features/tracking/tracking.md",
    "content": "---\nicon: \norder: 2\n---\n\n# Tracking\n\n## Model Selection\n\n## Tracking Options\n\n## Visualization Options\n\n## Edit propagation\n\n## Delete Options"
  },
  {
    "path": "docs/model_selection/index.yml",
    "content": "label: Model Selection\nicon: \":robot_face:\"\nexpanded: true\norder: 30"
  },
  {
    "path": "docs/model_selection/merge.md",
    "content": "---\nicon: https://github.com/0ssamaak0/DLTA-AI/blob/master/DLTA_AI_app/labelme/icons/merge.png?raw=true\norder: 5\n\n---\n\n# Merge Models \nThis page is under construction 🚧, please check back later."
  },
  {
    "path": "docs/model_selection/model_explorer.md",
    "content": "---\nicon: https://github.com/0ssamaak0/DLTA-AI/blob/master/DLTA_AI_app/labelme/icons/checklist.png?raw=true\norder: 10\n\n---\n\n# Model Explorer\nThis page is under construction 🚧, please check back later."
  },
  {
    "path": "docs/retype.yml",
    "content": "input: .\noutput: .retype\n\nurl: # Add your website address here\n\nbranding:\n  title: DLTA-AI User Guide\n  logo: assets/icon.png\n  colors:\n    label:\n      text: \"#ffffff\"\n      background: \"#ff0000\"\n\nfavicon: assets/icon.png\n\nlinks:\n- text: GitHub\n  link: https://github.com/0ssamaak0/DLTA-AI\n  icon: mark-github\n\n- text: Issues\n  link: https://github.com/0ssamaak0/DLTA-AI/issues\n  icon: bug\n\n- text: Release Notes\n  link: https://github.com/0ssamaak0/DLTA-AI/releases\n  icon: note\n\n- text: Youtube Channel\n  link: https://www.youtube.com/@DLTA-AI\n  icon: video\n\n\nfooter:\n  copyright: \"DLTA-AI is licensed under GPLv3 License\" \n  links:                               \n    - text: License    \n      link: https://github.com/0ssamaak0/DLTA-AI/blob/master/LICENSE\n      icon: law\n\n"
  },
  {
    "path": "docs/user_interface.md",
    "content": "---\nlabel: User Interface\nicon: https://github.com/0ssamaak0/DLTA-AI/blob/master/DLTA_AI_app/labelme/icons/UI.png?raw=true\norder: 1\n---\nThis page is under construction 🚧, please check back later."
  },
  {
    "path": "releasenotes.md",
    "content": "# New Features 🌟\n-\n\n# Bug Fixes 🐞\n- Closing the save dialog without saving annotations no more enables the export button #45"
  },
  {
    "path": "requirements.txt",
    "content": "PyQt6==6.6.0\r\nQtPy==2.3.1\r\ntermcolor==2.2.0\r\nimgviz==1.7.2\r\nopencv-python==4.7.0.72\r\npyqtdarktheme==2.1.0\r\nsupervision==0.3.2\r\ngdown==4.7.1\r\nultralytics==8.0.61\r\nonemetric==0.1.2\r\neasydict==1.10\r\nopenmim==0.3.2\r\nmmdet==2.25.2\r\nscikit-image==0.20.0\r\nfilterpy==1.4.5\r\nsegment-anything==1.0\r\nlap==0.4.0\r\norjson==3.8.12\r\nnotify-py==0.3.42\r\npsutil==5.9.4\r\nshapely==2.0.1\r\nscreeninfo==0.8.1\r\n"
  },
  {
    "path": "setup.py",
    "content": "import setuptools\n\nwith open(\"README.md\", \"r\", encoding=\"utf8\") as fh:\n    long_description = fh.read()\n\nwith open(\"requirements.txt\", \"r\") as f:\n    requirements = f.read().splitlines()\n    \n__version__ = \"0.0.0\"\nwith open(\"DLTA_AI_app/labelme/__init__.py\", \"r\") as f:\n    for line in f.readlines():\n        if line.startswith(\"__version__\"):\n            __version__ = line.split(\"=\")[1].strip().strip('\"')\n            break\n\nsetuptools.setup(\n    name=\"DLTA-AI\",\n    version=f\"{__version__}\",\n    author=\"0ssamaak0\",\n    author_email=\"0ssamaak0@gmail.com\",\n    description=\"DLTA-AI is the next generation of annotation tools, integrating the power of Computer Vision SOTA models to Labelme in a seamless expirence and intuitive workflow to make creating image datasets easier than ever before\",\n    long_description=long_description,\n    long_description_content_type=\"text/markdown\",\n    url=\"https://github.com/0ssamaak0/DLTA-AI\",\n    package_dir={\"DLTA_AI_app\": \"DLTA_AI_app\"},\n    python_requires='>=3.8',\n    install_requires=requirements,\n    package_data={\"\": [\"*\"]},\n    license=\"GPLv3\",\n    entry_points={\n        \"console_scripts\": [\n            \"DLTA-AI=DLTA_AI_app.__main__:main\"\n        ]\n    }\n)\n\n"
  },
  {
    "path": "yolo training commands.txt",
    "content": "yolo task=segment mode=train model=yolov8n-seg.pt epochs= 1 imgsz = 320 workers =2 batch = 4 data=datasets\\thermalseg.v6i.yolov8\\data.yaml"
  }
]